From a76b9e93d4de9f14a7e4aaa6d19fc721fc2e17d3 Mon Sep 17 00:00:00 2001 From: Jan200101 Date: Thu, 8 Feb 2024 22:07:04 +0100 Subject: kernel 6.7.3 --- SOURCES/0001-amd-hdr.patch | 142 +- SOURCES/Patchlist.changelog | 152 +- SOURCES/amdgpu-HAINAN-variant-fixup.patch | 34 + SOURCES/asus-linux.patch | 414 +- SOURCES/dracut-virt.conf | 6 + SOURCES/filter-modules.sh.fedora | 5 +- SOURCES/kernel-aarch64-16k-debug-fedora.config | 255 +- SOURCES/kernel-aarch64-16k-fedora.config | 255 +- SOURCES/kernel-aarch64-64k-debug-rhel.config | 194 +- SOURCES/kernel-aarch64-64k-rhel.config | 194 +- SOURCES/kernel-aarch64-debug-fedora.config | 255 +- SOURCES/kernel-aarch64-debug-rhel.config | 194 +- SOURCES/kernel-aarch64-fedora.config | 255 +- SOURCES/kernel-aarch64-rhel.config | 194 +- SOURCES/kernel-aarch64-rt-debug-rhel.config | 195 +- SOURCES/kernel-aarch64-rt-rhel.config | 195 +- SOURCES/kernel-ppc64le-debug-fedora.config | 140 +- SOURCES/kernel-ppc64le-debug-rhel.config | 188 +- SOURCES/kernel-ppc64le-fedora.config | 140 +- SOURCES/kernel-ppc64le-rhel.config | 188 +- SOURCES/kernel-s390x-debug-fedora.config | 141 +- SOURCES/kernel-s390x-debug-rhel.config | 193 +- SOURCES/kernel-s390x-fedora.config | 141 +- SOURCES/kernel-s390x-rhel.config | 193 +- SOURCES/kernel-s390x-zfcpdump-rhel.config | 195 +- SOURCES/kernel-x86_64-debug-fedora.config | 200 +- SOURCES/kernel-x86_64-debug-rhel.config | 192 +- SOURCES/kernel-x86_64-fedora.config | 200 +- SOURCES/kernel-x86_64-rhel.config | 192 +- SOURCES/kernel-x86_64-rt-debug-rhel.config | 193 +- SOURCES/kernel-x86_64-rt-rhel.config | 193 +- SOURCES/kernel.changelog | 2477 + SOURCES/linux-surface.patch | 3632 +- SOURCES/mod-internal.list | 8 + SOURCES/nouveau-gsp-default.patch | 23 + SOURCES/patch-6.6-redhat.patch | 2292 - SOURCES/patch-6.7-redhat.patch | 1553 + SOURCES/rog-ally-audio-fix.patch | 2 +- SOURCES/rog-ally-gyro-fix.patch | 2974 + SOURCES/rpminspect.yaml | 2 +- SOURCES/steam-deck.patch | 18 +- SOURCES/steamdeck-oled-audio.patch | 10 +- SOURCES/steamdeck-oled-bt.patch | 239 - SOURCES/steamdeck-oled-hw-quirks.patch | 110 +- SOURCES/steamdeck-oled-wifi.patch | 207 +- SOURCES/t2linux.patch | 12153 +++ SOURCES/tkg-BBRv2.patch | 3311 - SOURCES/tkg-bcachefs.patch | 98955 ------------------- SOURCES/tkg-misc-additions.patch | 902 +- SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch | 4 +- .../valve-gamescope-framerate-control-fixups.patch | 647 + SOURCES/winesync.patch | 20 +- SPECS/kernel.spec | 783 +- TOOLS/patch_configs.py | 43 +- 54 files changed, 26038 insertions(+), 109955 deletions(-) create mode 100644 SOURCES/amdgpu-HAINAN-variant-fixup.patch create mode 100644 SOURCES/kernel.changelog create mode 100644 SOURCES/nouveau-gsp-default.patch delete mode 100644 SOURCES/patch-6.6-redhat.patch create mode 100644 SOURCES/patch-6.7-redhat.patch create mode 100644 SOURCES/rog-ally-gyro-fix.patch delete mode 100644 SOURCES/steamdeck-oled-bt.patch create mode 100644 SOURCES/t2linux.patch delete mode 100644 SOURCES/tkg-BBRv2.patch delete mode 100644 SOURCES/tkg-bcachefs.patch create mode 100644 SOURCES/valve-gamescope-framerate-control-fixups.patch diff --git a/SOURCES/0001-amd-hdr.patch b/SOURCES/0001-amd-hdr.patch index 6c0deff..c6fc3af 100644 --- a/SOURCES/0001-amd-hdr.patch +++ b/SOURCES/0001-amd-hdr.patch @@ -13,9 +13,6 @@ Subject: [PATCH] hdr .../amd/display/amdgpu_dm/amdgpu_dm_plane.c | 234 ++++- .../amd/display/dc/dcn10/dcn10_cm_common.c | 95 +- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 14 +- - .../drm/amd/display/dc/dcn30/dcn30_hwseq.c | 37 + - 
.../drm/amd/display/dc/dcn30/dcn30_hwseq.h | 3 + - .../drm/amd/display/dc/dcn301/dcn301_init.c | 2 +- .../gpu/drm/amd/display/include/fixed31_32.h | 12 + drivers/gpu/drm/arm/malidp_crtc.c | 2 +- drivers/gpu/drm/drm_atomic.c | 1 + @@ -25,7 +22,6 @@ Subject: [PATCH] hdr include/drm/drm_plane.h | 7 + include/drm/drm_property.h | 6 + include/uapi/drm/drm_mode.h | 8 + - 21 files changed, 1473 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 32fe05c81..84bf501b0 100644 @@ -1328,7 +1324,7 @@ index 97b7a0b8a..f1707c774 100644 state->crc_skip_count = cur->crc_skip_count; state->mpo_requested = cur->mpo_requested; /* TODO Duplicate dc_stream after objects are stream object is flattened */ -@@ -296,6 +297,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) +@@ -296,6 +296,70 @@ } #endif @@ -1398,7 +1394,7 @@ index 97b7a0b8a..f1707c774 100644 + /* Implemented only the options currently available for the driver */ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - .reset = dm_crtc_reset_state, + .reset = amdgpu_dm_crtc_reset_state, @@ -314,6 +379,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, @@ -1409,7 +1405,7 @@ index 97b7a0b8a..f1707c774 100644 +#endif }; - static void dm_crtc_helper_disable(struct drm_crtc *crtc) + static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc) @@ -489,6 +558,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); @@ -1424,7 +1420,7 @@ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/g index cc74dd69a..2ed20e6e4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c -@@ -1333,8 +1333,14 @@ static void dm_drm_plane_reset(struct drm_plane *plane) +@@ -1337,8 +1337,14 @@ amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); WARN_ON(amdgpu_state == NULL); @@ -1440,7 +1436,7 @@ index cc74dd69a..2ed20e6e4 100644 + amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; } - static struct drm_plane_state * + static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane) @@ -1354,6 +1360,32 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane) dc_plane_state_retain(dm_plane_state->dc_state); } @@ -1497,7 +1493,7 @@ index cc74dd69a..2ed20e6e4 100644 +#ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK +static void -+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, ++amdgpu_dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, + struct drm_plane *plane) +{ + struct amdgpu_mode_info mode_info = dm->adev->mode_info; @@ -1553,7 +1549,7 @@ index cc74dd69a..2ed20e6e4 100644 +} + +static int -+dm_atomic_plane_set_property(struct drm_plane *plane, ++amdgpu_dm_atomic_plane_set_property(struct drm_plane *plane, + struct drm_plane_state *state, + struct drm_property *property, + uint64_t val) @@ -1635,7 +1631,7 @@ index cc74dd69a..2ed20e6e4 100644 +} + +static int -+dm_atomic_plane_get_property(struct drm_plane *plane, ++amdgpu_dm_atomic_plane_get_property(struct drm_plane *plane, + const struct drm_plane_state *state, + struct drm_property *property, + uint64_t *val) @@ -1678,23 +1674,23 @@ index cc74dd69a..2ed20e6e4 100644 static const struct drm_plane_funcs dm_plane_funcs = { .update_plane = 
drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, -@@ -1435,6 +1658,10 @@ static const struct drm_plane_funcs dm_plane_funcs = { - .atomic_duplicate_state = dm_drm_plane_duplicate_state, - .atomic_destroy_state = dm_drm_plane_destroy_state, - .format_mod_supported = dm_plane_format_mod_supported, +@@ -1658,6 +1881,10 @@ static const struct drm_plane_funcs dm_plane_funcs = { + .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, + .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, + .format_mod_supported = amdgpu_dm_plane_format_mod_supported, +#ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK -+ .atomic_set_property = dm_atomic_plane_set_property, -+ .atomic_get_property = dm_atomic_plane_get_property, ++ .atomic_set_property = amdgpu_dm_atomic_plane_set_property, ++ .atomic_get_property = amdgpu_dm_atomic_plane_get_property, +#endif }; - + int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, @@ -1514,6 +1741,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_helper_add(plane, &dm_plane_helper_funcs); +#ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK -+ dm_atomic_plane_attach_color_mgmt_properties(dm, plane); ++ amdgpu_dm_atomic_plane_attach_color_mgmt_properties(dm, plane); +#endif /* Create (reset) the plane state */ if (plane->funcs->reset) @@ -1817,10 +1813,10 @@ index 3538973bd..04b2e04b6 100644 j++; } } -diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 79befa17b..4daf8621b 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2486,17 +2486,17 @@ void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx) adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; @@ -1855,81 +1851,6 @@ index 79befa17b..4daf8621b 100644 hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish -diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c -index 255713ec2..fce9b33c0 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c -@@ -186,6 +186,43 @@ bool dcn30_set_input_transfer_func(struct dc *dc, - return result; - } - -+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx) -+{ -+ int i = 0; -+ struct dpp_grph_csc_adjustment dpp_adjust; -+ struct mpc_grph_gamut_adjustment mpc_adjust; -+ int mpcc_id = pipe_ctx->plane_res.hubp->inst; -+ struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; -+ -+ memset(&dpp_adjust, 0, sizeof(dpp_adjust)); -+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; -+ -+ if (pipe_ctx->plane_state && -+ pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) { -+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; -+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) -+ dpp_adjust.temperature_matrix[i] = -+ pipe_ctx->plane_state->gamut_remap_matrix.matrix[i]; -+ } -+ -+ pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp, -+ &dpp_adjust); -+ -+ memset(&mpc_adjust, 0, sizeof(mpc_adjust)); -+ 
mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; -+ -+ if (pipe_ctx->top_pipe == NULL) { -+ if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { -+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; -+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) -+ mpc_adjust.temperature_matrix[i] = -+ pipe_ctx->stream->gamut_remap_matrix.matrix[i]; -+ } -+ } -+ -+ mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust); -+} -+ - bool dcn30_set_output_transfer_func(struct dc *dc, - struct pipe_ctx *pipe_ctx, - const struct dc_stream_state *stream) -diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h -index ce19c5409..e557e2b98 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h -+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h -@@ -58,6 +58,9 @@ bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx, - bool dcn30_set_input_transfer_func(struct dc *dc, - struct pipe_ctx *pipe_ctx, - const struct dc_plane_state *plane_state); -+ -+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx); -+ - bool dcn30_set_output_transfer_func(struct dc *dc, - struct pipe_ctx *pipe_ctx, - const struct dc_stream_state *stream); -diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c -index 61205cdbe..fdbe3d42c 100644 ---- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c -+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c -@@ -33,7 +33,7 @@ - #include "dcn301_init.h" - - static const struct hw_sequencer_funcs dcn301_funcs = { -- .program_gamut_remap = dcn10_program_gamut_remap, -+ .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn10_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index d4cf7ead1..84da1dd34 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -2120,3 +2041,28 @@ index ea1b639bc..cea5653e4 100644 -- 2.43.0 +From b938468f07222b4faab5ae5cf5391eccd9532bb0 Mon Sep 17 00:00:00 2001 +From: Bouke Sybren Haarsma +Date: Fri, 15 Dec 2023 11:14:58 +0100 +Subject: [PATCH] Don't create color_mgmt_properties on asics < SIENNA_CICHLID + +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +index 2ed20e6e439bb5..65ee8745e96540 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +@@ -1742,7 +1742,8 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + drm_plane_helper_add(plane, &dm_plane_helper_funcs); + + #ifdef CONFIG_DRM_AMD_COLOR_STEAMDECK +- amdgpu_dm_atomic_plane_attach_color_mgmt_properties(dm, plane); ++ if (dm->adev->asic_type >= CHIP_SIENNA_CICHLID) ++ amdgpu_dm_atomic_plane_attach_color_mgmt_properties(dm, plane); + #endif + /* Create (reset) the plane state */ + if (plane->funcs->reset) +-- +2.43.0 diff --git a/SOURCES/Patchlist.changelog b/SOURCES/Patchlist.changelog index 7a6ee75..4e240b5 100644 --- a/SOURCES/Patchlist.changelog +++ b/SOURCES/Patchlist.changelog @@ -1,120 +1,96 @@ -"https://gitlab.com/cki-project/kernel-ark/-/commit"/bbdede94e2dfb64c3fdb376f90222394422d0131 - bbdede94e2dfb64c3fdb376f90222394422d0131 ida: Fix crash in 
ida_free when the bitmap is empty +"https://gitlab.com/cki-project/kernel-ark/-/commit"/46a8350ec1068377bdfcd55191012325f85113bb + 46a8350ec1068377bdfcd55191012325f85113bb Revert "cpupower: Bump soname version" -"https://gitlab.com/cki-project/kernel-ark/-/commit"/ed93ec720e04b598e451e23635bd8201ecaf9c60 - ed93ec720e04b598e451e23635bd8201ecaf9c60 wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() +"https://gitlab.com/cki-project/kernel-ark/-/commit"/caef9732dac92b5afac527584a71e0d9fe783c11 + caef9732dac92b5afac527584a71e0d9fe783c11 wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() -"https://gitlab.com/cki-project/kernel-ark/-/commit"/becca34be9cd8577a101032917438af982aa7d29 - becca34be9cd8577a101032917438af982aa7d29 ALSA: hda: cs35l41: Add notification support into component binding +"https://gitlab.com/cki-project/kernel-ark/-/commit"/e04ed37ee7a38d7b21d8811666ec556c83f55931 + e04ed37ee7a38d7b21d8811666ec556c83f55931 drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set -"https://gitlab.com/cki-project/kernel-ark/-/commit"/2fa4b6a18ce384be968eda55c9cb12a6bb5cb4ca - 2fa4b6a18ce384be968eda55c9cb12a6bb5cb4ca ALSA: hda: cs35l41: Support mute notifications for CS35L41 HDA +"https://gitlab.com/cki-project/kernel-ark/-/commit"/f6b5c078b94f3e8ad78f8eb246af98a93f715bdb + f6b5c078b94f3e8ad78f8eb246af98a93f715bdb scsi: sd: Add "probe_type" module parameter to allow synchronous probing -"https://gitlab.com/cki-project/kernel-ark/-/commit"/0181cc27d637d9f2606dbf33fac4ddf5a64162ca - 0181cc27d637d9f2606dbf33fac4ddf5a64162ca Add support for various laptops using CS35L41 HDA without _DSD +"https://gitlab.com/cki-project/kernel-ark/-/commit"/af25a577fcd4af790374718790a7d9e7bace804c + af25a577fcd4af790374718790a7d9e7bace804c Enable IO_URING for RHEL -"https://gitlab.com/cki-project/kernel-ark/-/commit"/ed5f19c3892cc5dcfe95dd4d296c5e617a26c821 - ed5f19c3892cc5dcfe95dd4d296c5e617a26c821 Revert "netfilter: nf_tables: remove catchall element in GC sync path" +"https://gitlab.com/cki-project/kernel-ark/-/commit"/ad5d1b5ae72b9e8b846f94b4589d8b0430178c66 + ad5d1b5ae72b9e8b846f94b4589d8b0430178c66 redhat: version two of Makefile.rhelver tweaks -"https://gitlab.com/cki-project/kernel-ark/-/commit"/81689414a7974a3f3fa3b28c18226c9d583761d4 - 81689414a7974a3f3fa3b28c18226c9d583761d4 netfilter: nf_tables: remove catchall element in GC sync path +"https://gitlab.com/cki-project/kernel-ark/-/commit"/fa8ac4aeabd1e76ce80b4016fa3f636507e62a8e + fa8ac4aeabd1e76ce80b4016fa3f636507e62a8e redhat: adapt to upstream Makefile change -"https://gitlab.com/cki-project/kernel-ark/-/commit"/91d392fbbe771b2b4c45fd39b9150e27be3251ba - 91d392fbbe771b2b4c45fd39b9150e27be3251ba ACPI: video: Use acpi_device_fix_up_power_children() +"https://gitlab.com/cki-project/kernel-ark/-/commit"/82f10d50c891e830513a6203c8dfedc9c5fc605d + 82f10d50c891e830513a6203c8dfedc9c5fc605d Change acpi_bus_get_acpi_device to acpi_get_acpi_dev -"https://gitlab.com/cki-project/kernel-ark/-/commit"/3bd5c005766e37c5c60b1210e844091ddebd28d6 - 3bd5c005766e37c5c60b1210e844091ddebd28d6 ACPI: PM: Add acpi_device_fix_up_power_children() function +"https://gitlab.com/cki-project/kernel-ark/-/commit"/2e9237134898be1ad28c8ea25deb1c14f7d2cdc6 + 2e9237134898be1ad28c8ea25deb1c14f7d2cdc6 RHEL: disable io_uring support -"https://gitlab.com/cki-project/kernel-ark/-/commit"/46f41fa0448229c32bbc60d3c8ef50d22c33b117 - 46f41fa0448229c32bbc60d3c8ef50d22c33b117 rtc: cmos: Use ACPI alarm for non-Intel x86 
systems too +"https://gitlab.com/cki-project/kernel-ark/-/commit"/670907cf282993feb5c27387e485baabbed3f82d + 670907cf282993feb5c27387e485baabbed3f82d REDHAT: coresight: etm4x: Disable coresight on HPE Apollo 70 -"https://gitlab.com/cki-project/kernel-ark/-/commit"/84c68fe1f91beef8b25ca2202d3581260447b334 - 84c68fe1f91beef8b25ca2202d3581260447b334 drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set +"https://gitlab.com/cki-project/kernel-ark/-/commit"/4cc4347e9085cb8d6b5b5e203e76737a909bfd6d + 4cc4347e9085cb8d6b5b5e203e76737a909bfd6d KEYS: Make use of platform keyring for module signature verify -"https://gitlab.com/cki-project/kernel-ark/-/commit"/51d40b1c54cf09e93d42dc0d090765016362d692 - 51d40b1c54cf09e93d42dc0d090765016362d692 scsi: sd: Add "probe_type" module parameter to allow synchronous probing +"https://gitlab.com/cki-project/kernel-ark/-/commit"/d818fe69a863c9c9b32bf0afc0fc9ea1a13d1e03 + d818fe69a863c9c9b32bf0afc0fc9ea1a13d1e03 Input: rmi4 - remove the need for artificial IRQ in case of HID -"https://gitlab.com/cki-project/kernel-ark/-/commit"/6e9d8352504d3de95ebdff5289e2da6e93b90767 - 6e9d8352504d3de95ebdff5289e2da6e93b90767 Enable IO_URING for RHEL +"https://gitlab.com/cki-project/kernel-ark/-/commit"/54bcbdc93a456a037372861f3c305001d19c1380 + 54bcbdc93a456a037372861f3c305001d19c1380 ARM: tegra: usb no reset -"https://gitlab.com/cki-project/kernel-ark/-/commit"/e65e1c9cfce51744f3ccce9ede248e74a6e7bb5c - e65e1c9cfce51744f3ccce9ede248e74a6e7bb5c redhat: version two of Makefile.rhelver tweaks +"https://gitlab.com/cki-project/kernel-ark/-/commit"/d53b41ea053ea5dcbc6410262e64e06baa756ab8 + d53b41ea053ea5dcbc6410262e64e06baa756ab8 s390: Lock down the kernel when the IPL secure flag is set -"https://gitlab.com/cki-project/kernel-ark/-/commit"/97edcc85a591ed63b65fa583a1593e379ec779a0 - 97edcc85a591ed63b65fa583a1593e379ec779a0 redhat: adapt to upstream Makefile change +"https://gitlab.com/cki-project/kernel-ark/-/commit"/0a47e98dff708b27a6f92034258fe7b4f53a0707 + 0a47e98dff708b27a6f92034258fe7b4f53a0707 efi: Lock down the kernel if booted in secure boot mode -"https://gitlab.com/cki-project/kernel-ark/-/commit"/f0274138f93a5de8d5757dc1310a51fd9adae739 - f0274138f93a5de8d5757dc1310a51fd9adae739 Change acpi_bus_get_acpi_device to acpi_get_acpi_dev +"https://gitlab.com/cki-project/kernel-ark/-/commit"/920dfefdd06948d8741fa4846ddc9e35cd50ce65 + 920dfefdd06948d8741fa4846ddc9e35cd50ce65 efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode -"https://gitlab.com/cki-project/kernel-ark/-/commit"/b18359baab10beb33452ec4bac68a25db0ac2531 - b18359baab10beb33452ec4bac68a25db0ac2531 RHEL: disable io_uring support +"https://gitlab.com/cki-project/kernel-ark/-/commit"/b459dd558e0197eb8dd6ca0ebd7f06945f480e78 + b459dd558e0197eb8dd6ca0ebd7f06945f480e78 security: lockdown: expose a hook to lock the kernel down -"https://gitlab.com/cki-project/kernel-ark/-/commit"/e86246f6bff2cae6ed0728cb9855dc321aa22442 - e86246f6bff2cae6ed0728cb9855dc321aa22442 REDHAT: coresight: etm4x: Disable coresight on HPE Apollo 70 +"https://gitlab.com/cki-project/kernel-ark/-/commit"/872668749f5ac08e01bcb2ddf0f33ac935793aac + 872668749f5ac08e01bcb2ddf0f33ac935793aac Make get_cert_list() use efi_status_to_str() to print error messages. 
-"https://gitlab.com/cki-project/kernel-ark/-/commit"/780e15df6bfda4a86de773b5a76348845cd287e2 - 780e15df6bfda4a86de773b5a76348845cd287e2 KEYS: Make use of platform keyring for module signature verify +"https://gitlab.com/cki-project/kernel-ark/-/commit"/485d3acca52183cb0cd8dc62413d83b8ca6d1be9 + 485d3acca52183cb0cd8dc62413d83b8ca6d1be9 Add efi_status_to_str() and rework efi_status_to_err(). -"https://gitlab.com/cki-project/kernel-ark/-/commit"/e16c46de5e538011a405f267e0591a03fe4434f1 - e16c46de5e538011a405f267e0591a03fe4434f1 Input: rmi4 - remove the need for artificial IRQ in case of HID +"https://gitlab.com/cki-project/kernel-ark/-/commit"/f28b90cbf277b2ae8b2585fbd453dfa0d69ae53d + f28b90cbf277b2ae8b2585fbd453dfa0d69ae53d iommu/arm-smmu: workaround DMA mode issues -"https://gitlab.com/cki-project/kernel-ark/-/commit"/cefdb4374d26857e1d90cdd35936f219693dff11 - cefdb4374d26857e1d90cdd35936f219693dff11 ARM: tegra: usb no reset +"https://gitlab.com/cki-project/kernel-ark/-/commit"/faf72983b9f33a9ffd8b230496d2314e9b9a826f + faf72983b9f33a9ffd8b230496d2314e9b9a826f ipmi: do not configure ipmi for HPE m400 -"https://gitlab.com/cki-project/kernel-ark/-/commit"/161fba6e6557f41e7d2e1be2d4300aac25894c22 - 161fba6e6557f41e7d2e1be2d4300aac25894c22 s390: Lock down the kernel when the IPL secure flag is set +"https://gitlab.com/cki-project/kernel-ark/-/commit"/0088681628eecc37b983514b7aa099cb1ff4ce2c + 0088681628eecc37b983514b7aa099cb1ff4ce2c kABI: Add generic kABI macros to use for kABI workarounds -"https://gitlab.com/cki-project/kernel-ark/-/commit"/f23df5d91bbc852ed9a289c88b478b5890ff3aff - f23df5d91bbc852ed9a289c88b478b5890ff3aff efi: Lock down the kernel if booted in secure boot mode +"https://gitlab.com/cki-project/kernel-ark/-/commit"/eb7070ab5324076a14c90f71f6ed80c6d186aa90 + eb7070ab5324076a14c90f71f6ed80c6d186aa90 ahci: thunderx2: Fix for errata that affects stop engine -"https://gitlab.com/cki-project/kernel-ark/-/commit"/388c5040283f7748c06961a807ab82960cfac7b2 - 388c5040283f7748c06961a807ab82960cfac7b2 efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode +"https://gitlab.com/cki-project/kernel-ark/-/commit"/12b80aa03dd87334c9fdbda6a93bd2359a5cf15a + 12b80aa03dd87334c9fdbda6a93bd2359a5cf15a Vulcan: AHCI PCI bar fix for Broadcom Vulcan early silicon -"https://gitlab.com/cki-project/kernel-ark/-/commit"/e81d15d326fecd1c90d82b2acb9bdb259b4033ac - e81d15d326fecd1c90d82b2acb9bdb259b4033ac security: lockdown: expose a hook to lock the kernel down +"https://gitlab.com/cki-project/kernel-ark/-/commit"/59521420421d2bd7341dbcea94bf2c756f95fcac + 59521420421d2bd7341dbcea94bf2c756f95fcac tags.sh: Ignore redhat/rpm -"https://gitlab.com/cki-project/kernel-ark/-/commit"/1297962689c5c00929be45b6261ab21f0e5de41c - 1297962689c5c00929be45b6261ab21f0e5de41c Make get_cert_list() use efi_status_to_str() to print error messages. +"https://gitlab.com/cki-project/kernel-ark/-/commit"/d5ab5c4e27530f7a54b6aa9a581d3d45bb9a5b16 + d5ab5c4e27530f7a54b6aa9a581d3d45bb9a5b16 put RHEL info into generated headers -"https://gitlab.com/cki-project/kernel-ark/-/commit"/2b290761ed33270b9f8fea815c9f29476ead5d5d - 2b290761ed33270b9f8fea815c9f29476ead5d5d Add efi_status_to_str() and rework efi_status_to_err(). 
+"https://gitlab.com/cki-project/kernel-ark/-/commit"/dc74674e8fced0408caaad1c59abc890ecb8d6f2 + dc74674e8fced0408caaad1c59abc890ecb8d6f2 aarch64: acpi scan: Fix regression related to X-Gene UARTs -"https://gitlab.com/cki-project/kernel-ark/-/commit"/4f23de5adca7e96a6bb3abc9f7e0546b997c8ea0 - 4f23de5adca7e96a6bb3abc9f7e0546b997c8ea0 iommu/arm-smmu: workaround DMA mode issues +"https://gitlab.com/cki-project/kernel-ark/-/commit"/9f7d0c67a3cd2e80b31258a5af096e7ab5d00ea3 + 9f7d0c67a3cd2e80b31258a5af096e7ab5d00ea3 ACPI / irq: Workaround firmware issue on X-Gene based m400 -"https://gitlab.com/cki-project/kernel-ark/-/commit"/3c9be29ba9986f465b7c8fc6e391978833ffac22 - 3c9be29ba9986f465b7c8fc6e391978833ffac22 ipmi: do not configure ipmi for HPE m400 +"https://gitlab.com/cki-project/kernel-ark/-/commit"/46a9c575bbde2341f1f550fb8f479935673035f8 + 46a9c575bbde2341f1f550fb8f479935673035f8 modules: add rhelversion MODULE_INFO tag -"https://gitlab.com/cki-project/kernel-ark/-/commit"/f9bd4dd2a3e0ce4fc91eea39c747a2b06ac8852c - f9bd4dd2a3e0ce4fc91eea39c747a2b06ac8852c kABI: Add generic kABI macros to use for kABI workarounds +"https://gitlab.com/cki-project/kernel-ark/-/commit"/bc4896017cc68caa5bd7ead6a06b075b2e17c0e8 + bc4896017cc68caa5bd7ead6a06b075b2e17c0e8 ACPI: APEI: arm64: Ignore broken HPE moonshot APEI support -"https://gitlab.com/cki-project/kernel-ark/-/commit"/0a826069d941f0249fa44005fbc6511875553497 - 0a826069d941f0249fa44005fbc6511875553497 ahci: thunderx2: Fix for errata that affects stop engine +"https://gitlab.com/cki-project/kernel-ark/-/commit"/0e21b15103c452cf6cd2afe831e32b9ceb6de255 + 0e21b15103c452cf6cd2afe831e32b9ceb6de255 Pull the RHEL version defines out of the Makefile -"https://gitlab.com/cki-project/kernel-ark/-/commit"/b156077e6f7cdf9bc390551e7b65b80d1d5e285d - b156077e6f7cdf9bc390551e7b65b80d1d5e285d Vulcan: AHCI PCI bar fix for Broadcom Vulcan early silicon - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/129dc65edece93e256a9c79b3e1f962fc7074406 - 129dc65edece93e256a9c79b3e1f962fc7074406 tags.sh: Ignore redhat/rpm - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/7558a6cc3029f6cce7cb79ad0ab6348fa1083bb1 - 7558a6cc3029f6cce7cb79ad0ab6348fa1083bb1 put RHEL info into generated headers - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/fabdc6fc464674d789063463c8a9abe954ed9f0e - fabdc6fc464674d789063463c8a9abe954ed9f0e aarch64: acpi scan: Fix regression related to X-Gene UARTs - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/3332716406fc60dbfbe218275c546081215fd4b3 - 3332716406fc60dbfbe218275c546081215fd4b3 ACPI / irq: Workaround firmware issue on X-Gene based m400 - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/ad67ea7af8f6e47745455046d807a8cf0b4e3864 - ad67ea7af8f6e47745455046d807a8cf0b4e3864 modules: add rhelversion MODULE_INFO tag - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/4a9e157a9522218c86b14c2f91423dc8c6cc32ed - 4a9e157a9522218c86b14c2f91423dc8c6cc32ed ACPI: APEI: arm64: Ignore broken HPE moonshot APEI support - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/dd03abf0c6a7dde42a4f540f782c67d81319118d - dd03abf0c6a7dde42a4f540f782c67d81319118d Pull the RHEL version defines out of the Makefile - -"https://gitlab.com/cki-project/kernel-ark/-/commit"/866075bfad9aeeb466f2516e071428e9ac5aafaa - 866075bfad9aeeb466f2516e071428e9ac5aafaa [initial commit] Add Red Hat variables in the top level makefile +"https://gitlab.com/cki-project/kernel-ark/-/commit"/1993198591da4482b9721dec18306b6d2c556e17 + 
1993198591da4482b9721dec18306b6d2c556e17 [initial commit] Add Red Hat variables in the top level makefile diff --git a/SOURCES/amdgpu-HAINAN-variant-fixup.patch b/SOURCES/amdgpu-HAINAN-variant-fixup.patch new file mode 100644 index 0000000..43075e0 --- /dev/null +++ b/SOURCES/amdgpu-HAINAN-variant-fixup.patch @@ -0,0 +1,34 @@ +diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c +index fbf968e3f6d7..8afc4fa73101 100644 +--- a/drivers/gpu/drm/radeon/si_dpm.c ++++ b/drivers/gpu/drm/radeon/si_dpm.c +@@ -2959,9 +2959,11 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, + if (rdev->family == CHIP_HAINAN) { + if ((rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0xC3) || ++ (rdev->pdev->device == 0x6660) || + (rdev->pdev->device == 0x6664) || + (rdev->pdev->device == 0x6665) || +- (rdev->pdev->device == 0x6667)) { ++ (rdev->pdev->device == 0x6667) || ++ (rdev->pdev->device == 0x666F)) { + max_sclk = 75000; + } + if ((rdev->pdev->revision == 0xC3) || +diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +index 02e69ccff3ba..b9a60851d799 100644 +--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c ++++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +@@ -3435,9 +3435,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, + if (adev->asic_type == CHIP_HAINAN) { + if ((adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0xC3) || ++ (adev->pdev->device == 0x6660) || + (adev->pdev->device == 0x6664) || + (adev->pdev->device == 0x6665) || +- (adev->pdev->device == 0x6667)) { ++ (adev->pdev->device == 0x6667) || ++ (adev->pdev->device == 0x666F)) { + max_sclk = 75000; + } + if ((adev->pdev->revision == 0xC3) || diff --git a/SOURCES/asus-linux.patch b/SOURCES/asus-linux.patch index 6caf775..acdc4a3 100644 --- a/SOURCES/asus-linux.patch +++ b/SOURCES/asus-linux.patch @@ -36,14 +36,14 @@ diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_pr index c9eb70290..2b8f8fd52 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c -@@ -57,6 +57,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { - { "104316D3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, - { "104316F3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, - { "104317F3", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431B93", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, - { "10431863", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, - { "104318D3", I2C, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, - { "10431C9F", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, +@@ -79,6 +79,7 @@ + { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "104317F3", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, ++ { "10431B93", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, + { "10431863", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, + { "104318D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, + { "10431C9F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 
}, @@ -360,6 +361,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "104316D3", generic_dsd_config }, { "CSC3551", "104316F3", generic_dsd_config }, @@ -55,401 +55,3 @@ index c9eb70290..2b8f8fd52 100644 -- 2.41.0 - -From b35a4c957b3f0e5b4c7c73dec4fe3a5b9dbc4873 Mon Sep 17 00:00:00 2001 -From: "Luke D. Jones" -Date: Sun, 30 Apr 2023 10:56:34 +1200 -Subject: [PATCH v6 1/1] platform/x86: asus-wmi: add support for ASUS screenpad - -Add support for the WMI methods used to turn off and adjust the -brightness of the secondary "screenpad" device found on some high-end -ASUS laptops like the GX650P series and others. - -There are some small quirks with this device when considering only the -raw WMI methods: -1. The Off method can only switch the device off -2. Changing the brightness turns the device back on -3. To turn the device back on the brightness must be > 1 -4. When the device is off the brightness can't be changed (so it is - stored by the driver if device is off). -5. Booting with a value of 0 brightness (retained by bios) means the bios - will set a value of >0 <15 -6. When the device is off it is "unplugged" - -asus_wmi sets the minimum brightness as 20 in general use, and 60 for -booting with values <= min. - -The ACPI methods are used in a new backlight device named asus_screenpad. - -Signed-off-by: Luke D. Jones ---- - drivers/platform/x86/asus-wmi.c | 133 +++++++++++++++++++++ - drivers/platform/x86/asus-wmi.h | 1 + - include/linux/platform_data/x86/asus-wmi.h | 4 + - 3 files changed, 138 insertions(+) - -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index f54178d6f780..0b13be703856 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -127,6 +128,10 @@ module_param(fnlock_default, bool, 0444); - #define NVIDIA_TEMP_MIN 75 - #define NVIDIA_TEMP_MAX 87 - -+#define ASUS_SCREENPAD_BRIGHT_MIN 20 -+#define ASUS_SCREENPAD_BRIGHT_MAX 255 -+#define ASUS_SCREENPAD_BRIGHT_DEFAULT 60 -+ - static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; - - static int throttle_thermal_policy_write(struct asus_wmi *); -@@ -212,6 +217,7 @@ struct asus_wmi { - - struct input_dev *inputdev; - struct backlight_device *backlight_device; -+ struct backlight_device *screenpad_backlight_device; - struct platform_device *platform_device; - - struct led_classdev wlan_led; -@@ -3776,6 +3782,124 @@ static int is_display_toggle(int code) - return 0; - } - -+/* Screenpad backlight *******************************************************/ -+ -+static int read_screenpad_backlight_power(struct asus_wmi *asus) -+{ -+ int ret; -+ -+ ret = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_SCREENPAD_POWER); -+ if (ret < 0) -+ return ret; -+ /* 1 == powered */ -+ return ret ? 
FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; -+} -+ -+static int read_screenpad_brightness(struct backlight_device *bd) -+{ -+ struct asus_wmi *asus = bl_get_data(bd); -+ u32 retval; -+ int err; -+ -+ err = read_screenpad_backlight_power(asus); -+ if (err < 0) -+ return err; -+ /* The device brightness can only be read if powered, so return stored */ -+ if (err == FB_BLANK_POWERDOWN) -+ return asus->driver->screenpad_brightness - ASUS_SCREENPAD_BRIGHT_MIN; -+ -+ err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT, &retval); -+ if (err < 0) -+ return err; -+ -+ return (retval & ASUS_WMI_DSTS_BRIGHTNESS_MASK) - ASUS_SCREENPAD_BRIGHT_MIN; -+} -+ -+static int update_screenpad_bl_status(struct backlight_device *bd) -+{ -+ struct asus_wmi *asus = bl_get_data(bd); -+ int power, err = 0; -+ u32 ctrl_param; -+ -+ power = read_screenpad_backlight_power(asus); -+ if (power < 0) -+ return power; -+ -+ if (bd->props.power != power) { -+ if (power != FB_BLANK_UNBLANK) { -+ /* Only brightness > 0 can power it back on */ -+ ctrl_param = asus->driver->screenpad_brightness - ASUS_SCREENPAD_BRIGHT_MIN; -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_LIGHT, -+ ctrl_param, NULL); -+ } else { -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_POWER, 0, NULL); -+ } -+ } else if (power == FB_BLANK_UNBLANK) { -+ /* Only set brightness if powered on or we get invalid/unsync state */ -+ ctrl_param = bd->props.brightness + ASUS_SCREENPAD_BRIGHT_MIN; -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_LIGHT, ctrl_param, NULL); -+ } -+ -+ /* Ensure brightness is stored to turn back on with */ -+ if (err == 0) -+ asus->driver->screenpad_brightness = bd->props.brightness + ASUS_SCREENPAD_BRIGHT_MIN; -+ -+ return err; -+} -+ -+static const struct backlight_ops asus_screenpad_bl_ops = { -+ .get_brightness = read_screenpad_brightness, -+ .update_status = update_screenpad_bl_status, -+ .options = BL_CORE_SUSPENDRESUME, -+}; -+ -+static int asus_screenpad_init(struct asus_wmi *asus) -+{ -+ struct backlight_device *bd; -+ struct backlight_properties props; -+ int err, power; -+ int brightness = 0; -+ -+ power = read_screenpad_backlight_power(asus); -+ if (power < 0) -+ return power; -+ -+ if (power != FB_BLANK_POWERDOWN) { -+ err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT, &brightness); -+ if (err < 0) -+ return err; -+ } -+ /* default to an acceptable min brightness on boot if too low */ -+ if (brightness < ASUS_SCREENPAD_BRIGHT_MIN) -+ brightness = ASUS_SCREENPAD_BRIGHT_DEFAULT; -+ -+ memset(&props, 0, sizeof(struct backlight_properties)); -+ props.type = BACKLIGHT_RAW; /* ensure this bd is last to be picked */ -+ props.max_brightness = ASUS_SCREENPAD_BRIGHT_MAX - ASUS_SCREENPAD_BRIGHT_MIN; -+ bd = backlight_device_register("asus_screenpad", -+ &asus->platform_device->dev, asus, -+ &asus_screenpad_bl_ops, &props); -+ if (IS_ERR(bd)) { -+ pr_err("Could not register backlight device\n"); -+ return PTR_ERR(bd); -+ } -+ -+ asus->screenpad_backlight_device = bd; -+ asus->driver->screenpad_brightness = brightness; -+ bd->props.brightness = brightness; -+ bd->props.power = power; -+ backlight_update_status(bd); -+ -+ return 0; -+} -+ -+static void asus_screenpad_exit(struct asus_wmi *asus) -+{ -+ backlight_device_unregister(asus->screenpad_backlight_device); -+ -+ asus->screenpad_backlight_device = NULL; -+} -+ - /* Fn-lock ********************************************************************/ - - static bool asus_wmi_has_fnlock_key(struct asus_wmi *asus) -@@ -4431,6 +4555,12 @@ static int 
asus_wmi_add(struct platform_device *pdev) - } else if (asus->driver->quirks->wmi_backlight_set_devstate) - err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL); - -+ if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT)) { -+ err = asus_screenpad_init(asus); -+ if (err && err != -ENODEV) -+ goto fail_screenpad; -+ } -+ - if (asus_wmi_has_fnlock_key(asus)) { - asus->fnlock_locked = fnlock_default; - asus_wmi_fnlock_update(asus); -@@ -4454,6 +4584,8 @@ static int asus_wmi_add(struct platform_device *pdev) - asus_wmi_backlight_exit(asus); - fail_backlight: - asus_wmi_rfkill_exit(asus); -+fail_screenpad: -+ asus_screenpad_exit(asus); - fail_rfkill: - asus_wmi_led_exit(asus); - fail_leds: -@@ -4481,6 +4481,7 @@ - i8042_remove_filter(asus->driver->quirks->i8042_filter); - wmi_remove_notify_handler(asus->driver->event_guid); - asus_wmi_backlight_exit(asus); -+ asus_screenpad_exit(asus); - asus_wmi_input_exit(asus); - asus_wmi_led_exit(asus); - asus_wmi_rfkill_exit(asus); -diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h -index a478ebfd34df..5fbdd0eafa02 100644 ---- a/drivers/platform/x86/asus-wmi.h -+++ b/drivers/platform/x86/asus-wmi.h -@@ -57,6 +57,7 @@ struct quirk_entry { - struct asus_wmi_driver { - int brightness; - int panel_power; -+ int screenpad_brightness; - int wlan_ctrl_by_user; - - const char *name; -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index 16e99a1c37fc..63e630276499 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -58,6 +58,10 @@ - #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 - #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ - #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 -+/* This can only be used to disable the screen, not re-enable */ -+#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 -+/* Writing a brightness re-enables the screen if disabled */ -+#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 - #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 - #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 - --- -2.41.0 - -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Luke D. Jones" -Date: Mon, 27 Nov 2023 12:05:21 +1300 -Subject: [PATCH] platform/x86: asus-wmi: disable USB0 hub on ROG Ally before - suspend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -ASUS have worked around an issue in XInput where it doesn't support USB -selective suspend, which causes suspend issues in Windows. They worked -around this by adjusting the MCU firmware to disable the USB0 hub when -the screen is switched off during the Microsoft DSM suspend path in ACPI. - -The issue we have with this however is one of timing - the call the tells -the MCU to this isn't able to complete before suspend is done so we call -this in a prepare() and add a small msleep() to ensure it is done. This -must be done before the screen is switched off to prevent a variety of -possible races. - -Further to this the MCU powersave option must also be disabled as it can -cause a number of issues such as: -- unreliable resume connection of N-Key -- complete loss of N-Key if the power is plugged in while suspended -Disabling the powersave option prevents this. - -Without this the MCU is unable to initialise itself correctly on resume. - -Signed-off-by: "Luke D. 
Jones" -Tested-by: Philip Mueller -Reviewed-by: Hans de Goede -Link: https://lore.kernel.org/r/20231126230521.125708-2-luke@ljones.dev -Signed-off-by: Ilpo Järvinen -Signed-off-by: Jan200101 ---- - drivers/platform/x86/asus-wmi.c | 50 ++++++++++++++++++++++ - include/linux/platform_data/x86/asus-wmi.h | 3 ++ - 2 files changed, 53 insertions(+) - -diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c -index ca668cf04020..9f7e23c5c6b4 100644 ---- a/drivers/platform/x86/asus-wmi.c -+++ b/drivers/platform/x86/asus-wmi.c -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -132,6 +133,11 @@ module_param(fnlock_default, bool, 0444); - #define ASUS_SCREENPAD_BRIGHT_MAX 255 - #define ASUS_SCREENPAD_BRIGHT_DEFAULT 60 - -+/* Controls the power state of the USB0 hub on ROG Ally which input is on */ -+#define ASUS_USB0_PWR_EC0_CSEE "\\_SB.PCI0.SBRG.EC0.CSEE" -+/* 300ms so far seems to produce a reliable result on AC and battery */ -+#define ASUS_USB0_PWR_EC0_CSEE_WAIT 300 -+ - static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; - - static int throttle_thermal_policy_write(struct asus_wmi *); -@@ -300,6 +306,9 @@ struct asus_wmi { - - bool fnlock_locked; - -+ /* The ROG Ally device requires the MCU USB device be disconnected before suspend */ -+ bool ally_mcu_usb_switch; -+ - struct asus_wmi_debug debug; - - struct asus_wmi_driver *driver; -@@ -4488,6 +4497,8 @@ static int asus_wmi_add(struct platform_device *pdev) - asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET); - asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD); - asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE); -+ asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) -+ && dmi_match(DMI_BOARD_NAME, "RC71L"); - - err = fan_boost_mode_check_present(asus); - if (err) -@@ -4662,6 +4673,43 @@ static int asus_hotk_resume(struct device *device) - asus_wmi_fnlock_update(asus); - - asus_wmi_tablet_mode_get_state(asus); -+ -+ return 0; -+} -+ -+static int asus_hotk_resume_early(struct device *device) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(device); -+ -+ if (asus->ally_mcu_usb_switch) { -+ if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB8))) -+ dev_err(device, "ROG Ally MCU failed to connect USB dev\n"); -+ else -+ msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT); -+ } -+ return 0; -+} -+ -+static int asus_hotk_prepare(struct device *device) -+{ -+ struct asus_wmi *asus = dev_get_drvdata(device); -+ int result, err; -+ -+ if (asus->ally_mcu_usb_switch) { -+ /* When powersave is enabled it causes many issues with resume of USB hub */ -+ result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MCU_POWERSAVE); -+ if (result == 1) { -+ dev_warn(device, "MCU powersave enabled, disabling to prevent resume issues"); -+ err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MCU_POWERSAVE, 0, &result); -+ if (err || result != 1) -+ dev_err(device, "Failed to set MCU powersave mode: %d\n", err); -+ } -+ /* sleep required to ensure USB0 is disabled before sleep continues */ -+ if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB7))) -+ dev_err(device, "ROG Ally MCU failed to disconnect USB dev\n"); -+ else -+ msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT); -+ } - return 0; - } - -@@ -4709,6 +4757,8 @@ static const struct dev_pm_ops asus_pm_ops = { - .thaw = asus_hotk_thaw, - .restore = asus_hotk_restore, - 
.resume = asus_hotk_resume, -+ .resume_early = asus_hotk_resume_early, -+ .prepare = asus_hotk_prepare, - }; - - /* Registration ***************************************************************/ -diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h -index 63e630276499..ab1c7deff118 100644 ---- a/include/linux/platform_data/x86/asus-wmi.h -+++ b/include/linux/platform_data/x86/asus-wmi.h -@@ -114,6 +114,9 @@ - /* Charging mode - 1=Barrel, 2=USB */ - #define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C - -+/* MCU powersave mode */ -+#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2 -+ - /* epu is connected? 1 == true */ - #define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018 - /* egpu on/off */ diff --git a/SOURCES/dracut-virt.conf b/SOURCES/dracut-virt.conf index 3724026..c639fda 100644 --- a/SOURCES/dracut-virt.conf +++ b/SOURCES/dracut-virt.conf @@ -14,6 +14,12 @@ dracutmodules+=" dm lvm rootfs-block fs-lib " # modules: tpm and crypto dracutmodules+=" crypt crypt-loop tpm2-tss " +# modules: support root on virtiofs +dracutmodules+=" virtiofs " + +# modules: use sysext images (see 'man systemd-sysext') +dracutmodules+=" systemd-sysext " + # drivers: virtual buses, pci drivers+=" virtio-pci virtio-mmio " # qemu-kvm drivers+=" hv-vmbus pci-hyperv " # hyperv diff --git a/SOURCES/filter-modules.sh.fedora b/SOURCES/filter-modules.sh.fedora index c14a790..7ef7614 100755 --- a/SOURCES/filter-modules.sh.fedora +++ b/SOURCES/filter-modules.sh.fedora @@ -45,7 +45,7 @@ netprots="6lowpan appletalk atm ax25 batman-adv bluetooth can dsa ieee802154 l2t drmdrvs="amd ast bridge gma500 i2c i915 mgag200 nouveau panel radeon" -singlemods="ntb_netdev iscsi_ibft iscsi_boot_sysfs megaraid pmcraid qedi qla1280 9pnet_rdma rpcrdma nvmet-rdma nvme-rdma hid-picolcd hid-prodikeys hwpoison-inject target_core_user sbp_target cxgbit chcr parport_serial regmap-sdw regmap-sdw-mbq arizona-micsupp hid-asus iTCO_wdt rnbd-client rnbd-server mlx5_vdpa spi-altera-dfl nct6775 hid-playstation hid-nintendo asus_wmi_sensors asus_wmi_ec_sensors mlx5-vfio-pci video int3406_thermal apple_bl ptp_dfl_tod intel-m10-bmc-hwmon intel_rapl_tpmi pds_vdpa hp-wmi-sensors pds-vfio-pci" +singlemods="ntb_netdev iscsi_ibft iscsi_boot_sysfs megaraid pmcraid qedi qla1280 9pnet_rdma rpcrdma nvmet-rdma nvme-rdma hid-picolcd hid-prodikeys hwpoison-inject target_core_user sbp_target cxgbit chcr parport_serial regmap-sdw regmap-sdw-mbq arizona-micsupp hid-asus iTCO_wdt rnbd-client rnbd-server mlx5_vdpa spi-altera-dfl nct6775 hid-playstation hid-nintendo asus_wmi_sensors asus_wmi_ec_sensors mlx5-vfio-pci video int3406_thermal apple_bl ptp_dfl_tod intel-m10-bmc-hwmon intel_rapl_tpmi pds_vdpa hp-wmi-sensors pds-vfio-pci gpio-ljca spi-ljca i2c-ljca" # Grab the arch-specific filter list overrides source ./filter-$2.sh @@ -84,6 +84,9 @@ filter_ko() { return 0 } +# HACK: move surface_fan and surface_temp to kernel-modules +singlemods="${singlemods} surface_fan surface_temp" + # Filter the drivers/ subsystems for subsys in ${driverdirs} do diff --git a/SOURCES/kernel-aarch64-16k-debug-fedora.config b/SOURCES/kernel-aarch64-16k-debug-fedora.config index 83c340e..990ffa6 100644 --- a/SOURCES/kernel-aarch64-16k-debug-fedora.config +++ b/SOURCES/kernel-aarch64-16k-debug-fedora.config @@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set CONFIG_AMLOGIC_THERMAL=m +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y CONFIG_AMT=m 
CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder" @@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y CONFIG_ARCH_NR_GPIO=2048 CONFIG_ARCH_NXP=y # CONFIG_ARCH_OMAP1 is not set +# CONFIG_ARCH_PENSANDO is not set # CONFIG_ARCH_PXA is not set CONFIG_ARCH_QCOM=y +CONFIG_ARCH_R8A774A1=y +# CONFIG_ARCH_R8A774B1 is not set +# CONFIG_ARCH_R8A774C0 is not set +# CONFIG_ARCH_R8A774E1 is not set +# CONFIG_ARCH_R8A77951 is not set +# CONFIG_ARCH_R8A77960 is not set +# CONFIG_ARCH_R8A77961 is not set +# CONFIG_ARCH_R8A77965 is not set +# CONFIG_ARCH_R8A77970 is not set +# CONFIG_ARCH_R8A77980 is not set +# CONFIG_ARCH_R8A77990 is not set +# CONFIG_ARCH_R8A77995 is not set +# CONFIG_ARCH_R8A779A0 is not set +# CONFIG_ARCH_R8A779F0 is not set +# CONFIG_ARCH_R8A779G0 is not set +CONFIG_ARCH_R9A07G043=y +CONFIG_ARCH_R9A07G044=y +CONFIG_ARCH_R9A07G054=y +# CONFIG_ARCH_R9A08G045 is not set +# CONFIG_ARCH_R9A09G011 is not set CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_RDA is not set # CONFIG_ARCH_REALTEK is not set -# CONFIG_ARCH_RENESAS is not set +CONFIG_ARCH_RENESAS=y CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARCH_S32 is not set # CONFIG_ARCH_SA1100 is not set @@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m CONFIG_ARM_RK3399_DMC_DEVFREQ=m CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y CONFIG_ARM_SCMI_POWERCAP=m CONFIG_ARM_SCMI_POWER_CONTROL=m CONFIG_ARM_SCMI_POWER_DOMAIN=m @@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y CONFIG_ATA_BMDMA=y CONFIG_ATA_FORCE=y CONFIG_ATA_GENERIC=m -# CONFIG_ATALK is not set +CONFIG_ATALK=m CONFIG_ATA_OVER_ETH=m CONFIG_ATA_PIIX=y # CONFIG_ATARI_PARTITION is not set @@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m CONFIG_BATTERY_MAX17040=m CONFIG_BATTERY_MAX17042=m # CONFIG_BATTERY_MAX1721X is not set +# CONFIG_BATTERY_PM8916_BMS_VM is not set CONFIG_BATTERY_QCOM_BATTMGR=m CONFIG_BATTERY_RT5033=m CONFIG_BATTERY_SAMSUNG_SDI=y @@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y +CONFIG_BCACHEFS_DEBUG=y +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_LOCK_TIME_STATS=y +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM2711_THERMAL=m CONFIG_BCM2835_MBOX=y @@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCMSTB_L2_IRQ=y CONFIG_BRCM_TRACING=y CONFIG_BRCMUTIL=m @@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y CONFIG_CAN_PEAK_PCIEFD=m CONFIG_CAN_PEAK_USB=m CONFIG_CAN_RAW=m +# CONFIG_CAN_RCAR_CANFD is not set +# CONFIG_CAN_RCAR is not set # CONFIG_CAN_SJA1000 is not set CONFIG_CAN_SLCAN=m # CONFIG_CAN_SOFTING is not set @@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m 
# CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m CONFIG_CHARGER_MAX77976=m # CONFIG_CHARGER_MAX8903 is not set CONFIG_CHARGER_MT6370=m +# CONFIG_CHARGER_PM8916_LBC is not set # CONFIG_CHARGER_QCOM_SMB2 is not set CONFIG_CHARGER_QCOM_SMBB=m CONFIG_CHARGER_RK817=m @@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM2711_DVP=m CONFIG_CLK_BCM2835=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set CONFIG_CLK_ICST=y @@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y CONFIG_CLK_PX30=y CONFIG_CLK_QORIQ=y CONFIG_CLK_RASPBERRYPI=y +# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set CONFIG_CLK_RK3036=y CONFIG_CLK_RK312X=y CONFIG_CLK_RK3188=y @@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y CONFIG_COMMON_CLK_RK808=m CONFIG_COMMON_CLK_ROCKCHIP=y CONFIG_COMMON_CLK_RS9_PCIE=m +CONFIG_COMMON_CLK_S4_PERIPHERALS=y +CONFIG_COMMON_CLK_S4_PLL=y CONFIG_COMMON_CLK_SCMI=y CONFIG_COMMON_CLK_SCPI=m # CONFIG_COMMON_CLK_SI514 is not set @@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m CONFIG_CROS_EC_VBC=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1644,6 +1689,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set CONFIG_DEBUG_CREDENTIALS=y # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1740,7 +1786,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1853,6 +1898,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y CONFIG_DRBD_FAULT_INJECTION=y +CONFIG_DRIVER_PE_KUNIT_TEST=m CONFIG_DRM_ACCEL_QAIC=m CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1936,6 +1982,7 @@ CONFIG_DRM_IMX8QXP_LDB=m CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m CONFIG_DRM_IMX8QXP_PIXEL_LINK=m CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m +CONFIG_DRM_IMX93_MIPI_DSI=m CONFIG_DRM_IMX_DCSS=m CONFIG_DRM_IMX_LCDC=m CONFIG_DRM_IMX_LCDIF=m @@ -1999,9 +2046,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m CONFIG_DRM_PANEL_ILITEK_IL9322=m CONFIG_DRM_PANEL_ILITEK_ILI9341=m CONFIG_DRM_PANEL_ILITEK_ILI9881C=m +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m CONFIG_DRM_PANEL_KHADAS_TS050=m @@ -2031,6 +2080,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m # 
CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set CONFIG_DRM_PANEL_RAYDIUM_RM68200=m +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -2075,13 +2125,16 @@ CONFIG_DRM_PL111=m CONFIG_DRM_QXL=m CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y +# CONFIG_DRM_RCAR_DU is not set # CONFIG_DRM_RCAR_DW_HDMI is not set # CONFIG_DRM_RCAR_LVDS is not set # CONFIG_DRM_RCAR_MIPI_DSI is not set # CONFIG_DRM_RCAR_USE_LVDS is not set # CONFIG_DRM_RCAR_USE_MIPI_DSI is not set CONFIG_DRM_ROCKCHIP=m +# CONFIG_DRM_RZG2L_MIPI_DSI is not set CONFIG_DRM_SAMSUNG_DSIM=m +# CONFIG_DRM_SHMOBILE is not set # CONFIG_DRM_SII902X is not set CONFIG_DRM_SII9234=m # CONFIG_DRM_SIL_SII8620 is not set @@ -2101,7 +2154,7 @@ CONFIG_DRM_TEGRA_STAGING=y # CONFIG_DRM_THINE_THC63LVD1024 is not set CONFIG_DRM_TI_DLPC3433=m CONFIG_DRM_TIDSS=m -# CONFIG_DRM_TI_SN65DSI83 is not set +CONFIG_DRM_TI_SN65DSI83=m CONFIG_DRM_TI_SN65DSI86=m CONFIG_DRM_TI_TFP410=m CONFIG_DRM_TI_TPD12S015=m @@ -2269,6 +2322,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y CONFIG_EDAC_QCOM=m CONFIG_EDAC_SYNOPSYS=m CONFIG_EDAC_THUNDERX=m +CONFIG_EDAC_VERSAL=m CONFIG_EDAC_XGENE=m CONFIG_EDAC=y CONFIG_EDAC_ZYNQMP=m @@ -2278,7 +2332,6 @@ CONFIG_EEPROM_AT24=m CONFIG_EEPROM_AT25=m CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_ARMSTUB_DTB_LOADER=y # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -2409,7 +2462,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2563,6 +2616,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTWDT010_WATCHDOG is not set +CONFIG_FUEL_GAUGE_MM8013=m CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2693,6 +2747,7 @@ CONFIG_GPIO_PCI_IDIO_16=m # CONFIG_GPIO_PISOSR is not set CONFIG_GPIO_PL061=y CONFIG_GPIO_RASPBERRYPI_EXP=m +CONFIG_GPIO_RCAR=m # CONFIG_GPIO_RDC321X is not set CONFIG_GPIO_ROCKCHIP=y # CONFIG_GPIO_SAMA5D2_PIOBU is not set @@ -2922,6 +2977,7 @@ CONFIG_HNS_ENET=m CONFIG_HOLTEK_FF=y # CONFIG_HOSTAP is not set CONFIG_HOTPLUG_CPU=y +CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -3075,9 +3131,13 @@ CONFIG_I2C_PXA=m CONFIG_I2C_QCOM_CCI=m CONFIG_I2C_QCOM_GENI=m CONFIG_I2C_QUP=m +# CONFIG_I2C_RCAR is not set +# CONFIG_I2C_RIIC is not set CONFIG_I2C_RK3X=y # CONFIG_I2C_ROBOTFUZZ_OSIF is not set +# CONFIG_I2C_RZV2M is not set CONFIG_I2C_SCMI=m +# CONFIG_I2C_SH_MOBILE is not set CONFIG_I2C_SI470X=m # CONFIG_I2C_SI4713 is not set CONFIG_I2C_SIMTEC=m @@ -3119,6 +3179,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -3211,7 +3272,6 @@ CONFIG_IMA_NG_TEMPLATE=y CONFIG_IMA_READ_POLICY=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -3420,6 +3480,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m CONFIG_INTERCONNECT_QCOM_SDM845=m # CONFIG_INTERCONNECT_QCOM_SDX55 is not set # CONFIG_INTERCONNECT_QCOM_SDX65 is not set 
+CONFIG_INTERCONNECT_QCOM_SDX75=m # CONFIG_INTERCONNECT_QCOM_SM6350 is not set CONFIG_INTERCONNECT_QCOM_SM8150=m CONFIG_INTERCONNECT_QCOM_SM8250=m @@ -3482,8 +3543,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y CONFIG_IPMB_DEVICE_INTERFACE=m CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3494,6 +3553,7 @@ CONFIG_IPMI_POWEROFF=m CONFIG_IPMI_SI=m CONFIG_IPMI_SSIF=m CONFIG_IPMI_WATCHDOG=m +# CONFIG_IPMMU_VMSA is not set CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_MROUTE=y CONFIG_IP_MULTICAST=y @@ -3824,7 +3884,7 @@ CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3859,6 +3919,7 @@ CONFIG_KUNIT=m CONFIG_KUNIT_TEST=m # CONFIG_KUNPENG_HCCS is not set CONFIG_KUSER_HELPERS=y +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3916,6 +3977,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -4043,6 +4105,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -4057,6 +4120,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -4065,6 +4129,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -4137,6 +4202,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set CONFIG_MCP320X=m CONFIG_MCP3422=m +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -4147,6 +4213,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -4168,7 +4235,7 @@ CONFIG_MDIO_I2C=m CONFIG_MDIO_IPQ8064=m # CONFIG_MDIO_MSCC_MIIM is not set CONFIG_MDIO_MVUSB=m -# CONFIG_MDIO_OCTEON is not set +CONFIG_MDIO_OCTEON=m # CONFIG_MDIO_SUN4I is not set CONFIG_MDIO_THUNDER=m CONFIG_MDIO_XGENE=m @@ -4182,6 +4249,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -4423,18 +4491,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -4522,7 +4594,11 @@ CONFIG_MMC_SDHCI_PLTFM=m 
CONFIG_MMC_SDHCI_PXAV3=m CONFIG_MMC_SDHCI_TEGRA=m CONFIG_MMC_SDHCI_XENON=m +CONFIG_MMC_SDHI_INTERNAL_DMAC=m +CONFIG_MMC_SDHI=m +# CONFIG_MMC_SDHI_SYS_DMAC is not set CONFIG_MMC_SDRICOH_CS=m +# CONFIG_MMC_SH_MMCIF is not set CONFIG_MMC_SPI=m # CONFIG_MMC_STM32_SDMMC is not set CONFIG_MMC_SUNXI=m @@ -4556,6 +4632,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -4646,6 +4725,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -4704,6 +4785,7 @@ CONFIG_MTD_NAND_NANDSIM=m # CONFIG_MTD_NAND_PL35X is not set # CONFIG_MTD_NAND_PLATFORM is not set # CONFIG_MTD_NAND_QCOM is not set +# CONFIG_MTD_NAND_RENESAS is not set # CONFIG_MTD_NAND_RICOH is not set # CONFIG_MTD_NAND_ROCKCHIP is not set # CONFIG_MTD_NAND_SUNXI is not set @@ -4771,7 +4853,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m CONFIG_MWL8K=m -# CONFIG_MX3_IPU is not set CONFIG_MXC4005=m CONFIG_MXC6255=m # CONFIG_MXS_DMA is not set @@ -4825,9 +4906,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4900,12 +4978,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4998,6 +5076,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -5010,15 +5089,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m CONFIG_NET_SB1000=y -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -5052,6 +5128,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -5189,7 +5266,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -5383,11 +5460,13 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set CONFIG_NVME_APPLE=m 
-CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_APPLE_EFUSES=m # CONFIG_NVMEM_IMX_IIM is not set @@ -5423,7 +5502,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -5572,6 +5653,7 @@ CONFIG_PCI_AARDVARK=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -5610,10 +5692,13 @@ CONFIG_PCIE_MOBIVEIL=y CONFIG_PCIEPORTBUS=y CONFIG_PCIE_PTM=y CONFIG_PCIE_QCOM=y +# CONFIG_PCIE_RCAR_GEN4_HOST is not set +# CONFIG_PCIE_RCAR_HOST is not set CONFIG_PCIE_ROCKCHIP_DW_HOST=y CONFIG_PCIE_ROCKCHIP_HOST=y CONFIG_PCIE_TEGRA194_HOST=y CONFIG_PCIE_XILINX_CPM=y +CONFIG_PCIE_XILINX_DMA_PL=y CONFIG_PCIE_XILINX_NWL=y CONFIG_PCIE_XILINX=y # CONFIG_PCI_FTPCI100 is not set @@ -5647,6 +5732,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -5696,7 +5782,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y CONFIG_PHY_MESON_G12A_USB2=y CONFIG_PHY_MESON_G12A_USB3_PCIE=m CONFIG_PHY_MESON_GXL_USB2=m -# CONFIG_PHY_MIXEL_LVDS_PHY is not set +CONFIG_PHY_MIXEL_LVDS_PHY=m CONFIG_PHY_MIXEL_MIPI_DPHY=m CONFIG_PHY_MVEBU_A3700_COMPHY=m CONFIG_PHY_MVEBU_A3700_UTMI=m @@ -5730,6 +5816,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m CONFIG_PHY_QCOM_USB_HS=m CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m CONFIG_PHY_QCOM_USB_SS=m +# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set +# CONFIG_PHY_RCAR_GEN2 is not set +# CONFIG_PHY_RCAR_GEN3_PCIE is not set +CONFIG_PHY_RCAR_GEN3_USB2=m +# CONFIG_PHY_RCAR_GEN3_USB3 is not set CONFIG_PHY_ROCKCHIP_DPHY_RX0=m CONFIG_PHY_ROCKCHIP_DP=m CONFIG_PHY_ROCKCHIP_EMMC=m @@ -5762,6 +5853,7 @@ CONFIG_PINCONF=y CONFIG_PINCTRL_ALDERLAKE=m CONFIG_PINCTRL_AMD=y CONFIG_PINCTRL_AMLOGIC_C3=y +CONFIG_PINCTRL_AMLOGIC_T7=y CONFIG_PINCTRL_APPLE_GPIO=m CONFIG_PINCTRL_AS3722=y CONFIG_PINCTRL_AXP209=m @@ -5882,12 +5974,13 @@ CONFIG_PINCTRL_SUN50I_H6=y # CONFIG_PINCTRL_SUN8I_A33 is not set # CONFIG_PINCTRL_SUN8I_A83T is not set # CONFIG_PINCTRL_SUN8I_A83T_R is not set -# CONFIG_PINCTRL_SUN8I_H3 is not set CONFIG_PINCTRL_SUN8I_H3_R=y +CONFIG_PINCTRL_SUN8I_H3=y # CONFIG_PINCTRL_SUN8I_V3S is not set # CONFIG_PINCTRL_SUN9I_A80 is not set # CONFIG_PINCTRL_SUN9I_A80_R is not set # CONFIG_PINCTRL_SX150X is not set +CONFIG_PINCTRL_TEGRA234=y CONFIG_PINCTRL=y CONFIG_PINCTRL_ZYNQMP=y # CONFIG_PING is not set @@ -5933,7 +6026,6 @@ CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m CONFIG_POWER_RESET_AS3722=y # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -6068,6 +6160,8 @@ CONFIG_PWM_MESON=m CONFIG_PWM_OMAP_DMTIMER=m CONFIG_PWM_PCA9685=m CONFIG_PWM_RASPBERRYPI_POE=m +# CONFIG_PWM_RCAR is not set +# CONFIG_PWM_RENESAS_TPU is not set CONFIG_PWM_ROCKCHIP=m CONFIG_PWM_STMPE=y CONFIG_PWM_SUN4I=m @@ -6134,6 +6228,8 @@ CONFIG_QCOM_Q6V5_WCSS=m CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_QFPROM=m CONFIG_QCOM_QMI_HELPERS=m +CONFIG_QCOM_QSEECOM_UEFISECAPP=y +CONFIG_QCOM_QSEECOM=y CONFIG_QCOM_RAMP_CTRL=m CONFIG_QCOM_RMTFS_MEM=m CONFIG_QCOM_RPMHPD=y @@ -6240,6 +6336,10 @@ CONFIG_RASPBERRYPI_POWER=y 
CONFIG_RATIONAL_KUNIT_TEST=m # CONFIG_RAVE_SP_CORE is not set # CONFIG_RBTREE_TEST is not set +# CONFIG_RCAR_DMAC is not set +# CONFIG_RCAR_GEN3_THERMAL is not set +# CONFIG_RCAR_REMOTEPROC is not set +# CONFIG_RCAR_THERMAL is not set CONFIG_RC_ATI_REMOTE=m CONFIG_RC_CORE=y CONFIG_RC_DECODERS=y @@ -6275,7 +6375,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -6319,6 +6419,7 @@ CONFIG_REGULATOR_HI655X=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77650=m CONFIG_REGULATOR_MAX77686=m @@ -6405,7 +6506,14 @@ CONFIG_RELOCATABLE=y # CONFIG_REMOTEPROC_CDEV is not set CONFIG_REMOTEPROC=y CONFIG_REMOTE_TARGET=m +# CONFIG_RENESAS_OSTM is not set # CONFIG_RENESAS_PHY is not set +# CONFIG_RENESAS_RPCIF is not set +# CONFIG_RENESAS_RZAWDT is not set +# CONFIG_RENESAS_RZG2LWDT is not set +# CONFIG_RENESAS_RZN1WDT is not set +# CONFIG_RENESAS_USB_DMAC is not set +# CONFIG_RENESAS_WDT is not set # CONFIG_RESET_ATTACK_MITIGATION is not set CONFIG_RESET_CONTROLLER=y CONFIG_RESET_HISI=y @@ -6416,6 +6524,7 @@ CONFIG_RESET_MESON=m CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RASPBERRYPI=y +CONFIG_RESET_RZG2L_USBPHY_CTRL=m CONFIG_RESET_SCMI=y CONFIG_RESET_SIMPLE=y CONFIG_RESET_TI_SCI=m @@ -6472,6 +6581,7 @@ CONFIG_ROCKCHIP_VOP2=y CONFIG_ROCKCHIP_VOP=y CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -6531,7 +6641,6 @@ CONFIG_RTC_DRV_ARMADA38X=m CONFIG_RTC_DRV_AS3722=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m CONFIG_RTC_DRV_CADENCE=m CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_CROS_EC=m @@ -6619,6 +6728,7 @@ CONFIG_RTC_DRV_RX8581=m CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_SA1100 is not set CONFIG_RTC_DRV_SD3078=m +# CONFIG_RTC_DRV_SH is not set CONFIG_RTC_DRV_SNVS=m CONFIG_RTC_DRV_STK17TA8=m CONFIG_RTC_DRV_SUN6I=y @@ -6694,6 +6804,10 @@ CONFIG_RV_REACT_PRINTK=y CONFIG_RV=y CONFIG_RXKAD=y # CONFIG_RXPERF is not set +# CONFIG_RZ_DMAC is not set +# CONFIG_RZG2L_ADC is not set +# CONFIG_RZG2L_THERMAL is not set +# CONFIG_RZ_MTU3 is not set CONFIG_S2IO=m # CONFIG_S390_KPROBES_SANITY_TEST is not set # CONFIG_S390_MODULES_SANITY_TEST is not set @@ -6714,6 +6828,7 @@ CONFIG_SATA_MV=m CONFIG_SATA_PMP=y # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_RCAR is not set CONFIG_SATA_SIL24=m # CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set @@ -6863,6 +6978,7 @@ CONFIG_SCSI_UFS_HISI=m CONFIG_SCSI_UFS_HPB=y CONFIG_SCSI_UFS_HWMON=y CONFIG_SCSI_UFS_QCOM=m +# CONFIG_SCSI_UFS_RENESAS is not set CONFIG_SCSI_UFS_TI_J721E=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_WD719X=m @@ -6892,11 +7008,12 @@ CONFIG_SDM_VIDEOCC_845=m # CONFIG_SDX_GCC_75 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -7038,6 +7155,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m 
# CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -7098,6 +7216,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -7171,6 +7290,7 @@ CONFIG_SERIAL_8250_CS=m CONFIG_SERIAL_8250_DFL=m CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_DW=y +# CONFIG_SERIAL_8250_EM is not set CONFIG_SERIAL_8250_EXAR=m CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_FINTEK is not set @@ -7233,6 +7353,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y CONFIG_SERIAL_SC16IS7XX=m CONFIG_SERIAL_SC16IS7XX_SPI=y # CONFIG_SERIAL_SCCNXP is not set +CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_SH_SCI_DMA=y +CONFIG_SERIAL_SH_SCI_EARLYCON=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=18 +CONFIG_SERIAL_SH_SCI=y # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SPRD is not set # CONFIG_SERIAL_ST_ASC is not set @@ -7299,7 +7424,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -7313,12 +7437,14 @@ CONFIG_SMC91X=m # CONFIG_SM_CAMCC_6350 is not set # CONFIG_SM_CAMCC_8250 is not set # CONFIG_SM_CAMCC_8450 is not set +# CONFIG_SM_CAMCC_8550 is not set CONFIG_SMC_DIAG=m CONFIG_SMC=m # CONFIG_SM_DISPCC_8250 is not set CONFIG_SM_DISPCC_8450=m # CONFIG_SM_DISPCC_8550 is not set # CONFIG_SM_FTL is not set +# CONFIG_SM_GCC_4450 is not set # CONFIG_SM_GCC_6115 is not set # CONFIG_SM_GCC_6125 is not set # CONFIG_SM_GCC_6350 is not set @@ -7353,7 +7479,7 @@ CONFIG_SMS_USB_DRV=m # CONFIG_SM_TCSRCC_8550 is not set # CONFIG_SM_VIDEOCC_8150 is not set # CONFIG_SM_VIDEOCC_8250 is not set -# CONFIG_SM_VIDEOCC_8350 is not set +CONFIG_SM_VIDEOCC_8350=m # CONFIG_SM_VIDEOCC_8450 is not set # CONFIG_SM_VIDEOCC_8550 is not set CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 @@ -7422,6 +7548,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -7575,8 +7702,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m CONFIG_SND_SOC_ARNDALE=m CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -7782,6 +7911,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m # CONFIG_SND_SOC_PEB2466 is not set CONFIG_SND_SOC_QCOM=m CONFIG_SND_SOC_QDSP6=m +# CONFIG_SND_SOC_RCAR is not set CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m CONFIG_SND_SOC_RK3328=m CONFIG_SND_SOC_RK3399_GRU_SOUND=m @@ -7816,6 +7946,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m +# CONFIG_SND_SOC_RZ is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m @@ -7827,6 +7959,7 @@ CONFIG_SND_SOC_SC8280XP=m CONFIG_SND_SOC_SDM845=m # CONFIG_SND_SOC_SDW_MOCKUP is not set CONFIG_SND_SOC_SGTL5000=m +# CONFIG_SND_SOC_SH4_FSI is not set CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m CONFIG_SND_SOC_SIMPLE_MUX=m # CONFIG_SND_SOC_SM8250 is not set @@ -8120,8 +8253,12 @@ 
CONFIG_SPI_QCOM_GENI=m CONFIG_SPI_QCOM_QSPI=m CONFIG_SPI_QUP=m CONFIG_SPI_ROCKCHIP=m -# CONFIG_SPI_ROCKCHIP_SFC is not set +CONFIG_SPI_ROCKCHIP_SFC=m +# CONFIG_SPI_RSPI is not set +# CONFIG_SPI_RZV2M_CSI is not set # CONFIG_SPI_SC18IS602 is not set +# CONFIG_SPI_SH_HSPI is not set +# CONFIG_SPI_SH_MSIOF is not set # CONFIG_SPI_SIFIVE is not set # CONFIG_SPI_SLAVE is not set CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m @@ -8354,6 +8491,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -8430,6 +8568,7 @@ CONFIG_TEST_LOCKUP=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -8502,7 +8641,7 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set +CONFIG_TI_ICSSG_PRUETH=m CONFIG_TI_ICSS_IEP=m CONFIG_TI_K3_AM65_CPSW_NUSS=m CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y @@ -8717,6 +8856,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m CONFIG_TYPEC_QCOM_PMIC=m # CONFIG_TYPEC_RT1711H is not set @@ -8728,7 +8868,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m -CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -8821,6 +8960,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y @@ -8885,6 +9025,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHSET_TEST_FIXTURE is not set CONFIG_USB_EMI26=m CONFIG_USB_EMI62=m +# CONFIG_USB_EMXX is not set CONFIG_USB_EPSON2888=y # CONFIG_USB_ETH is not set CONFIG_USB_EZUSB_FX2=m @@ -8997,6 +9138,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_M66592 is not set CONFIG_USB_MA901=m @@ -9057,6 +9199,7 @@ CONFIG_USB_OTG_FSM=m # CONFIG_USB_OTG_PRODUCTLIST is not set CONFIG_USB_OTG=y # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -9073,6 +9216,9 @@ CONFIG_USB_QCOM_EUD=m CONFIG_USB_RAINSHADOW_CEC=m # CONFIG_USB_RAREMONO is not set CONFIG_USB_RAW_GADGET=m +# CONFIG_USB_RENESAS_USB3 is not set +# CONFIG_USB_RENESAS_USBF is not set +# CONFIG_USB_RENESAS_USBHS is not set CONFIG_USB_ROLE_SWITCH=y CONFIG_USB_RTL8150=m CONFIG_USB_RTL8152=m @@ -9184,6 +9330,7 @@ CONFIG_USB_XHCI_MVEBU=m CONFIG_USB_XHCI_PCI_RENESAS=y CONFIG_USB_XHCI_PCI=y CONFIG_USB_XHCI_PLATFORM=m +CONFIG_USB_XHCI_RCAR=m CONFIG_USB_XHCI_TEGRA=m CONFIG_USB_XUSBATM=m CONFIG_USB=y @@ -9354,7 +9501,7 @@ CONFIG_VIDEO_IMX8_ISI=m CONFIG_VIDEO_IMX8_ISI_M2M=y CONFIG_VIDEO_IMX8_JPEG=m CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m -CONFIG_VIDEO_IMX_MEDIA=m +# CONFIG_VIDEO_IMX_MEDIA is not set CONFIG_VIDEO_IMX_MIPI_CSIS=m CONFIG_VIDEO_IMX_PXP=m # CONFIG_VIDEO_IPU3_CIO2 is not set @@ -9371,10 +9518,12 @@ CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set CONFIG_VIDEO_MESON_GE2D=m CONFIG_VIDEO_MESON_VDEC=m +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # 
CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -9422,12 +9571,19 @@ CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_SYSFS=y CONFIG_VIDEO_QCOM_CAMSS=m CONFIG_VIDEO_QCOM_VENUS=m +# CONFIG_VIDEO_RCAR_CSI2 is not set +# CONFIG_VIDEO_RCAR_ISP is not set +# CONFIG_VIDEO_RCAR_VIN is not set CONFIG_VIDEO_RDACM20=m # CONFIG_VIDEO_RDACM21 is not set +# CONFIG_VIDEO_RENESAS_FCP is not set +# CONFIG_VIDEO_RENESAS_JPU is not set CONFIG_VIDEO_RJ54N1=m CONFIG_VIDEO_ROCKCHIP_ISP1=m CONFIG_VIDEO_ROCKCHIP_RGA=m CONFIG_VIDEO_ROCKCHIP_VDEC=m +# CONFIG_VIDEO_RZG2L_CRU is not set +# CONFIG_VIDEO_RZG2L_CSI2 is not set CONFIG_VIDEO_S5C73M3=m CONFIG_VIDEO_S5K4ECGX=m CONFIG_VIDEO_S5K5BAF=m @@ -9478,6 +9634,7 @@ CONFIG_VIDEO_THS7303=m CONFIG_VIDEO_THS8200=m CONFIG_VIDEO_TI_CAL=m CONFIG_VIDEO_TI_CAL_MC=y +CONFIG_VIDEO_TI_J721E_CSI2RX=m CONFIG_VIDEO_TLV320AIC23B=m CONFIG_VIDEO_TM6000_ALSA=m CONFIG_VIDEO_TM6000_DVB=m @@ -9683,6 +9840,7 @@ CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set # CONFIG_XEN is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -9801,19 +9959,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-16k-fedora.config b/SOURCES/kernel-aarch64-16k-fedora.config index 2de220e..ec0e8aa 100644 --- a/SOURCES/kernel-aarch64-16k-fedora.config +++ b/SOURCES/kernel-aarch64-16k-fedora.config @@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set CONFIG_AMLOGIC_THERMAL=m +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y CONFIG_AMT=m CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder" @@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y CONFIG_ARCH_NR_GPIO=2048 CONFIG_ARCH_NXP=y # CONFIG_ARCH_OMAP1 is not set +# CONFIG_ARCH_PENSANDO is not set # CONFIG_ARCH_PXA is not set CONFIG_ARCH_QCOM=y +CONFIG_ARCH_R8A774A1=y +# CONFIG_ARCH_R8A774B1 is not set +# CONFIG_ARCH_R8A774C0 is not set +# CONFIG_ARCH_R8A774E1 is not set +# CONFIG_ARCH_R8A77951 is not set +# CONFIG_ARCH_R8A77960 is not set +# CONFIG_ARCH_R8A77961 is not set +# CONFIG_ARCH_R8A77965 is not set +# CONFIG_ARCH_R8A77970 is not set +# CONFIG_ARCH_R8A77980 is not set +# CONFIG_ARCH_R8A77990 is not set +# CONFIG_ARCH_R8A77995 is not set +# CONFIG_ARCH_R8A779A0 is not set +# CONFIG_ARCH_R8A779F0 is not set +# CONFIG_ARCH_R8A779G0 is not set +CONFIG_ARCH_R9A07G043=y +CONFIG_ARCH_R9A07G044=y +CONFIG_ARCH_R9A07G054=y +# CONFIG_ARCH_R9A08G045 is not set +# CONFIG_ARCH_R9A09G011 is not set CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_RDA is not 
set # CONFIG_ARCH_REALTEK is not set -# CONFIG_ARCH_RENESAS is not set +CONFIG_ARCH_RENESAS=y CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARCH_S32 is not set # CONFIG_ARCH_SA1100 is not set @@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m CONFIG_ARM_RK3399_DMC_DEVFREQ=m CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y CONFIG_ARM_SCMI_POWERCAP=m CONFIG_ARM_SCMI_POWER_CONTROL=m CONFIG_ARM_SCMI_POWER_DOMAIN=m @@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y CONFIG_ATA_BMDMA=y CONFIG_ATA_FORCE=y CONFIG_ATA_GENERIC=m -# CONFIG_ATALK is not set +CONFIG_ATALK=m CONFIG_ATA_OVER_ETH=m CONFIG_ATA_PIIX=y # CONFIG_ATARI_PARTITION is not set @@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m CONFIG_BATTERY_MAX17040=m CONFIG_BATTERY_MAX17042=m # CONFIG_BATTERY_MAX1721X is not set +# CONFIG_BATTERY_PM8916_BMS_VM is not set CONFIG_BATTERY_QCOM_BATTMGR=m CONFIG_BATTERY_RT5033=m CONFIG_BATTERY_SAMSUNG_SDI=y @@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM2711_THERMAL=m CONFIG_BCM2835_MBOX=y @@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCMSTB_L2_IRQ=y # CONFIG_BRCM_TRACING is not set CONFIG_BRCMUTIL=m @@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y CONFIG_CAN_PEAK_PCIEFD=m CONFIG_CAN_PEAK_USB=m CONFIG_CAN_RAW=m +# CONFIG_CAN_RCAR_CANFD is not set +# CONFIG_CAN_RCAR is not set # CONFIG_CAN_SJA1000 is not set CONFIG_CAN_SLCAN=m # CONFIG_CAN_SOFTING is not set @@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m CONFIG_CHARGER_MAX77976=m # CONFIG_CHARGER_MAX8903 is not set CONFIG_CHARGER_MT6370=m +# CONFIG_CHARGER_PM8916_LBC is not set # CONFIG_CHARGER_QCOM_SMB2 is not set CONFIG_CHARGER_QCOM_SMBB=m CONFIG_CHARGER_RK817=m @@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM2711_DVP=m CONFIG_CLK_BCM2835=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set CONFIG_CLK_ICST=y @@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y CONFIG_CLK_PX30=y CONFIG_CLK_QORIQ=y CONFIG_CLK_RASPBERRYPI=y +# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set CONFIG_CLK_RK3036=y CONFIG_CLK_RK312X=y CONFIG_CLK_RK3188=y @@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y CONFIG_COMMON_CLK_RK808=m CONFIG_COMMON_CLK_ROCKCHIP=y CONFIG_COMMON_CLK_RS9_PCIE=m 
+CONFIG_COMMON_CLK_S4_PERIPHERALS=y +CONFIG_COMMON_CLK_S4_PLL=y CONFIG_COMMON_CLK_SCMI=y CONFIG_COMMON_CLK_SCPI=m # CONFIG_COMMON_CLK_SI514 is not set @@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m CONFIG_CROS_EC_VBC=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1644,6 +1689,7 @@ CONFIG_DE2104X=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set # CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1732,7 +1778,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1844,6 +1889,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y # CONFIG_DRBD_FAULT_INJECTION is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m CONFIG_DRM_ACCEL_QAIC=m CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1927,6 +1973,7 @@ CONFIG_DRM_IMX8QXP_LDB=m CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m CONFIG_DRM_IMX8QXP_PIXEL_LINK=m CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m +CONFIG_DRM_IMX93_MIPI_DSI=m CONFIG_DRM_IMX_DCSS=m CONFIG_DRM_IMX_LCDC=m CONFIG_DRM_IMX_LCDIF=m @@ -1990,9 +2037,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m CONFIG_DRM_PANEL_ILITEK_IL9322=m CONFIG_DRM_PANEL_ILITEK_ILI9341=m CONFIG_DRM_PANEL_ILITEK_ILI9881C=m +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m CONFIG_DRM_PANEL_KHADAS_TS050=m @@ -2022,6 +2071,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set CONFIG_DRM_PANEL_RAYDIUM_RM68200=m +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -2066,13 +2116,16 @@ CONFIG_DRM_PL111=m CONFIG_DRM_QXL=m CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y +# CONFIG_DRM_RCAR_DU is not set # CONFIG_DRM_RCAR_DW_HDMI is not set # CONFIG_DRM_RCAR_LVDS is not set # CONFIG_DRM_RCAR_MIPI_DSI is not set # CONFIG_DRM_RCAR_USE_LVDS is not set # CONFIG_DRM_RCAR_USE_MIPI_DSI is not set CONFIG_DRM_ROCKCHIP=m +# CONFIG_DRM_RZG2L_MIPI_DSI is not set CONFIG_DRM_SAMSUNG_DSIM=m +# CONFIG_DRM_SHMOBILE is not set # CONFIG_DRM_SII902X is not set CONFIG_DRM_SII9234=m # CONFIG_DRM_SIL_SII8620 is not set @@ -2092,7 +2145,7 @@ CONFIG_DRM_TEGRA_STAGING=y # CONFIG_DRM_THINE_THC63LVD1024 is not set 
CONFIG_DRM_TI_DLPC3433=m CONFIG_DRM_TIDSS=m -# CONFIG_DRM_TI_SN65DSI83 is not set +CONFIG_DRM_TI_SN65DSI83=m CONFIG_DRM_TI_SN65DSI86=m CONFIG_DRM_TI_TFP410=m CONFIG_DRM_TI_TPD12S015=m @@ -2260,6 +2313,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y CONFIG_EDAC_QCOM=m CONFIG_EDAC_SYNOPSYS=m CONFIG_EDAC_THUNDERX=m +CONFIG_EDAC_VERSAL=m CONFIG_EDAC_XGENE=m CONFIG_EDAC=y CONFIG_EDAC_ZYNQMP=m @@ -2269,7 +2323,6 @@ CONFIG_EEPROM_AT24=m CONFIG_EEPROM_AT25=m CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_ARMSTUB_DTB_LOADER=y # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -2392,7 +2445,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2546,6 +2599,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTWDT010_WATCHDOG is not set +CONFIG_FUEL_GAUGE_MM8013=m CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2676,6 +2730,7 @@ CONFIG_GPIO_PCI_IDIO_16=m # CONFIG_GPIO_PISOSR is not set CONFIG_GPIO_PL061=y CONFIG_GPIO_RASPBERRYPI_EXP=m +CONFIG_GPIO_RCAR=m # CONFIG_GPIO_RDC321X is not set CONFIG_GPIO_ROCKCHIP=y # CONFIG_GPIO_SAMA5D2_PIOBU is not set @@ -2905,6 +2960,7 @@ CONFIG_HNS_ENET=m CONFIG_HOLTEK_FF=y # CONFIG_HOSTAP is not set CONFIG_HOTPLUG_CPU=y +CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -3058,9 +3114,13 @@ CONFIG_I2C_PXA=m CONFIG_I2C_QCOM_CCI=m CONFIG_I2C_QCOM_GENI=m CONFIG_I2C_QUP=m +# CONFIG_I2C_RCAR is not set +# CONFIG_I2C_RIIC is not set CONFIG_I2C_RK3X=y # CONFIG_I2C_ROBOTFUZZ_OSIF is not set +# CONFIG_I2C_RZV2M is not set CONFIG_I2C_SCMI=m +# CONFIG_I2C_SH_MOBILE is not set CONFIG_I2C_SI470X=m # CONFIG_I2C_SI4713 is not set CONFIG_I2C_SIMTEC=m @@ -3102,6 +3162,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -3194,7 +3255,6 @@ CONFIG_IMA_NG_TEMPLATE=y CONFIG_IMA_READ_POLICY=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -3403,6 +3463,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m CONFIG_INTERCONNECT_QCOM_SDM845=m # CONFIG_INTERCONNECT_QCOM_SDX55 is not set # CONFIG_INTERCONNECT_QCOM_SDX65 is not set +CONFIG_INTERCONNECT_QCOM_SDX75=m # CONFIG_INTERCONNECT_QCOM_SM6350 is not set CONFIG_INTERCONNECT_QCOM_SM8150=m CONFIG_INTERCONNECT_QCOM_SM8250=m @@ -3465,8 +3526,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y CONFIG_IPMB_DEVICE_INTERFACE=m CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3477,6 +3536,7 @@ CONFIG_IPMI_POWEROFF=m CONFIG_IPMI_SI=m CONFIG_IPMI_SSIF=m CONFIG_IPMI_WATCHDOG=m +# CONFIG_IPMMU_VMSA is not set CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_MROUTE=y CONFIG_IP_MULTICAST=y @@ -3799,7 +3859,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 
CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3835,6 +3895,7 @@ CONFIG_KUNIT=m CONFIG_KUNIT_TEST=m # CONFIG_KUNPENG_HCCS is not set CONFIG_KUSER_HELPERS=y +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3892,6 +3953,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -4019,6 +4081,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -4033,6 +4096,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -4041,6 +4105,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -4112,6 +4177,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set CONFIG_MCP320X=m CONFIG_MCP3422=m +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -4122,6 +4188,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -4143,7 +4210,7 @@ CONFIG_MDIO_I2C=m CONFIG_MDIO_IPQ8064=m # CONFIG_MDIO_MSCC_MIIM is not set CONFIG_MDIO_MVUSB=m -# CONFIG_MDIO_OCTEON is not set +CONFIG_MDIO_OCTEON=m # CONFIG_MDIO_SUN4I is not set CONFIG_MDIO_THUNDER=m CONFIG_MDIO_XGENE=m @@ -4157,6 +4224,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -4398,18 +4466,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -4497,7 +4569,11 @@ CONFIG_MMC_SDHCI_PLTFM=m CONFIG_MMC_SDHCI_PXAV3=m CONFIG_MMC_SDHCI_TEGRA=m CONFIG_MMC_SDHCI_XENON=m +CONFIG_MMC_SDHI_INTERNAL_DMAC=m +CONFIG_MMC_SDHI=m +# CONFIG_MMC_SDHI_SYS_DMAC is not set CONFIG_MMC_SDRICOH_CS=m +# CONFIG_MMC_SH_MMCIF is not set CONFIG_MMC_SPI=m # CONFIG_MMC_STM32_SDMMC is not set CONFIG_MMC_SUNXI=m @@ -4530,6 +4606,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -4620,6 +4699,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # 
CONFIG_MTD_AFS_PARTS is not set @@ -4678,6 +4759,7 @@ CONFIG_MTD_NAND_NANDSIM=m # CONFIG_MTD_NAND_PL35X is not set # CONFIG_MTD_NAND_PLATFORM is not set # CONFIG_MTD_NAND_QCOM is not set +# CONFIG_MTD_NAND_RENESAS is not set # CONFIG_MTD_NAND_RICOH is not set # CONFIG_MTD_NAND_ROCKCHIP is not set # CONFIG_MTD_NAND_SUNXI is not set @@ -4745,7 +4827,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m CONFIG_MWL8K=m -# CONFIG_MX3_IPU is not set CONFIG_MXC4005=m CONFIG_MXC6255=m # CONFIG_MXS_DMA is not set @@ -4799,9 +4880,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4874,12 +4952,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4972,6 +5050,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4984,15 +5063,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m CONFIG_NET_SB1000=y -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -5026,6 +5102,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -5163,7 +5240,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -5357,11 +5434,13 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set CONFIG_NVME_APPLE=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_APPLE_EFUSES=m # CONFIG_NVMEM_IMX_IIM is not set @@ -5397,7 +5476,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -5545,6 +5626,7 @@ CONFIG_PCI_AARDVARK=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -5583,10 +5665,13 @@ CONFIG_PCIE_MOBIVEIL=y CONFIG_PCIEPORTBUS=y CONFIG_PCIE_PTM=y CONFIG_PCIE_QCOM=y +# CONFIG_PCIE_RCAR_GEN4_HOST is not set +# 
CONFIG_PCIE_RCAR_HOST is not set CONFIG_PCIE_ROCKCHIP_DW_HOST=y CONFIG_PCIE_ROCKCHIP_HOST=y CONFIG_PCIE_TEGRA194_HOST=y CONFIG_PCIE_XILINX_CPM=y +CONFIG_PCIE_XILINX_DMA_PL=y CONFIG_PCIE_XILINX_NWL=y CONFIG_PCIE_XILINX=y # CONFIG_PCI_FTPCI100 is not set @@ -5620,6 +5705,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -5669,7 +5755,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y CONFIG_PHY_MESON_G12A_USB2=y CONFIG_PHY_MESON_G12A_USB3_PCIE=m CONFIG_PHY_MESON_GXL_USB2=m -# CONFIG_PHY_MIXEL_LVDS_PHY is not set +CONFIG_PHY_MIXEL_LVDS_PHY=m CONFIG_PHY_MIXEL_MIPI_DPHY=m CONFIG_PHY_MVEBU_A3700_COMPHY=m CONFIG_PHY_MVEBU_A3700_UTMI=m @@ -5703,6 +5789,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m CONFIG_PHY_QCOM_USB_HS=m CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m CONFIG_PHY_QCOM_USB_SS=m +# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set +# CONFIG_PHY_RCAR_GEN2 is not set +# CONFIG_PHY_RCAR_GEN3_PCIE is not set +CONFIG_PHY_RCAR_GEN3_USB2=m +# CONFIG_PHY_RCAR_GEN3_USB3 is not set CONFIG_PHY_ROCKCHIP_DPHY_RX0=m CONFIG_PHY_ROCKCHIP_DP=m CONFIG_PHY_ROCKCHIP_EMMC=m @@ -5735,6 +5826,7 @@ CONFIG_PINCONF=y CONFIG_PINCTRL_ALDERLAKE=m CONFIG_PINCTRL_AMD=y CONFIG_PINCTRL_AMLOGIC_C3=y +CONFIG_PINCTRL_AMLOGIC_T7=y CONFIG_PINCTRL_APPLE_GPIO=m CONFIG_PINCTRL_AS3722=y CONFIG_PINCTRL_AXP209=m @@ -5855,12 +5947,13 @@ CONFIG_PINCTRL_SUN50I_H6=y # CONFIG_PINCTRL_SUN8I_A33 is not set # CONFIG_PINCTRL_SUN8I_A83T is not set # CONFIG_PINCTRL_SUN8I_A83T_R is not set -# CONFIG_PINCTRL_SUN8I_H3 is not set CONFIG_PINCTRL_SUN8I_H3_R=y +CONFIG_PINCTRL_SUN8I_H3=y # CONFIG_PINCTRL_SUN8I_V3S is not set # CONFIG_PINCTRL_SUN9I_A80 is not set # CONFIG_PINCTRL_SUN9I_A80_R is not set # CONFIG_PINCTRL_SX150X is not set +CONFIG_PINCTRL_TEGRA234=y CONFIG_PINCTRL=y CONFIG_PINCTRL_ZYNQMP=y # CONFIG_PING is not set @@ -5906,7 +5999,6 @@ CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m CONFIG_POWER_RESET_AS3722=y # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -6041,6 +6133,8 @@ CONFIG_PWM_MESON=m CONFIG_PWM_OMAP_DMTIMER=m CONFIG_PWM_PCA9685=m CONFIG_PWM_RASPBERRYPI_POE=m +# CONFIG_PWM_RCAR is not set +# CONFIG_PWM_RENESAS_TPU is not set CONFIG_PWM_ROCKCHIP=m CONFIG_PWM_STMPE=y CONFIG_PWM_SUN4I=m @@ -6107,6 +6201,8 @@ CONFIG_QCOM_Q6V5_WCSS=m CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_QFPROM=m CONFIG_QCOM_QMI_HELPERS=m +CONFIG_QCOM_QSEECOM_UEFISECAPP=y +CONFIG_QCOM_QSEECOM=y CONFIG_QCOM_RAMP_CTRL=m CONFIG_QCOM_RMTFS_MEM=m CONFIG_QCOM_RPMHPD=y @@ -6213,6 +6309,10 @@ CONFIG_RASPBERRYPI_POWER=y CONFIG_RATIONAL_KUNIT_TEST=m # CONFIG_RAVE_SP_CORE is not set # CONFIG_RBTREE_TEST is not set +# CONFIG_RCAR_DMAC is not set +# CONFIG_RCAR_GEN3_THERMAL is not set +# CONFIG_RCAR_REMOTEPROC is not set +# CONFIG_RCAR_THERMAL is not set CONFIG_RC_ATI_REMOTE=m CONFIG_RC_CORE=y CONFIG_RC_DECODERS=y @@ -6248,7 +6348,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -6292,6 +6392,7 @@ CONFIG_REGULATOR_HI655X=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77650=m CONFIG_REGULATOR_MAX77686=m @@ -6378,7 +6479,14 @@ 
CONFIG_RELOCATABLE=y # CONFIG_REMOTEPROC_CDEV is not set CONFIG_REMOTEPROC=y CONFIG_REMOTE_TARGET=m +# CONFIG_RENESAS_OSTM is not set # CONFIG_RENESAS_PHY is not set +# CONFIG_RENESAS_RPCIF is not set +# CONFIG_RENESAS_RZAWDT is not set +# CONFIG_RENESAS_RZG2LWDT is not set +# CONFIG_RENESAS_RZN1WDT is not set +# CONFIG_RENESAS_USB_DMAC is not set +# CONFIG_RENESAS_WDT is not set # CONFIG_RESET_ATTACK_MITIGATION is not set CONFIG_RESET_CONTROLLER=y CONFIG_RESET_HISI=y @@ -6389,6 +6497,7 @@ CONFIG_RESET_MESON=m CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RASPBERRYPI=y +CONFIG_RESET_RZG2L_USBPHY_CTRL=m CONFIG_RESET_SCMI=y CONFIG_RESET_SIMPLE=y CONFIG_RESET_TI_SCI=m @@ -6445,6 +6554,7 @@ CONFIG_ROCKCHIP_VOP2=y CONFIG_ROCKCHIP_VOP=y CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -6504,7 +6614,6 @@ CONFIG_RTC_DRV_ARMADA38X=m CONFIG_RTC_DRV_AS3722=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m CONFIG_RTC_DRV_CADENCE=m CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_CROS_EC=m @@ -6592,6 +6701,7 @@ CONFIG_RTC_DRV_RX8581=m CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_SA1100 is not set CONFIG_RTC_DRV_SD3078=m +# CONFIG_RTC_DRV_SH is not set CONFIG_RTC_DRV_SNVS=m CONFIG_RTC_DRV_STK17TA8=m CONFIG_RTC_DRV_SUN6I=y @@ -6667,6 +6777,10 @@ CONFIG_RV_REACT_PRINTK=y CONFIG_RV=y CONFIG_RXKAD=y # CONFIG_RXPERF is not set +# CONFIG_RZ_DMAC is not set +# CONFIG_RZG2L_ADC is not set +# CONFIG_RZG2L_THERMAL is not set +# CONFIG_RZ_MTU3 is not set CONFIG_S2IO=m # CONFIG_S390_KPROBES_SANITY_TEST is not set # CONFIG_S390_MODULES_SANITY_TEST is not set @@ -6687,6 +6801,7 @@ CONFIG_SATA_MV=m CONFIG_SATA_PMP=y # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_RCAR is not set CONFIG_SATA_SIL24=m # CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set @@ -6836,6 +6951,7 @@ CONFIG_SCSI_UFS_HISI=m CONFIG_SCSI_UFS_HPB=y CONFIG_SCSI_UFS_HWMON=y CONFIG_SCSI_UFS_QCOM=m +# CONFIG_SCSI_UFS_RENESAS is not set CONFIG_SCSI_UFS_TI_J721E=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_WD719X=m @@ -6865,11 +6981,12 @@ CONFIG_SDM_VIDEOCC_845=m # CONFIG_SDX_GCC_75 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -7011,6 +7128,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -7071,6 +7189,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -7144,6 +7263,7 @@ CONFIG_SERIAL_8250_CS=m CONFIG_SERIAL_8250_DFL=m CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_DW=y +# CONFIG_SERIAL_8250_EM is not set CONFIG_SERIAL_8250_EXAR=m CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_FINTEK is not set @@ -7206,6 +7326,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y CONFIG_SERIAL_SC16IS7XX=m CONFIG_SERIAL_SC16IS7XX_SPI=y # CONFIG_SERIAL_SCCNXP is not set 
+CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_SH_SCI_DMA=y +CONFIG_SERIAL_SH_SCI_EARLYCON=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=18 +CONFIG_SERIAL_SH_SCI=y # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SPRD is not set # CONFIG_SERIAL_ST_ASC is not set @@ -7272,7 +7397,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -7286,12 +7410,14 @@ CONFIG_SMC91X=m # CONFIG_SM_CAMCC_6350 is not set # CONFIG_SM_CAMCC_8250 is not set # CONFIG_SM_CAMCC_8450 is not set +# CONFIG_SM_CAMCC_8550 is not set CONFIG_SMC_DIAG=m CONFIG_SMC=m # CONFIG_SM_DISPCC_8250 is not set CONFIG_SM_DISPCC_8450=m # CONFIG_SM_DISPCC_8550 is not set # CONFIG_SM_FTL is not set +# CONFIG_SM_GCC_4450 is not set # CONFIG_SM_GCC_6115 is not set # CONFIG_SM_GCC_6125 is not set # CONFIG_SM_GCC_6350 is not set @@ -7326,7 +7452,7 @@ CONFIG_SMS_USB_DRV=m # CONFIG_SM_TCSRCC_8550 is not set # CONFIG_SM_VIDEOCC_8150 is not set # CONFIG_SM_VIDEOCC_8250 is not set -# CONFIG_SM_VIDEOCC_8350 is not set +CONFIG_SM_VIDEOCC_8350=m # CONFIG_SM_VIDEOCC_8450 is not set # CONFIG_SM_VIDEOCC_8550 is not set CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 @@ -7395,6 +7521,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -7547,8 +7674,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m CONFIG_SND_SOC_ARNDALE=m CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -7754,6 +7883,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m # CONFIG_SND_SOC_PEB2466 is not set CONFIG_SND_SOC_QCOM=m CONFIG_SND_SOC_QDSP6=m +# CONFIG_SND_SOC_RCAR is not set CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m CONFIG_SND_SOC_RK3328=m CONFIG_SND_SOC_RK3399_GRU_SOUND=m @@ -7788,6 +7918,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m +# CONFIG_SND_SOC_RZ is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m @@ -7799,6 +7931,7 @@ CONFIG_SND_SOC_SC8280XP=m CONFIG_SND_SOC_SDM845=m # CONFIG_SND_SOC_SDW_MOCKUP is not set CONFIG_SND_SOC_SGTL5000=m +# CONFIG_SND_SOC_SH4_FSI is not set CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m CONFIG_SND_SOC_SIMPLE_MUX=m # CONFIG_SND_SOC_SM8250 is not set @@ -8091,8 +8224,12 @@ CONFIG_SPI_QCOM_GENI=m CONFIG_SPI_QCOM_QSPI=m CONFIG_SPI_QUP=m CONFIG_SPI_ROCKCHIP=m -# CONFIG_SPI_ROCKCHIP_SFC is not set +CONFIG_SPI_ROCKCHIP_SFC=m +# CONFIG_SPI_RSPI is not set +# CONFIG_SPI_RZV2M_CSI is not set # CONFIG_SPI_SC18IS602 is not set +# CONFIG_SPI_SH_HSPI is not set +# CONFIG_SPI_SH_MSIOF is not set # CONFIG_SPI_SIFIVE is not set # CONFIG_SPI_SLAVE is not set CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m @@ -8325,6 +8462,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -8401,6 +8539,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # 
CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -8473,7 +8612,7 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set +CONFIG_TI_ICSSG_PRUETH=m CONFIG_TI_ICSS_IEP=m CONFIG_TI_K3_AM65_CPSW_NUSS=m CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y @@ -8688,6 +8827,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m CONFIG_TYPEC_QCOM_PMIC=m # CONFIG_TYPEC_RT1711H is not set @@ -8699,7 +8839,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m -CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -8792,6 +8931,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y @@ -8856,6 +8996,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHSET_TEST_FIXTURE is not set CONFIG_USB_EMI26=m CONFIG_USB_EMI62=m +# CONFIG_USB_EMXX is not set CONFIG_USB_EPSON2888=y # CONFIG_USB_ETH is not set CONFIG_USB_EZUSB_FX2=m @@ -8968,6 +9109,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_M66592 is not set CONFIG_USB_MA901=m @@ -9028,6 +9170,7 @@ CONFIG_USB_OTG_FSM=m # CONFIG_USB_OTG_PRODUCTLIST is not set CONFIG_USB_OTG=y # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -9044,6 +9187,9 @@ CONFIG_USB_QCOM_EUD=m CONFIG_USB_RAINSHADOW_CEC=m # CONFIG_USB_RAREMONO is not set CONFIG_USB_RAW_GADGET=m +# CONFIG_USB_RENESAS_USB3 is not set +# CONFIG_USB_RENESAS_USBF is not set +# CONFIG_USB_RENESAS_USBHS is not set CONFIG_USB_ROLE_SWITCH=y CONFIG_USB_RTL8150=m CONFIG_USB_RTL8152=m @@ -9155,6 +9301,7 @@ CONFIG_USB_XHCI_MVEBU=m CONFIG_USB_XHCI_PCI_RENESAS=y CONFIG_USB_XHCI_PCI=y CONFIG_USB_XHCI_PLATFORM=m +CONFIG_USB_XHCI_RCAR=m CONFIG_USB_XHCI_TEGRA=m CONFIG_USB_XUSBATM=m CONFIG_USB=y @@ -9325,7 +9472,7 @@ CONFIG_VIDEO_IMX8_ISI=m CONFIG_VIDEO_IMX8_ISI_M2M=y CONFIG_VIDEO_IMX8_JPEG=m CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m -CONFIG_VIDEO_IMX_MEDIA=m +# CONFIG_VIDEO_IMX_MEDIA is not set CONFIG_VIDEO_IMX_MIPI_CSIS=m CONFIG_VIDEO_IMX_PXP=m # CONFIG_VIDEO_IPU3_CIO2 is not set @@ -9342,10 +9489,12 @@ CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set CONFIG_VIDEO_MESON_GE2D=m CONFIG_VIDEO_MESON_VDEC=m +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -9393,12 +9542,19 @@ CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_SYSFS=y CONFIG_VIDEO_QCOM_CAMSS=m CONFIG_VIDEO_QCOM_VENUS=m +# CONFIG_VIDEO_RCAR_CSI2 is not set +# CONFIG_VIDEO_RCAR_ISP is not set +# CONFIG_VIDEO_RCAR_VIN is not set CONFIG_VIDEO_RDACM20=m # CONFIG_VIDEO_RDACM21 is not set +# CONFIG_VIDEO_RENESAS_FCP is not set +# CONFIG_VIDEO_RENESAS_JPU is not set CONFIG_VIDEO_RJ54N1=m CONFIG_VIDEO_ROCKCHIP_ISP1=m CONFIG_VIDEO_ROCKCHIP_RGA=m CONFIG_VIDEO_ROCKCHIP_VDEC=m +# CONFIG_VIDEO_RZG2L_CRU is not set +# CONFIG_VIDEO_RZG2L_CSI2 is not set CONFIG_VIDEO_S5C73M3=m CONFIG_VIDEO_S5K4ECGX=m CONFIG_VIDEO_S5K5BAF=m @@ -9449,6 +9605,7 @@ CONFIG_VIDEO_THS7303=m CONFIG_VIDEO_THS8200=m 
CONFIG_VIDEO_TI_CAL=m CONFIG_VIDEO_TI_CAL_MC=y +CONFIG_VIDEO_TI_J721E_CSI2RX=m CONFIG_VIDEO_TLV320AIC23B=m CONFIG_VIDEO_TM6000_ALSA=m CONFIG_VIDEO_TM6000_DVB=m @@ -9654,6 +9811,7 @@ CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set # CONFIG_XEN is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -9772,19 +9930,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-64k-debug-rhel.config b/SOURCES/kernel-aarch64-64k-debug-rhel.config index a38ed02..3c11aac 100644 --- a/SOURCES/kernel-aarch64-64k-debug-rhel.config +++ b/SOURCES/kernel-aarch64-64k-debug-rhel.config @@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m # CONFIG_AMD_XGBE_DCB is not set CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y # CONFIG_AMT is not set # CONFIG_ANDROID_BINDER_IPC is not set @@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_ARCH_MXC=y # CONFIG_ARCH_NPCM is not set CONFIG_ARCH_NXP=y +CONFIG_ARCH_PENSANDO=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_REALTEK is not set @@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -378,6 +381,7 @@ CONFIG_ARM_PMU=y # CONFIG_ARM_QCOM_CPUFREQ_HW is not set CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y # CONFIG_ARM_SCMI_POWER_CONTROL is not set CONFIG_ARM_SCMI_POWER_DOMAIN=m CONFIG_ARM_SCMI_PROTOCOL=y @@ -410,6 +414,7 @@ CONFIG_ARM_TI_CPUFREQ=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -523,6 +528,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -658,7 +664,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -751,7 +756,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ 
-831,6 +835,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -912,6 +917,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM_NS2=y CONFIG_CLK_BCM_SR=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1004,7 +1010,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1088,7 +1093,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_AES_ARM64=y -CONFIG_CRYPTO_AES_GCM_P10=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ANSI_CPRNG=m @@ -1105,7 +1109,6 @@ CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CHACHA20_NEON=y -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CMAC=y # CONFIG_CRYPTO_CRC32C_VPMSUM is not set @@ -1192,6 +1195,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1216,7 +1224,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m CONFIG_CRYPTO_POLY1305_NEON=y -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1488,6 +1495,7 @@ CONFIG_DPAA2_CONSOLE=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1560,6 +1568,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set CONFIG_DRM_IMX_DCSS=m # CONFIG_DRM_IMX_LCDC is not set # CONFIG_DRM_IMX_LCDIF is not set @@ -1589,38 +1598,92 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA 
is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LG_LG4573 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1640,7 +1703,8 @@ CONFIG_DRM_RADEON_USERPTR=y # CONFIG_DRM_SIMPLE_BRIDGE is not set CONFIG_DRM_SIMPLEDRM=y # CONFIG_DRM_SSD130X is not set -# CONFIG_DRM_TEGRA is not set +# CONFIG_DRM_TEGRA_DEBUG is not 
set +CONFIG_DRM_TEGRA=m # CONFIG_DRM_THINE_THC63LVD1024 is not set # CONFIG_DRM_TI_DLPC3433 is not set # CONFIG_DRM_TIDSS is not set @@ -1808,7 +1872,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1842,7 +1905,12 @@ CONFIG_ENIC=m # CONFIG_EPIC100 is not set CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1915,7 +1983,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2033,7 +2101,9 @@ CONFIG_FSL_PQ_MDIO=m # CONFIG_FSL_RCPM is not set CONFIG_FSL_XGMAC_MDIO=m CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -2041,6 +2111,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2193,6 +2264,7 @@ CONFIG_GPIO_XLP=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set CONFIG_GUP_TEST=y CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2376,6 +2448,7 @@ CONFIG_HNS_ENET=m CONFIG_HNS=m # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2539,6 +2612,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2552,6 +2626,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2621,7 +2696,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2755,6 +2829,7 @@ CONFIG_INPUT_SPARSEKMAP=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y # CONFIG_INPUT_YEALINK is not set +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2797,6 +2872,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2824,7 +2900,8 @@ CONFIG_IOMMU_DEBUGFS=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # 
CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -3126,7 +3203,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3156,6 +3233,7 @@ CONFIG_KUNIT_TEST=m CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3323,6 +3401,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3337,6 +3416,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3345,6 +3425,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3411,6 +3492,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3448,6 +3530,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3514,7 +3597,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3655,6 +3738,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3771,6 +3855,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3843,6 +3930,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3920,7 +4009,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m # CONFIG_MWL8K is not set -# CONFIG_MX3_IPU is not set # CONFIG_MXC4005 is not set # CONFIG_MXC6255 is not set # CONFIG_MXS_DMA is not set @@ -3965,9 +4053,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# 
CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4089,6 +4174,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4101,15 +4187,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4138,6 +4221,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4242,7 +4326,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4440,9 +4524,11 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_BCM_OCOTP is not set # CONFIG_NVMEM_IMX_IIM is not set @@ -4469,7 +4555,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4589,6 +4677,7 @@ CONFIG_PCC=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -4654,6 +4743,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4733,6 +4823,7 @@ CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_AMD is not set # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4821,7 +4912,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -4978,6 +5068,7 @@ CONFIG_QCOM_L3_PMU=y # CONFIG_QCOM_PDC is not set CONFIG_QCOM_QDF2400_ERRATUM_0065=y # CONFIG_QCOM_QFPROM is not set +# CONFIG_QCOM_QSEECOM is not set # CONFIG_QCOM_RAMP_CTRL is not set # CONFIG_QCOM_RMTFS_MEM is not set # CONFIG_QCOM_RPMH is not set @@ -5006,7 +5097,7 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set +CONFIG_QORIQ_THERMAL=m CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -5114,6 +5205,7 
@@ CONFIG_REGULATOR_GPIO=y # CONFIG_REGULATOR_MAX1586 is not set # CONFIG_REGULATOR_MAX20086 is not set # CONFIG_REGULATOR_MAX20411 is not set +# CONFIG_REGULATOR_MAX77503 is not set CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77686=m # CONFIG_REGULATOR_MAX77826 is not set @@ -5218,6 +5310,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5258,7 +5351,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5548,6 +5640,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5703,6 +5796,7 @@ CONFIG_SENSORS_LTC2945=m # CONFIG_SENSORS_LTC2978 is not set # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m # CONFIG_SENSORS_LTC4151 is not set @@ -5761,6 +5855,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set # CONFIG_SENSORS_PMBUS is not set +# CONFIG_SENSORS_POWERZ is not set CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -5937,7 +6032,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -6021,6 +6115,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6158,8 +6253,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6250,6 +6347,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6364,12 +6462,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -6392,6 +6484,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set 
CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6588,7 +6681,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set CONFIG_SOC_IMX8M=y CONFIG_SOC_IMX9=m # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set @@ -6819,6 +6911,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6885,6 +6978,7 @@ CONFIG_TEST_LIST_SORT=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7116,6 +7210,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7194,6 +7289,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CONN_GPIO=m @@ -7297,6 +7393,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7340,6 +7437,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7490,7 +7588,10 @@ CONFIG_VEXPRESS_CONFIG=y # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y CONFIG_VFIO_FSL_MC=m +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m # CONFIG_VFIO_MDEV is not set @@ -7605,11 +7706,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7944,19 +8047,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m 
+CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-64k-rhel.config b/SOURCES/kernel-aarch64-64k-rhel.config index bd46bde..77f32a4 100644 --- a/SOURCES/kernel-aarch64-64k-rhel.config +++ b/SOURCES/kernel-aarch64-64k-rhel.config @@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m # CONFIG_AMD_XGBE_DCB is not set CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y # CONFIG_AMT is not set # CONFIG_ANDROID_BINDER_IPC is not set @@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_ARCH_MXC=y # CONFIG_ARCH_NPCM is not set CONFIG_ARCH_NXP=y +CONFIG_ARCH_PENSANDO=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_REALTEK is not set @@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -378,6 +381,7 @@ CONFIG_ARM_PMU=y # CONFIG_ARM_QCOM_CPUFREQ_HW is not set CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y # CONFIG_ARM_SCMI_POWER_CONTROL is not set CONFIG_ARM_SCMI_POWER_DOMAIN=m CONFIG_ARM_SCMI_PROTOCOL=y @@ -410,6 +414,7 @@ CONFIG_ARM_TI_CPUFREQ=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -523,6 +528,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -658,7 +664,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -751,7 +756,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -831,6 +835,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y # CONFIG_CFG80211_DEBUGFS is not set CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -912,6 +917,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM_NS2=y CONFIG_CLK_BCM_SR=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1004,7 +1010,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1088,7 +1093,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_AES_ARM64=y -CONFIG_CRYPTO_AES_GCM_P10=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ANSI_CPRNG=m @@ -1105,7 +1109,6 @@ CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CHACHA20_NEON=y -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m 
CONFIG_CRYPTO_CMAC=y # CONFIG_CRYPTO_CRC32C_VPMSUM is not set @@ -1192,6 +1195,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1216,7 +1224,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m CONFIG_CRYPTO_POLY1305_NEON=y -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1480,6 +1487,7 @@ CONFIG_DPAA2_CONSOLE=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1552,6 +1560,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set CONFIG_DRM_IMX_DCSS=m # CONFIG_DRM_IMX_LCDC is not set # CONFIG_DRM_IMX_LCDIF is not set @@ -1581,38 +1590,92 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LG_LG4573 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# 
CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1632,7 +1695,8 @@ CONFIG_DRM_RADEON_USERPTR=y # CONFIG_DRM_SIMPLE_BRIDGE is not set CONFIG_DRM_SIMPLEDRM=y # CONFIG_DRM_SSD130X is not set -# CONFIG_DRM_TEGRA is not set +# CONFIG_DRM_TEGRA_DEBUG is not set +CONFIG_DRM_TEGRA=m # CONFIG_DRM_THINE_THC63LVD1024 is not set # CONFIG_DRM_TI_DLPC3433 is not set # CONFIG_DRM_TIDSS is not set @@ -1800,7 +1864,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1834,7 +1897,12 @@ CONFIG_ENIC=m # CONFIG_EPIC100 is not set CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1899,7 +1967,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2017,7 +2085,9 @@ CONFIG_FSL_PQ_MDIO=m # CONFIG_FSL_RCPM is not set CONFIG_FSL_XGMAC_MDIO=m CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not 
set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -2025,6 +2095,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2177,6 +2248,7 @@ CONFIG_GPIO_XLP=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2360,6 +2432,7 @@ CONFIG_HNS_ENET=m CONFIG_HNS=m # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2523,6 +2596,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2536,6 +2610,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2605,7 +2680,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2739,6 +2813,7 @@ CONFIG_INPUT_SPARSEKMAP=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y # CONFIG_INPUT_YEALINK is not set +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2781,6 +2856,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2808,7 +2884,8 @@ CONFIG_IO_DELAY_0X80=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -3104,7 +3181,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3135,6 +3212,7 @@ CONFIG_KUNIT_TEST=m CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3302,6 +3380,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3316,6 +3395,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # 
CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3324,6 +3404,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3390,6 +3471,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3427,6 +3509,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3493,7 +3576,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3634,6 +3717,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3750,6 +3834,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3822,6 +3909,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3899,7 +3988,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m # CONFIG_MWL8K is not set -# CONFIG_MX3_IPU is not set # CONFIG_MXC4005 is not set # CONFIG_MXC6255 is not set # CONFIG_MXS_DMA is not set @@ -3944,9 +4032,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4068,6 +4153,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4080,15 +4166,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4117,6 +4200,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4221,7 +4305,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m 
-CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4419,9 +4503,11 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_BCM_OCOTP is not set # CONFIG_NVMEM_IMX_IIM is not set @@ -4448,7 +4534,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4566,6 +4654,7 @@ CONFIG_PCC=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -4631,6 +4720,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4710,6 +4800,7 @@ CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_AMD is not set # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4798,7 +4889,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -4955,6 +5045,7 @@ CONFIG_QCOM_L3_PMU=y # CONFIG_QCOM_PDC is not set CONFIG_QCOM_QDF2400_ERRATUM_0065=y # CONFIG_QCOM_QFPROM is not set +# CONFIG_QCOM_QSEECOM is not set # CONFIG_QCOM_RAMP_CTRL is not set # CONFIG_QCOM_RMTFS_MEM is not set # CONFIG_QCOM_RPMH is not set @@ -4983,7 +5074,7 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set +CONFIG_QORIQ_THERMAL=m CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -5091,6 +5182,7 @@ CONFIG_REGULATOR_GPIO=y # CONFIG_REGULATOR_MAX1586 is not set # CONFIG_REGULATOR_MAX20086 is not set # CONFIG_REGULATOR_MAX20411 is not set +# CONFIG_REGULATOR_MAX77503 is not set CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77686=m # CONFIG_REGULATOR_MAX77826 is not set @@ -5195,6 +5287,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5235,7 +5328,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5525,6 +5617,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5680,6 +5773,7 @@ CONFIG_SENSORS_LTC2945=m # CONFIG_SENSORS_LTC2978 is not set # CONFIG_SENSORS_LTC2978_REGULATOR is 
not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m # CONFIG_SENSORS_LTC4151 is not set @@ -5738,6 +5832,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set # CONFIG_SENSORS_PMBUS is not set +# CONFIG_SENSORS_POWERZ is not set CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -5914,7 +6009,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5998,6 +6092,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6134,8 +6229,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6226,6 +6323,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6340,12 +6438,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -6368,6 +6460,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6563,7 +6656,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set CONFIG_SOC_IMX8M=y CONFIG_SOC_IMX9=m # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set @@ -6794,6 +6886,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6860,6 +6953,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7091,6 +7185,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# 
CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7169,6 +7264,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CONN_GPIO=m @@ -7272,6 +7368,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7315,6 +7412,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7465,7 +7563,10 @@ CONFIG_VEXPRESS_CONFIG=y # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y CONFIG_VFIO_FSL_MC=m +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m # CONFIG_VFIO_MDEV is not set @@ -7580,11 +7681,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7919,19 +8022,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-debug-fedora.config b/SOURCES/kernel-aarch64-debug-fedora.config index 3a39af6..4d85066 100644 --- a/SOURCES/kernel-aarch64-debug-fedora.config +++ b/SOURCES/kernel-aarch64-debug-fedora.config @@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set CONFIG_AMLOGIC_THERMAL=m +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y CONFIG_AMT=m CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder" @@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y CONFIG_ARCH_NR_GPIO=2048 CONFIG_ARCH_NXP=y # CONFIG_ARCH_OMAP1 is not set +# CONFIG_ARCH_PENSANDO is not set # CONFIG_ARCH_PXA is not set CONFIG_ARCH_QCOM=y +CONFIG_ARCH_R8A774A1=y +# CONFIG_ARCH_R8A774B1 is not set +# CONFIG_ARCH_R8A774C0 is not set +# CONFIG_ARCH_R8A774E1 is not set +# CONFIG_ARCH_R8A77951 is not set +# CONFIG_ARCH_R8A77960 is not set +# CONFIG_ARCH_R8A77961 is not set +# CONFIG_ARCH_R8A77965 is not set +# CONFIG_ARCH_R8A77970 
is not set +# CONFIG_ARCH_R8A77980 is not set +# CONFIG_ARCH_R8A77990 is not set +# CONFIG_ARCH_R8A77995 is not set +# CONFIG_ARCH_R8A779A0 is not set +# CONFIG_ARCH_R8A779F0 is not set +# CONFIG_ARCH_R8A779G0 is not set +CONFIG_ARCH_R9A07G043=y +CONFIG_ARCH_R9A07G044=y +CONFIG_ARCH_R9A07G054=y +# CONFIG_ARCH_R9A08G045 is not set +# CONFIG_ARCH_R9A09G011 is not set CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_RDA is not set # CONFIG_ARCH_REALTEK is not set -# CONFIG_ARCH_RENESAS is not set +CONFIG_ARCH_RENESAS=y CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARCH_S32 is not set # CONFIG_ARCH_SA1100 is not set @@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m CONFIG_ARM_RK3399_DMC_DEVFREQ=m CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y CONFIG_ARM_SCMI_POWERCAP=m CONFIG_ARM_SCMI_POWER_CONTROL=m CONFIG_ARM_SCMI_POWER_DOMAIN=m @@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y CONFIG_ATA_BMDMA=y CONFIG_ATA_FORCE=y CONFIG_ATA_GENERIC=m -# CONFIG_ATALK is not set +CONFIG_ATALK=m CONFIG_ATA_OVER_ETH=m CONFIG_ATA_PIIX=y # CONFIG_ATARI_PARTITION is not set @@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m CONFIG_BATTERY_MAX17040=m CONFIG_BATTERY_MAX17042=m # CONFIG_BATTERY_MAX1721X is not set +# CONFIG_BATTERY_PM8916_BMS_VM is not set CONFIG_BATTERY_QCOM_BATTMGR=m CONFIG_BATTERY_RT5033=m CONFIG_BATTERY_SAMSUNG_SDI=y @@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y +CONFIG_BCACHEFS_DEBUG=y +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_LOCK_TIME_STATS=y +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM2711_THERMAL=m CONFIG_BCM2835_MBOX=y @@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCMSTB_L2_IRQ=y CONFIG_BRCM_TRACING=y CONFIG_BRCMUTIL=m @@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y CONFIG_CAN_PEAK_PCIEFD=m CONFIG_CAN_PEAK_USB=m CONFIG_CAN_RAW=m +# CONFIG_CAN_RCAR_CANFD is not set +# CONFIG_CAN_RCAR is not set # CONFIG_CAN_SJA1000 is not set CONFIG_CAN_SLCAN=m # CONFIG_CAN_SOFTING is not set @@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m CONFIG_CHARGER_MAX77976=m # CONFIG_CHARGER_MAX8903 is not set CONFIG_CHARGER_MT6370=m +# CONFIG_CHARGER_PM8916_LBC is not set # CONFIG_CHARGER_QCOM_SMB2 is not set CONFIG_CHARGER_QCOM_SMBB=m CONFIG_CHARGER_RK817=m @@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM2711_DVP=m CONFIG_CLK_BCM2835=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set CONFIG_CLK_ICST=y @@ 
-1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y CONFIG_CLK_PX30=y CONFIG_CLK_QORIQ=y CONFIG_CLK_RASPBERRYPI=y +# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set CONFIG_CLK_RK3036=y CONFIG_CLK_RK312X=y CONFIG_CLK_RK3188=y @@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y CONFIG_COMMON_CLK_RK808=m CONFIG_COMMON_CLK_ROCKCHIP=y CONFIG_COMMON_CLK_RS9_PCIE=m +CONFIG_COMMON_CLK_S4_PERIPHERALS=y +CONFIG_COMMON_CLK_S4_PLL=y CONFIG_COMMON_CLK_SCMI=y CONFIG_COMMON_CLK_SCPI=m # CONFIG_COMMON_CLK_SI514 is not set @@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m CONFIG_CROS_EC_VBC=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1644,6 +1689,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set CONFIG_DEBUG_CREDENTIALS=y # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1740,7 +1786,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1853,6 +1898,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y CONFIG_DRBD_FAULT_INJECTION=y +CONFIG_DRIVER_PE_KUNIT_TEST=m CONFIG_DRM_ACCEL_QAIC=m CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1936,6 +1982,7 @@ CONFIG_DRM_IMX8QXP_LDB=m CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m CONFIG_DRM_IMX8QXP_PIXEL_LINK=m CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m +CONFIG_DRM_IMX93_MIPI_DSI=m CONFIG_DRM_IMX_DCSS=m CONFIG_DRM_IMX_LCDC=m CONFIG_DRM_IMX_LCDIF=m @@ -1999,9 +2046,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m CONFIG_DRM_PANEL_ILITEK_IL9322=m CONFIG_DRM_PANEL_ILITEK_ILI9341=m CONFIG_DRM_PANEL_ILITEK_ILI9881C=m +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m CONFIG_DRM_PANEL_KHADAS_TS050=m @@ -2031,6 +2080,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set CONFIG_DRM_PANEL_RAYDIUM_RM68200=m +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -2075,13 +2125,16 @@ CONFIG_DRM_PL111=m CONFIG_DRM_QXL=m CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y +# CONFIG_DRM_RCAR_DU is not set # CONFIG_DRM_RCAR_DW_HDMI is not set # CONFIG_DRM_RCAR_LVDS is not set # CONFIG_DRM_RCAR_MIPI_DSI is not set # CONFIG_DRM_RCAR_USE_LVDS is not set # CONFIG_DRM_RCAR_USE_MIPI_DSI is not 
set CONFIG_DRM_ROCKCHIP=m +# CONFIG_DRM_RZG2L_MIPI_DSI is not set CONFIG_DRM_SAMSUNG_DSIM=m +# CONFIG_DRM_SHMOBILE is not set # CONFIG_DRM_SII902X is not set CONFIG_DRM_SII9234=m # CONFIG_DRM_SIL_SII8620 is not set @@ -2101,7 +2154,7 @@ CONFIG_DRM_TEGRA_STAGING=y # CONFIG_DRM_THINE_THC63LVD1024 is not set CONFIG_DRM_TI_DLPC3433=m CONFIG_DRM_TIDSS=m -# CONFIG_DRM_TI_SN65DSI83 is not set +CONFIG_DRM_TI_SN65DSI83=m CONFIG_DRM_TI_SN65DSI86=m CONFIG_DRM_TI_TFP410=m CONFIG_DRM_TI_TPD12S015=m @@ -2269,6 +2322,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y CONFIG_EDAC_QCOM=m CONFIG_EDAC_SYNOPSYS=m CONFIG_EDAC_THUNDERX=m +CONFIG_EDAC_VERSAL=m CONFIG_EDAC_XGENE=m CONFIG_EDAC=y CONFIG_EDAC_ZYNQMP=m @@ -2278,7 +2332,6 @@ CONFIG_EEPROM_AT24=m CONFIG_EEPROM_AT25=m CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_ARMSTUB_DTB_LOADER=y # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -2409,7 +2462,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2563,6 +2616,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTWDT010_WATCHDOG is not set +CONFIG_FUEL_GAUGE_MM8013=m CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2693,6 +2747,7 @@ CONFIG_GPIO_PCI_IDIO_16=m # CONFIG_GPIO_PISOSR is not set CONFIG_GPIO_PL061=y CONFIG_GPIO_RASPBERRYPI_EXP=m +CONFIG_GPIO_RCAR=m # CONFIG_GPIO_RDC321X is not set CONFIG_GPIO_ROCKCHIP=y # CONFIG_GPIO_SAMA5D2_PIOBU is not set @@ -2922,6 +2977,7 @@ CONFIG_HNS_ENET=m CONFIG_HOLTEK_FF=y # CONFIG_HOSTAP is not set CONFIG_HOTPLUG_CPU=y +CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -3075,9 +3131,13 @@ CONFIG_I2C_PXA=m CONFIG_I2C_QCOM_CCI=m CONFIG_I2C_QCOM_GENI=m CONFIG_I2C_QUP=m +# CONFIG_I2C_RCAR is not set +# CONFIG_I2C_RIIC is not set CONFIG_I2C_RK3X=y # CONFIG_I2C_ROBOTFUZZ_OSIF is not set +# CONFIG_I2C_RZV2M is not set CONFIG_I2C_SCMI=m +# CONFIG_I2C_SH_MOBILE is not set CONFIG_I2C_SI470X=m # CONFIG_I2C_SI4713 is not set CONFIG_I2C_SIMTEC=m @@ -3119,6 +3179,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -3211,7 +3272,6 @@ CONFIG_IMA_NG_TEMPLATE=y CONFIG_IMA_READ_POLICY=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -3420,6 +3480,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m CONFIG_INTERCONNECT_QCOM_SDM845=m # CONFIG_INTERCONNECT_QCOM_SDX55 is not set # CONFIG_INTERCONNECT_QCOM_SDX65 is not set +CONFIG_INTERCONNECT_QCOM_SDX75=m # CONFIG_INTERCONNECT_QCOM_SM6350 is not set CONFIG_INTERCONNECT_QCOM_SM8150=m CONFIG_INTERCONNECT_QCOM_SM8250=m @@ -3482,8 +3543,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y CONFIG_IPMB_DEVICE_INTERFACE=m CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3494,6 +3553,7 @@ CONFIG_IPMI_POWEROFF=m CONFIG_IPMI_SI=m CONFIG_IPMI_SSIF=m CONFIG_IPMI_WATCHDOG=m +# CONFIG_IPMMU_VMSA is not set 
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_MROUTE=y CONFIG_IP_MULTICAST=y @@ -3824,7 +3884,7 @@ CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3859,6 +3919,7 @@ CONFIG_KUNIT=m CONFIG_KUNIT_TEST=m # CONFIG_KUNPENG_HCCS is not set CONFIG_KUSER_HELPERS=y +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3916,6 +3977,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -4043,6 +4105,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -4057,6 +4120,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -4065,6 +4129,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -4137,6 +4202,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set CONFIG_MCP320X=m CONFIG_MCP3422=m +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -4147,6 +4213,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -4168,7 +4235,7 @@ CONFIG_MDIO_I2C=m CONFIG_MDIO_IPQ8064=m # CONFIG_MDIO_MSCC_MIIM is not set CONFIG_MDIO_MVUSB=m -# CONFIG_MDIO_OCTEON is not set +CONFIG_MDIO_OCTEON=m # CONFIG_MDIO_SUN4I is not set CONFIG_MDIO_THUNDER=m CONFIG_MDIO_XGENE=m @@ -4182,6 +4249,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -4423,18 +4491,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -4522,7 +4594,11 @@ CONFIG_MMC_SDHCI_PLTFM=m CONFIG_MMC_SDHCI_PXAV3=m CONFIG_MMC_SDHCI_TEGRA=m CONFIG_MMC_SDHCI_XENON=m +CONFIG_MMC_SDHI_INTERNAL_DMAC=m +CONFIG_MMC_SDHI=m +# CONFIG_MMC_SDHI_SYS_DMAC is not set CONFIG_MMC_SDRICOH_CS=m +# CONFIG_MMC_SH_MMCIF is not set CONFIG_MMC_SPI=m # CONFIG_MMC_STM32_SDMMC is not set CONFIG_MMC_SUNXI=m @@ -4556,6 +4632,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# 
CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -4646,6 +4725,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -4704,6 +4785,7 @@ CONFIG_MTD_NAND_NANDSIM=m # CONFIG_MTD_NAND_PL35X is not set # CONFIG_MTD_NAND_PLATFORM is not set # CONFIG_MTD_NAND_QCOM is not set +# CONFIG_MTD_NAND_RENESAS is not set # CONFIG_MTD_NAND_RICOH is not set # CONFIG_MTD_NAND_ROCKCHIP is not set # CONFIG_MTD_NAND_SUNXI is not set @@ -4771,7 +4853,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m CONFIG_MWL8K=m -# CONFIG_MX3_IPU is not set CONFIG_MXC4005=m CONFIG_MXC6255=m # CONFIG_MXS_DMA is not set @@ -4825,9 +4906,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4900,12 +4978,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4998,6 +5076,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -5010,15 +5089,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m CONFIG_NET_SB1000=y -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -5052,6 +5128,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -5189,7 +5266,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -5383,11 +5460,13 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set CONFIG_NVME_APPLE=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_APPLE_EFUSES=m # CONFIG_NVMEM_IMX_IIM is not set @@ -5423,7 +5502,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -5572,6 +5653,7 @@ CONFIG_PCI_AARDVARK=y # CONFIG_PCI_CNB20LE_QUIRK is not set # 
CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -5610,10 +5692,13 @@ CONFIG_PCIE_MOBIVEIL=y CONFIG_PCIEPORTBUS=y CONFIG_PCIE_PTM=y CONFIG_PCIE_QCOM=y +# CONFIG_PCIE_RCAR_GEN4_HOST is not set +# CONFIG_PCIE_RCAR_HOST is not set CONFIG_PCIE_ROCKCHIP_DW_HOST=y CONFIG_PCIE_ROCKCHIP_HOST=y CONFIG_PCIE_TEGRA194_HOST=y CONFIG_PCIE_XILINX_CPM=y +CONFIG_PCIE_XILINX_DMA_PL=y CONFIG_PCIE_XILINX_NWL=y CONFIG_PCIE_XILINX=y # CONFIG_PCI_FTPCI100 is not set @@ -5647,6 +5732,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -5696,7 +5782,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y CONFIG_PHY_MESON_G12A_USB2=y CONFIG_PHY_MESON_G12A_USB3_PCIE=m CONFIG_PHY_MESON_GXL_USB2=m -# CONFIG_PHY_MIXEL_LVDS_PHY is not set +CONFIG_PHY_MIXEL_LVDS_PHY=m CONFIG_PHY_MIXEL_MIPI_DPHY=m CONFIG_PHY_MVEBU_A3700_COMPHY=m CONFIG_PHY_MVEBU_A3700_UTMI=m @@ -5730,6 +5816,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m CONFIG_PHY_QCOM_USB_HS=m CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m CONFIG_PHY_QCOM_USB_SS=m +# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set +# CONFIG_PHY_RCAR_GEN2 is not set +# CONFIG_PHY_RCAR_GEN3_PCIE is not set +CONFIG_PHY_RCAR_GEN3_USB2=m +# CONFIG_PHY_RCAR_GEN3_USB3 is not set CONFIG_PHY_ROCKCHIP_DPHY_RX0=m CONFIG_PHY_ROCKCHIP_DP=m CONFIG_PHY_ROCKCHIP_EMMC=m @@ -5762,6 +5853,7 @@ CONFIG_PINCONF=y CONFIG_PINCTRL_ALDERLAKE=m CONFIG_PINCTRL_AMD=y CONFIG_PINCTRL_AMLOGIC_C3=y +CONFIG_PINCTRL_AMLOGIC_T7=y CONFIG_PINCTRL_APPLE_GPIO=m CONFIG_PINCTRL_AS3722=y CONFIG_PINCTRL_AXP209=m @@ -5882,12 +5974,13 @@ CONFIG_PINCTRL_SUN50I_H6=y # CONFIG_PINCTRL_SUN8I_A33 is not set # CONFIG_PINCTRL_SUN8I_A83T is not set # CONFIG_PINCTRL_SUN8I_A83T_R is not set -# CONFIG_PINCTRL_SUN8I_H3 is not set CONFIG_PINCTRL_SUN8I_H3_R=y +CONFIG_PINCTRL_SUN8I_H3=y # CONFIG_PINCTRL_SUN8I_V3S is not set # CONFIG_PINCTRL_SUN9I_A80 is not set # CONFIG_PINCTRL_SUN9I_A80_R is not set # CONFIG_PINCTRL_SX150X is not set +CONFIG_PINCTRL_TEGRA234=y CONFIG_PINCTRL=y CONFIG_PINCTRL_ZYNQMP=y # CONFIG_PING is not set @@ -5933,7 +6026,6 @@ CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m CONFIG_POWER_RESET_AS3722=y # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -6068,6 +6160,8 @@ CONFIG_PWM_MESON=m CONFIG_PWM_OMAP_DMTIMER=m CONFIG_PWM_PCA9685=m CONFIG_PWM_RASPBERRYPI_POE=m +# CONFIG_PWM_RCAR is not set +# CONFIG_PWM_RENESAS_TPU is not set CONFIG_PWM_ROCKCHIP=m CONFIG_PWM_STMPE=y CONFIG_PWM_SUN4I=m @@ -6134,6 +6228,8 @@ CONFIG_QCOM_Q6V5_WCSS=m CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_QFPROM=m CONFIG_QCOM_QMI_HELPERS=m +CONFIG_QCOM_QSEECOM_UEFISECAPP=y +CONFIG_QCOM_QSEECOM=y CONFIG_QCOM_RAMP_CTRL=m CONFIG_QCOM_RMTFS_MEM=m CONFIG_QCOM_RPMHPD=y @@ -6240,6 +6336,10 @@ CONFIG_RASPBERRYPI_POWER=y CONFIG_RATIONAL_KUNIT_TEST=m # CONFIG_RAVE_SP_CORE is not set # CONFIG_RBTREE_TEST is not set +# CONFIG_RCAR_DMAC is not set +# CONFIG_RCAR_GEN3_THERMAL is not set +# CONFIG_RCAR_REMOTEPROC is not set +# CONFIG_RCAR_THERMAL is not set CONFIG_RC_ATI_REMOTE=m CONFIG_RC_CORE=y CONFIG_RC_DECODERS=y @@ -6275,7 +6375,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is 
not set CONFIG_REGMAP_I2C=y @@ -6319,6 +6419,7 @@ CONFIG_REGULATOR_HI655X=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77650=m CONFIG_REGULATOR_MAX77686=m @@ -6405,7 +6506,14 @@ CONFIG_RELOCATABLE=y # CONFIG_REMOTEPROC_CDEV is not set CONFIG_REMOTEPROC=y CONFIG_REMOTE_TARGET=m +# CONFIG_RENESAS_OSTM is not set # CONFIG_RENESAS_PHY is not set +# CONFIG_RENESAS_RPCIF is not set +# CONFIG_RENESAS_RZAWDT is not set +# CONFIG_RENESAS_RZG2LWDT is not set +# CONFIG_RENESAS_RZN1WDT is not set +# CONFIG_RENESAS_USB_DMAC is not set +# CONFIG_RENESAS_WDT is not set # CONFIG_RESET_ATTACK_MITIGATION is not set CONFIG_RESET_CONTROLLER=y CONFIG_RESET_HISI=y @@ -6416,6 +6524,7 @@ CONFIG_RESET_MESON=m CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RASPBERRYPI=y +CONFIG_RESET_RZG2L_USBPHY_CTRL=m CONFIG_RESET_SCMI=y CONFIG_RESET_SIMPLE=y CONFIG_RESET_TI_SCI=m @@ -6472,6 +6581,7 @@ CONFIG_ROCKCHIP_VOP2=y CONFIG_ROCKCHIP_VOP=y CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -6531,7 +6641,6 @@ CONFIG_RTC_DRV_ARMADA38X=m CONFIG_RTC_DRV_AS3722=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m CONFIG_RTC_DRV_CADENCE=m CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_CROS_EC=m @@ -6619,6 +6728,7 @@ CONFIG_RTC_DRV_RX8581=m CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_SA1100 is not set CONFIG_RTC_DRV_SD3078=m +# CONFIG_RTC_DRV_SH is not set CONFIG_RTC_DRV_SNVS=m CONFIG_RTC_DRV_STK17TA8=m CONFIG_RTC_DRV_SUN6I=y @@ -6694,6 +6804,10 @@ CONFIG_RV_REACT_PRINTK=y CONFIG_RV=y CONFIG_RXKAD=y # CONFIG_RXPERF is not set +# CONFIG_RZ_DMAC is not set +# CONFIG_RZG2L_ADC is not set +# CONFIG_RZG2L_THERMAL is not set +# CONFIG_RZ_MTU3 is not set CONFIG_S2IO=m # CONFIG_S390_KPROBES_SANITY_TEST is not set # CONFIG_S390_MODULES_SANITY_TEST is not set @@ -6714,6 +6828,7 @@ CONFIG_SATA_MV=m CONFIG_SATA_PMP=y # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_RCAR is not set CONFIG_SATA_SIL24=m # CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set @@ -6863,6 +6978,7 @@ CONFIG_SCSI_UFS_HISI=m CONFIG_SCSI_UFS_HPB=y CONFIG_SCSI_UFS_HWMON=y CONFIG_SCSI_UFS_QCOM=m +# CONFIG_SCSI_UFS_RENESAS is not set CONFIG_SCSI_UFS_TI_J721E=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_WD719X=m @@ -6892,11 +7008,12 @@ CONFIG_SDM_VIDEOCC_845=m # CONFIG_SDX_GCC_75 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -7038,6 +7155,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -7098,6 +7216,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -7171,6 +7290,7 @@ CONFIG_SERIAL_8250_CS=m CONFIG_SERIAL_8250_DFL=m CONFIG_SERIAL_8250_DMA=y 
CONFIG_SERIAL_8250_DW=y +# CONFIG_SERIAL_8250_EM is not set CONFIG_SERIAL_8250_EXAR=m CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_FINTEK is not set @@ -7233,6 +7353,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y CONFIG_SERIAL_SC16IS7XX=m CONFIG_SERIAL_SC16IS7XX_SPI=y # CONFIG_SERIAL_SCCNXP is not set +CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_SH_SCI_DMA=y +CONFIG_SERIAL_SH_SCI_EARLYCON=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=18 +CONFIG_SERIAL_SH_SCI=y # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SPRD is not set # CONFIG_SERIAL_ST_ASC is not set @@ -7299,7 +7424,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -7313,12 +7437,14 @@ CONFIG_SMC91X=m # CONFIG_SM_CAMCC_6350 is not set # CONFIG_SM_CAMCC_8250 is not set # CONFIG_SM_CAMCC_8450 is not set +# CONFIG_SM_CAMCC_8550 is not set CONFIG_SMC_DIAG=m CONFIG_SMC=m # CONFIG_SM_DISPCC_8250 is not set CONFIG_SM_DISPCC_8450=m # CONFIG_SM_DISPCC_8550 is not set # CONFIG_SM_FTL is not set +# CONFIG_SM_GCC_4450 is not set # CONFIG_SM_GCC_6115 is not set # CONFIG_SM_GCC_6125 is not set # CONFIG_SM_GCC_6350 is not set @@ -7353,7 +7479,7 @@ CONFIG_SMS_USB_DRV=m # CONFIG_SM_TCSRCC_8550 is not set # CONFIG_SM_VIDEOCC_8150 is not set # CONFIG_SM_VIDEOCC_8250 is not set -# CONFIG_SM_VIDEOCC_8350 is not set +CONFIG_SM_VIDEOCC_8350=m # CONFIG_SM_VIDEOCC_8450 is not set # CONFIG_SM_VIDEOCC_8550 is not set CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 @@ -7422,6 +7548,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -7575,8 +7702,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m CONFIG_SND_SOC_ARNDALE=m CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -7782,6 +7911,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m # CONFIG_SND_SOC_PEB2466 is not set CONFIG_SND_SOC_QCOM=m CONFIG_SND_SOC_QDSP6=m +# CONFIG_SND_SOC_RCAR is not set CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m CONFIG_SND_SOC_RK3328=m CONFIG_SND_SOC_RK3399_GRU_SOUND=m @@ -7816,6 +7946,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m +# CONFIG_SND_SOC_RZ is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m @@ -7827,6 +7959,7 @@ CONFIG_SND_SOC_SC8280XP=m CONFIG_SND_SOC_SDM845=m # CONFIG_SND_SOC_SDW_MOCKUP is not set CONFIG_SND_SOC_SGTL5000=m +# CONFIG_SND_SOC_SH4_FSI is not set CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m CONFIG_SND_SOC_SIMPLE_MUX=m # CONFIG_SND_SOC_SM8250 is not set @@ -8120,8 +8253,12 @@ CONFIG_SPI_QCOM_GENI=m CONFIG_SPI_QCOM_QSPI=m CONFIG_SPI_QUP=m CONFIG_SPI_ROCKCHIP=m -# CONFIG_SPI_ROCKCHIP_SFC is not set +CONFIG_SPI_ROCKCHIP_SFC=m +# CONFIG_SPI_RSPI is not set +# CONFIG_SPI_RZV2M_CSI is not set # CONFIG_SPI_SC18IS602 is not set +# CONFIG_SPI_SH_HSPI is not set +# CONFIG_SPI_SH_MSIOF is not set # CONFIG_SPI_SIFIVE is not set # CONFIG_SPI_SLAVE is not set CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m @@ -8354,6 +8491,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y 
CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -8430,6 +8568,7 @@ CONFIG_TEST_LOCKUP=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -8502,7 +8641,7 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set +CONFIG_TI_ICSSG_PRUETH=m CONFIG_TI_ICSS_IEP=m CONFIG_TI_K3_AM65_CPSW_NUSS=m CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y @@ -8717,6 +8856,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m CONFIG_TYPEC_QCOM_PMIC=m # CONFIG_TYPEC_RT1711H is not set @@ -8728,7 +8868,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m -CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -8821,6 +8960,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y @@ -8885,6 +9025,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHSET_TEST_FIXTURE is not set CONFIG_USB_EMI26=m CONFIG_USB_EMI62=m +# CONFIG_USB_EMXX is not set CONFIG_USB_EPSON2888=y # CONFIG_USB_ETH is not set CONFIG_USB_EZUSB_FX2=m @@ -8997,6 +9138,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_M66592 is not set CONFIG_USB_MA901=m @@ -9057,6 +9199,7 @@ CONFIG_USB_OTG_FSM=m # CONFIG_USB_OTG_PRODUCTLIST is not set CONFIG_USB_OTG=y # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -9073,6 +9216,9 @@ CONFIG_USB_QCOM_EUD=m CONFIG_USB_RAINSHADOW_CEC=m # CONFIG_USB_RAREMONO is not set CONFIG_USB_RAW_GADGET=m +# CONFIG_USB_RENESAS_USB3 is not set +# CONFIG_USB_RENESAS_USBF is not set +# CONFIG_USB_RENESAS_USBHS is not set CONFIG_USB_ROLE_SWITCH=y CONFIG_USB_RTL8150=m CONFIG_USB_RTL8152=m @@ -9184,6 +9330,7 @@ CONFIG_USB_XHCI_MVEBU=m CONFIG_USB_XHCI_PCI_RENESAS=y CONFIG_USB_XHCI_PCI=y CONFIG_USB_XHCI_PLATFORM=m +CONFIG_USB_XHCI_RCAR=m CONFIG_USB_XHCI_TEGRA=m CONFIG_USB_XUSBATM=m CONFIG_USB=y @@ -9354,7 +9501,7 @@ CONFIG_VIDEO_IMX8_ISI=m CONFIG_VIDEO_IMX8_ISI_M2M=y CONFIG_VIDEO_IMX8_JPEG=m CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m -CONFIG_VIDEO_IMX_MEDIA=m +# CONFIG_VIDEO_IMX_MEDIA is not set CONFIG_VIDEO_IMX_MIPI_CSIS=m CONFIG_VIDEO_IMX_PXP=m # CONFIG_VIDEO_IPU3_CIO2 is not set @@ -9371,10 +9518,12 @@ CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set CONFIG_VIDEO_MESON_GE2D=m CONFIG_VIDEO_MESON_VDEC=m +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -9422,12 +9571,19 @@ CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_SYSFS=y CONFIG_VIDEO_QCOM_CAMSS=m CONFIG_VIDEO_QCOM_VENUS=m +# CONFIG_VIDEO_RCAR_CSI2 is not set +# CONFIG_VIDEO_RCAR_ISP is not set +# CONFIG_VIDEO_RCAR_VIN is not set CONFIG_VIDEO_RDACM20=m # CONFIG_VIDEO_RDACM21 is not set +# CONFIG_VIDEO_RENESAS_FCP is not set +# CONFIG_VIDEO_RENESAS_JPU is not set CONFIG_VIDEO_RJ54N1=m CONFIG_VIDEO_ROCKCHIP_ISP1=m 
CONFIG_VIDEO_ROCKCHIP_RGA=m CONFIG_VIDEO_ROCKCHIP_VDEC=m +# CONFIG_VIDEO_RZG2L_CRU is not set +# CONFIG_VIDEO_RZG2L_CSI2 is not set CONFIG_VIDEO_S5C73M3=m CONFIG_VIDEO_S5K4ECGX=m CONFIG_VIDEO_S5K5BAF=m @@ -9478,6 +9634,7 @@ CONFIG_VIDEO_THS7303=m CONFIG_VIDEO_THS8200=m CONFIG_VIDEO_TI_CAL=m CONFIG_VIDEO_TI_CAL_MC=y +CONFIG_VIDEO_TI_J721E_CSI2RX=m CONFIG_VIDEO_TLV320AIC23B=m CONFIG_VIDEO_TM6000_ALSA=m CONFIG_VIDEO_TM6000_DVB=m @@ -9683,6 +9840,7 @@ CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set # CONFIG_XEN is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -9801,19 +9959,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-debug-rhel.config b/SOURCES/kernel-aarch64-debug-rhel.config index edb7db5..ebbe575 100644 --- a/SOURCES/kernel-aarch64-debug-rhel.config +++ b/SOURCES/kernel-aarch64-debug-rhel.config @@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m # CONFIG_AMD_XGBE_DCB is not set CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y # CONFIG_AMT is not set # CONFIG_ANDROID_BINDER_IPC is not set @@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_ARCH_MXC=y # CONFIG_ARCH_NPCM is not set CONFIG_ARCH_NXP=y +CONFIG_ARCH_PENSANDO=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_REALTEK is not set @@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y # CONFIG_ARM_QCOM_CPUFREQ_HW is not set CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y # CONFIG_ARM_SCMI_POWER_CONTROL is not set CONFIG_ARM_SCMI_POWER_DOMAIN=m CONFIG_ARM_SCMI_PROTOCOL=y @@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set 
CONFIG_BRIDGE_EBT_802_3=m @@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM_NS2=y CONFIG_CLK_BCM_SR=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1085,7 +1090,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_AES_ARM64=y -CONFIG_CRYPTO_AES_GCM_P10=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ANSI_CPRNG=m @@ -1102,7 +1106,6 @@ CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CHACHA20_NEON=y -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CMAC=y # CONFIG_CRYPTO_CRC32C_VPMSUM is not set @@ -1189,6 +1192,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1213,7 +1221,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m CONFIG_CRYPTO_POLY1305_NEON=y -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1485,6 +1492,7 @@ CONFIG_DPAA2_CONSOLE=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1557,6 +1565,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set CONFIG_DRM_IMX_DCSS=m # CONFIG_DRM_IMX_LCDC is not set # CONFIG_DRM_IMX_LCDIF is not set @@ -1586,38 +1595,92 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # 
CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LG_LG4573 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # 
CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1637,7 +1700,8 @@ CONFIG_DRM_RADEON_USERPTR=y # CONFIG_DRM_SIMPLE_BRIDGE is not set CONFIG_DRM_SIMPLEDRM=y # CONFIG_DRM_SSD130X is not set -# CONFIG_DRM_TEGRA is not set +# CONFIG_DRM_TEGRA_DEBUG is not set +CONFIG_DRM_TEGRA=m # CONFIG_DRM_THINE_THC63LVD1024 is not set # CONFIG_DRM_TI_DLPC3433 is not set # CONFIG_DRM_TIDSS is not set @@ -1805,7 +1869,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1839,7 +1902,12 @@ CONFIG_ENIC=m # CONFIG_EPIC100 is not set CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1912,7 +1980,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2029,7 +2097,9 @@ CONFIG_FSL_PQ_MDIO=m # CONFIG_FSL_RCPM is not set CONFIG_FSL_XGMAC_MDIO=m CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -2037,6 +2107,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2189,6 +2260,7 @@ CONFIG_GPIO_XLP=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set CONFIG_GUP_TEST=y CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2372,6 +2444,7 @@ CONFIG_HNS_ENET=m CONFIG_HNS=m # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2535,6 +2608,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2548,6 +2622,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2617,7 +2692,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2751,6 +2825,7 @@ CONFIG_INPUT_SPARSEKMAP=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y # CONFIG_INPUT_YEALINK is not set +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2793,6 +2868,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # 
CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2820,7 +2896,8 @@ CONFIG_IOMMU_DEBUGFS=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -3122,7 +3199,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3152,6 +3229,7 @@ CONFIG_KUNIT_TEST=m CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3319,6 +3397,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3333,6 +3412,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3341,6 +3421,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3407,6 +3488,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3444,6 +3526,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3510,7 +3593,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3651,6 +3734,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3767,6 +3851,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3839,6 +3926,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3916,7 +4005,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m # CONFIG_MWL8K is not 
set -# CONFIG_MX3_IPU is not set # CONFIG_MXC4005 is not set # CONFIG_MXC6255 is not set # CONFIG_MXS_DMA is not set @@ -3961,9 +4049,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4085,6 +4170,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4097,15 +4183,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4134,6 +4217,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4238,7 +4322,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4436,9 +4520,11 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_BCM_OCOTP is not set # CONFIG_NVMEM_IMX_IIM is not set @@ -4465,7 +4551,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4585,6 +4673,7 @@ CONFIG_PCC=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -4650,6 +4739,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4729,6 +4819,7 @@ CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_AMD is not set # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4817,7 +4908,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -4974,6 +5064,7 @@ CONFIG_QCOM_L3_PMU=y # CONFIG_QCOM_PDC is not set CONFIG_QCOM_QDF2400_ERRATUM_0065=y # CONFIG_QCOM_QFPROM is not set +# CONFIG_QCOM_QSEECOM is not set # CONFIG_QCOM_RAMP_CTRL is not set # CONFIG_QCOM_RMTFS_MEM is not set # CONFIG_QCOM_RPMH is 
not set @@ -5002,7 +5093,7 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set +CONFIG_QORIQ_THERMAL=m CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -5110,6 +5201,7 @@ CONFIG_REGULATOR_GPIO=y # CONFIG_REGULATOR_MAX1586 is not set # CONFIG_REGULATOR_MAX20086 is not set # CONFIG_REGULATOR_MAX20411 is not set +# CONFIG_REGULATOR_MAX77503 is not set CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77686=m # CONFIG_REGULATOR_MAX77826 is not set @@ -5214,6 +5306,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5254,7 +5347,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5544,6 +5636,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5699,6 +5792,7 @@ CONFIG_SENSORS_LTC2945=m # CONFIG_SENSORS_LTC2978 is not set # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m # CONFIG_SENSORS_LTC4151 is not set @@ -5757,6 +5851,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set # CONFIG_SENSORS_PMBUS is not set +# CONFIG_SENSORS_POWERZ is not set CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -5933,7 +6028,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -6017,6 +6111,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6154,8 +6249,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6246,6 +6343,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6360,12 +6458,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# 
CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -6388,6 +6480,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6584,7 +6677,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set CONFIG_SOC_IMX8M=y CONFIG_SOC_IMX9=m # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set @@ -6815,6 +6907,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6881,6 +6974,7 @@ CONFIG_TEST_LIST_SORT=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7112,6 +7206,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7190,6 +7285,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CONN_GPIO=m @@ -7293,6 +7389,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7336,6 +7433,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7486,7 +7584,10 @@ CONFIG_VEXPRESS_CONFIG=y # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y CONFIG_VFIO_FSL_MC=m +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m # CONFIG_VFIO_MDEV is not set @@ -7601,11 +7702,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7940,19 +8043,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set 
CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-fedora.config b/SOURCES/kernel-aarch64-fedora.config index 2ae7f79..1bb5dec 100644 --- a/SOURCES/kernel-aarch64-fedora.config +++ b/SOURCES/kernel-aarch64-fedora.config @@ -254,6 +254,7 @@ CONFIG_AMD_XGBE_DCB=y CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set CONFIG_AMLOGIC_THERMAL=m +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y CONFIG_AMT=m CONFIG_ANDROID_BINDER_DEVICES="binder,hwbinder,vndbinder" @@ -329,12 +330,33 @@ CONFIG_ARCH_MXC=y CONFIG_ARCH_NR_GPIO=2048 CONFIG_ARCH_NXP=y # CONFIG_ARCH_OMAP1 is not set +# CONFIG_ARCH_PENSANDO is not set # CONFIG_ARCH_PXA is not set CONFIG_ARCH_QCOM=y +CONFIG_ARCH_R8A774A1=y +# CONFIG_ARCH_R8A774B1 is not set +# CONFIG_ARCH_R8A774C0 is not set +# CONFIG_ARCH_R8A774E1 is not set +# CONFIG_ARCH_R8A77951 is not set +# CONFIG_ARCH_R8A77960 is not set +# CONFIG_ARCH_R8A77961 is not set +# CONFIG_ARCH_R8A77965 is not set +# CONFIG_ARCH_R8A77970 is not set +# CONFIG_ARCH_R8A77980 is not set +# CONFIG_ARCH_R8A77990 is not set +# CONFIG_ARCH_R8A77995 is not set +# CONFIG_ARCH_R8A779A0 is not set +# CONFIG_ARCH_R8A779F0 is not set +# CONFIG_ARCH_R8A779G0 is not set +CONFIG_ARCH_R9A07G043=y +CONFIG_ARCH_R9A07G044=y +CONFIG_ARCH_R9A07G054=y +# CONFIG_ARCH_R9A08G045 is not set +# CONFIG_ARCH_R9A09G011 is not set CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_RDA is not set # CONFIG_ARCH_REALTEK is not set -# CONFIG_ARCH_RENESAS is not set +CONFIG_ARCH_RENESAS=y CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARCH_S32 is not set # CONFIG_ARCH_SA1100 is not set @@ -396,6 +418,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -484,6 +507,7 @@ CONFIG_ARM_RASPBERRYPI_CPUFREQ=m CONFIG_ARM_RK3399_DMC_DEVFREQ=m CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y CONFIG_ARM_SCMI_POWERCAP=m CONFIG_ARM_SCMI_POWER_CONTROL=m CONFIG_ARM_SCMI_POWER_DOMAIN=m @@ -531,7 +555,7 @@ CONFIG_ATA_ACPI=y CONFIG_ATA_BMDMA=y CONFIG_ATA_FORCE=y CONFIG_ATA_GENERIC=m -# CONFIG_ATALK is not set +CONFIG_ATALK=m CONFIG_ATA_OVER_ETH=m CONFIG_ATA_PIIX=y # CONFIG_ATARI_PARTITION is not set @@ -702,6 +726,7 @@ CONFIG_BATTERY_GAUGE_LTC2941=m CONFIG_BATTERY_MAX17040=m CONFIG_BATTERY_MAX17042=m # CONFIG_BATTERY_MAX1721X is not set +# CONFIG_BATTERY_PM8916_BMS_VM is not set CONFIG_BATTERY_QCOM_BATTMGR=m CONFIG_BATTERY_RT5033=m CONFIG_BATTERY_SAMSUNG_SDI=y @@ -715,6 +740,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM2711_THERMAL=m 
CONFIG_BCM2835_MBOX=y @@ -867,7 +901,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCMSTB_L2_IRQ=y # CONFIG_BRCM_TRACING is not set CONFIG_BRCMUTIL=m @@ -980,7 +1013,6 @@ CONFIG_CADENCE_WATCHDOG=m # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -1017,6 +1049,8 @@ CONFIG_CAN_NETLINK=y CONFIG_CAN_PEAK_PCIEFD=m CONFIG_CAN_PEAK_USB=m CONFIG_CAN_RAW=m +# CONFIG_CAN_RCAR_CANFD is not set +# CONFIG_CAN_RCAR is not set # CONFIG_CAN_SJA1000 is not set CONFIG_CAN_SLCAN=m # CONFIG_CAN_SOFTING is not set @@ -1075,6 +1109,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -1117,6 +1152,7 @@ CONFIG_CHARGER_MAX77650=m CONFIG_CHARGER_MAX77976=m # CONFIG_CHARGER_MAX8903 is not set CONFIG_CHARGER_MT6370=m +# CONFIG_CHARGER_PM8916_LBC is not set # CONFIG_CHARGER_QCOM_SMB2 is not set CONFIG_CHARGER_QCOM_SMBB=m CONFIG_CHARGER_RK817=m @@ -1168,6 +1204,7 @@ CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM2711_DVP=m CONFIG_CLK_BCM2835=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set CONFIG_CLK_ICST=y @@ -1183,6 +1220,7 @@ CONFIG_CLK_LS1028A_PLLDIG=y CONFIG_CLK_PX30=y CONFIG_CLK_QORIQ=y CONFIG_CLK_RASPBERRYPI=y +# CONFIG_CLK_RCAR_USB2_CLOCK_SEL is not set CONFIG_CLK_RK3036=y CONFIG_CLK_RK312X=y CONFIG_CLK_RK3188=y @@ -1268,6 +1306,8 @@ CONFIG_COMMON_CLK_QCOM=y CONFIG_COMMON_CLK_RK808=m CONFIG_COMMON_CLK_ROCKCHIP=y CONFIG_COMMON_CLK_RS9_PCIE=m +CONFIG_COMMON_CLK_S4_PERIPHERALS=y +CONFIG_COMMON_CLK_S4_PLL=y CONFIG_COMMON_CLK_SCMI=y CONFIG_COMMON_CLK_SCPI=m # CONFIG_COMMON_CLK_SI514 is not set @@ -1299,7 +1339,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1387,6 +1426,7 @@ CONFIG_CROS_EC_UART=m CONFIG_CROS_EC_VBC=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1533,6 +1573,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1644,6 +1689,7 @@ CONFIG_DE2104X=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set # CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1732,7 +1778,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1844,6 +1889,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y # 
CONFIG_DRBD_FAULT_INJECTION is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m CONFIG_DRM_ACCEL_QAIC=m CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1927,6 +1973,7 @@ CONFIG_DRM_IMX8QXP_LDB=m CONFIG_DRM_IMX8QXP_PIXEL_COMBINER=m CONFIG_DRM_IMX8QXP_PIXEL_LINK=m CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI=m +CONFIG_DRM_IMX93_MIPI_DSI=m CONFIG_DRM_IMX_DCSS=m CONFIG_DRM_IMX_LCDC=m CONFIG_DRM_IMX_LCDIF=m @@ -1990,9 +2037,11 @@ CONFIG_DRM_PANEL_HIMAX_HX8394=m CONFIG_DRM_PANEL_ILITEK_IL9322=m CONFIG_DRM_PANEL_ILITEK_ILI9341=m CONFIG_DRM_PANEL_ILITEK_ILI9881C=m +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m CONFIG_DRM_PANEL_KHADAS_TS050=m @@ -2022,6 +2071,7 @@ CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN=m # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set CONFIG_DRM_PANEL_RAYDIUM_RM68200=m +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -2066,13 +2116,16 @@ CONFIG_DRM_PL111=m CONFIG_DRM_QXL=m CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y +# CONFIG_DRM_RCAR_DU is not set # CONFIG_DRM_RCAR_DW_HDMI is not set # CONFIG_DRM_RCAR_LVDS is not set # CONFIG_DRM_RCAR_MIPI_DSI is not set # CONFIG_DRM_RCAR_USE_LVDS is not set # CONFIG_DRM_RCAR_USE_MIPI_DSI is not set CONFIG_DRM_ROCKCHIP=m +# CONFIG_DRM_RZG2L_MIPI_DSI is not set CONFIG_DRM_SAMSUNG_DSIM=m +# CONFIG_DRM_SHMOBILE is not set # CONFIG_DRM_SII902X is not set CONFIG_DRM_SII9234=m # CONFIG_DRM_SIL_SII8620 is not set @@ -2092,7 +2145,7 @@ CONFIG_DRM_TEGRA_STAGING=y # CONFIG_DRM_THINE_THC63LVD1024 is not set CONFIG_DRM_TI_DLPC3433=m CONFIG_DRM_TIDSS=m -# CONFIG_DRM_TI_SN65DSI83 is not set +CONFIG_DRM_TI_SN65DSI83=m CONFIG_DRM_TI_SN65DSI86=m CONFIG_DRM_TI_TFP410=m CONFIG_DRM_TI_TPD12S015=m @@ -2260,6 +2313,7 @@ CONFIG_EDAC_LEGACY_SYSFS=y CONFIG_EDAC_QCOM=m CONFIG_EDAC_SYNOPSYS=m CONFIG_EDAC_THUNDERX=m +CONFIG_EDAC_VERSAL=m CONFIG_EDAC_XGENE=m CONFIG_EDAC=y CONFIG_EDAC_ZYNQMP=m @@ -2269,7 +2323,6 @@ CONFIG_EEPROM_AT24=m CONFIG_EEPROM_AT25=m CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_ARMSTUB_DTB_LOADER=y # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -2392,7 +2445,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2546,6 +2599,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTWDT010_WATCHDOG is not set +CONFIG_FUEL_GAUGE_MM8013=m CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2676,6 +2730,7 @@ CONFIG_GPIO_PCI_IDIO_16=m # CONFIG_GPIO_PISOSR is not set CONFIG_GPIO_PL061=y CONFIG_GPIO_RASPBERRYPI_EXP=m +CONFIG_GPIO_RCAR=m # CONFIG_GPIO_RDC321X is not set CONFIG_GPIO_ROCKCHIP=y # CONFIG_GPIO_SAMA5D2_PIOBU is not set @@ -2905,6 +2960,7 @@ CONFIG_HNS_ENET=m CONFIG_HOLTEK_FF=y # CONFIG_HOSTAP is not set CONFIG_HOTPLUG_CPU=y +CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA=m CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -3058,9 +3114,13 @@ CONFIG_I2C_PXA=m CONFIG_I2C_QCOM_CCI=m CONFIG_I2C_QCOM_GENI=m CONFIG_I2C_QUP=m +# 
CONFIG_I2C_RCAR is not set +# CONFIG_I2C_RIIC is not set CONFIG_I2C_RK3X=y # CONFIG_I2C_ROBOTFUZZ_OSIF is not set +# CONFIG_I2C_RZV2M is not set CONFIG_I2C_SCMI=m +# CONFIG_I2C_SH_MOBILE is not set CONFIG_I2C_SI470X=m # CONFIG_I2C_SI4713 is not set CONFIG_I2C_SIMTEC=m @@ -3102,6 +3162,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -3194,7 +3255,6 @@ CONFIG_IMA_NG_TEMPLATE=y CONFIG_IMA_READ_POLICY=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -3403,6 +3463,7 @@ CONFIG_INTERCONNECT_QCOM_SC8280XP=m CONFIG_INTERCONNECT_QCOM_SDM845=m # CONFIG_INTERCONNECT_QCOM_SDX55 is not set # CONFIG_INTERCONNECT_QCOM_SDX65 is not set +CONFIG_INTERCONNECT_QCOM_SDX75=m # CONFIG_INTERCONNECT_QCOM_SM6350 is not set CONFIG_INTERCONNECT_QCOM_SM8150=m CONFIG_INTERCONNECT_QCOM_SM8250=m @@ -3465,8 +3526,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y CONFIG_IPMB_DEVICE_INTERFACE=m CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3477,6 +3536,7 @@ CONFIG_IPMI_POWEROFF=m CONFIG_IPMI_SI=m CONFIG_IPMI_SSIF=m CONFIG_IPMI_WATCHDOG=m +# CONFIG_IPMMU_VMSA is not set CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_MROUTE=y CONFIG_IP_MULTICAST=y @@ -3799,7 +3859,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3835,6 +3895,7 @@ CONFIG_KUNIT=m CONFIG_KUNIT_TEST=m # CONFIG_KUNPENG_HCCS is not set CONFIG_KUSER_HELPERS=y +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3892,6 +3953,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -4019,6 +4081,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -4033,6 +4096,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -4041,6 +4105,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -4112,6 +4177,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set CONFIG_MCP320X=m CONFIG_MCP3422=m +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -4122,6 +4188,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -4143,7 +4210,7 @@ CONFIG_MDIO_I2C=m CONFIG_MDIO_IPQ8064=m # CONFIG_MDIO_MSCC_MIIM is not set CONFIG_MDIO_MVUSB=m -# CONFIG_MDIO_OCTEON 
is not set +CONFIG_MDIO_OCTEON=m # CONFIG_MDIO_SUN4I is not set CONFIG_MDIO_THUNDER=m CONFIG_MDIO_XGENE=m @@ -4157,6 +4224,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -4398,18 +4466,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -4497,7 +4569,11 @@ CONFIG_MMC_SDHCI_PLTFM=m CONFIG_MMC_SDHCI_PXAV3=m CONFIG_MMC_SDHCI_TEGRA=m CONFIG_MMC_SDHCI_XENON=m +CONFIG_MMC_SDHI_INTERNAL_DMAC=m +CONFIG_MMC_SDHI=m +# CONFIG_MMC_SDHI_SYS_DMAC is not set CONFIG_MMC_SDRICOH_CS=m +# CONFIG_MMC_SH_MMCIF is not set CONFIG_MMC_SPI=m # CONFIG_MMC_STM32_SDMMC is not set CONFIG_MMC_SUNXI=m @@ -4530,6 +4606,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -4620,6 +4699,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -4678,6 +4759,7 @@ CONFIG_MTD_NAND_NANDSIM=m # CONFIG_MTD_NAND_PL35X is not set # CONFIG_MTD_NAND_PLATFORM is not set # CONFIG_MTD_NAND_QCOM is not set +# CONFIG_MTD_NAND_RENESAS is not set # CONFIG_MTD_NAND_RICOH is not set # CONFIG_MTD_NAND_ROCKCHIP is not set # CONFIG_MTD_NAND_SUNXI is not set @@ -4745,7 +4827,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m CONFIG_MWL8K=m -# CONFIG_MX3_IPU is not set CONFIG_MXC4005=m CONFIG_MXC6255=m # CONFIG_MXS_DMA is not set @@ -4799,9 +4880,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4874,12 +4952,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4972,6 +5050,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4984,15 +5063,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m CONFIG_NET_SB1000=y -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not 
set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -5026,6 +5102,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -5163,7 +5240,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -5357,11 +5434,13 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set CONFIG_NVME_APPLE=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_APPLE_EFUSES=m # CONFIG_NVMEM_IMX_IIM is not set @@ -5397,7 +5476,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -5545,6 +5626,7 @@ CONFIG_PCI_AARDVARK=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -5583,10 +5665,13 @@ CONFIG_PCIE_MOBIVEIL=y CONFIG_PCIEPORTBUS=y CONFIG_PCIE_PTM=y CONFIG_PCIE_QCOM=y +# CONFIG_PCIE_RCAR_GEN4_HOST is not set +# CONFIG_PCIE_RCAR_HOST is not set CONFIG_PCIE_ROCKCHIP_DW_HOST=y CONFIG_PCIE_ROCKCHIP_HOST=y CONFIG_PCIE_TEGRA194_HOST=y CONFIG_PCIE_XILINX_CPM=y +CONFIG_PCIE_XILINX_DMA_PL=y CONFIG_PCIE_XILINX_NWL=y CONFIG_PCIE_XILINX=y # CONFIG_PCI_FTPCI100 is not set @@ -5620,6 +5705,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -5669,7 +5755,7 @@ CONFIG_PHY_MESON_G12A_MIPI_DPHY_ANALOG=y CONFIG_PHY_MESON_G12A_USB2=y CONFIG_PHY_MESON_G12A_USB3_PCIE=m CONFIG_PHY_MESON_GXL_USB2=m -# CONFIG_PHY_MIXEL_LVDS_PHY is not set +CONFIG_PHY_MIXEL_LVDS_PHY=m CONFIG_PHY_MIXEL_MIPI_DPHY=m CONFIG_PHY_MVEBU_A3700_COMPHY=m CONFIG_PHY_MVEBU_A3700_UTMI=m @@ -5703,6 +5789,11 @@ CONFIG_PHY_QCOM_USB_HSIC=m CONFIG_PHY_QCOM_USB_HS=m CONFIG_PHY_QCOM_USB_SNPS_FEMTO_V2=m CONFIG_PHY_QCOM_USB_SS=m +# CONFIG_PHY_R8A779F0_ETHERNET_SERDES is not set +# CONFIG_PHY_RCAR_GEN2 is not set +# CONFIG_PHY_RCAR_GEN3_PCIE is not set +CONFIG_PHY_RCAR_GEN3_USB2=m +# CONFIG_PHY_RCAR_GEN3_USB3 is not set CONFIG_PHY_ROCKCHIP_DPHY_RX0=m CONFIG_PHY_ROCKCHIP_DP=m CONFIG_PHY_ROCKCHIP_EMMC=m @@ -5735,6 +5826,7 @@ CONFIG_PINCONF=y CONFIG_PINCTRL_ALDERLAKE=m CONFIG_PINCTRL_AMD=y CONFIG_PINCTRL_AMLOGIC_C3=y +CONFIG_PINCTRL_AMLOGIC_T7=y CONFIG_PINCTRL_APPLE_GPIO=m CONFIG_PINCTRL_AS3722=y CONFIG_PINCTRL_AXP209=m @@ -5855,12 +5947,13 @@ CONFIG_PINCTRL_SUN50I_H6=y # CONFIG_PINCTRL_SUN8I_A33 is not set # CONFIG_PINCTRL_SUN8I_A83T is not set # CONFIG_PINCTRL_SUN8I_A83T_R is not set -# CONFIG_PINCTRL_SUN8I_H3 is not set CONFIG_PINCTRL_SUN8I_H3_R=y +CONFIG_PINCTRL_SUN8I_H3=y # CONFIG_PINCTRL_SUN8I_V3S is not set # CONFIG_PINCTRL_SUN9I_A80 is not set # CONFIG_PINCTRL_SUN9I_A80_R 
is not set # CONFIG_PINCTRL_SX150X is not set +CONFIG_PINCTRL_TEGRA234=y CONFIG_PINCTRL=y CONFIG_PINCTRL_ZYNQMP=y # CONFIG_PING is not set @@ -5906,7 +5999,6 @@ CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m CONFIG_POWER_RESET_AS3722=y # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -6041,6 +6133,8 @@ CONFIG_PWM_MESON=m CONFIG_PWM_OMAP_DMTIMER=m CONFIG_PWM_PCA9685=m CONFIG_PWM_RASPBERRYPI_POE=m +# CONFIG_PWM_RCAR is not set +# CONFIG_PWM_RENESAS_TPU is not set CONFIG_PWM_ROCKCHIP=m CONFIG_PWM_STMPE=y CONFIG_PWM_SUN4I=m @@ -6107,6 +6201,8 @@ CONFIG_QCOM_Q6V5_WCSS=m CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_QFPROM=m CONFIG_QCOM_QMI_HELPERS=m +CONFIG_QCOM_QSEECOM_UEFISECAPP=y +CONFIG_QCOM_QSEECOM=y CONFIG_QCOM_RAMP_CTRL=m CONFIG_QCOM_RMTFS_MEM=m CONFIG_QCOM_RPMHPD=y @@ -6213,6 +6309,10 @@ CONFIG_RASPBERRYPI_POWER=y CONFIG_RATIONAL_KUNIT_TEST=m # CONFIG_RAVE_SP_CORE is not set # CONFIG_RBTREE_TEST is not set +# CONFIG_RCAR_DMAC is not set +# CONFIG_RCAR_GEN3_THERMAL is not set +# CONFIG_RCAR_REMOTEPROC is not set +# CONFIG_RCAR_THERMAL is not set CONFIG_RC_ATI_REMOTE=m CONFIG_RC_CORE=y CONFIG_RC_DECODERS=y @@ -6248,7 +6348,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -6292,6 +6392,7 @@ CONFIG_REGULATOR_HI655X=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77650=m CONFIG_REGULATOR_MAX77686=m @@ -6378,7 +6479,14 @@ CONFIG_RELOCATABLE=y # CONFIG_REMOTEPROC_CDEV is not set CONFIG_REMOTEPROC=y CONFIG_REMOTE_TARGET=m +# CONFIG_RENESAS_OSTM is not set # CONFIG_RENESAS_PHY is not set +# CONFIG_RENESAS_RPCIF is not set +# CONFIG_RENESAS_RZAWDT is not set +# CONFIG_RENESAS_RZG2LWDT is not set +# CONFIG_RENESAS_RZN1WDT is not set +# CONFIG_RENESAS_USB_DMAC is not set +# CONFIG_RENESAS_WDT is not set # CONFIG_RESET_ATTACK_MITIGATION is not set CONFIG_RESET_CONTROLLER=y CONFIG_RESET_HISI=y @@ -6389,6 +6497,7 @@ CONFIG_RESET_MESON=m CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RASPBERRYPI=y +CONFIG_RESET_RZG2L_USBPHY_CTRL=m CONFIG_RESET_SCMI=y CONFIG_RESET_SIMPLE=y CONFIG_RESET_TI_SCI=m @@ -6445,6 +6554,7 @@ CONFIG_ROCKCHIP_VOP2=y CONFIG_ROCKCHIP_VOP=y CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -6504,7 +6614,6 @@ CONFIG_RTC_DRV_ARMADA38X=m CONFIG_RTC_DRV_AS3722=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m CONFIG_RTC_DRV_CADENCE=m CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_CROS_EC=m @@ -6592,6 +6701,7 @@ CONFIG_RTC_DRV_RX8581=m CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_SA1100 is not set CONFIG_RTC_DRV_SD3078=m +# CONFIG_RTC_DRV_SH is not set CONFIG_RTC_DRV_SNVS=m CONFIG_RTC_DRV_STK17TA8=m CONFIG_RTC_DRV_SUN6I=y @@ -6667,6 +6777,10 @@ CONFIG_RV_REACT_PRINTK=y CONFIG_RV=y CONFIG_RXKAD=y # CONFIG_RXPERF is not set +# CONFIG_RZ_DMAC is not set +# CONFIG_RZG2L_ADC is not set +# CONFIG_RZG2L_THERMAL is not set +# CONFIG_RZ_MTU3 is not set CONFIG_S2IO=m # CONFIG_S390_KPROBES_SANITY_TEST is not set # CONFIG_S390_MODULES_SANITY_TEST is not set @@ -6687,6 +6801,7 @@ CONFIG_SATA_MV=m CONFIG_SATA_PMP=y # 
CONFIG_SATA_PROMISE is not set # CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_RCAR is not set CONFIG_SATA_SIL24=m # CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set @@ -6836,6 +6951,7 @@ CONFIG_SCSI_UFS_HISI=m CONFIG_SCSI_UFS_HPB=y CONFIG_SCSI_UFS_HWMON=y CONFIG_SCSI_UFS_QCOM=m +# CONFIG_SCSI_UFS_RENESAS is not set CONFIG_SCSI_UFS_TI_J721E=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_WD719X=m @@ -6865,11 +6981,12 @@ CONFIG_SDM_VIDEOCC_845=m # CONFIG_SDX_GCC_75 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -7011,6 +7128,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -7071,6 +7189,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -7144,6 +7263,7 @@ CONFIG_SERIAL_8250_CS=m CONFIG_SERIAL_8250_DFL=m CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_DW=y +# CONFIG_SERIAL_8250_EM is not set CONFIG_SERIAL_8250_EXAR=m CONFIG_SERIAL_8250_EXTENDED=y # CONFIG_SERIAL_8250_FINTEK is not set @@ -7206,6 +7326,11 @@ CONFIG_SERIAL_SC16IS7XX_I2C=y CONFIG_SERIAL_SC16IS7XX=m CONFIG_SERIAL_SC16IS7XX_SPI=y # CONFIG_SERIAL_SCCNXP is not set +CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_SH_SCI_DMA=y +CONFIG_SERIAL_SH_SCI_EARLYCON=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=18 +CONFIG_SERIAL_SH_SCI=y # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SPRD is not set # CONFIG_SERIAL_ST_ASC is not set @@ -7272,7 +7397,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -7286,12 +7410,14 @@ CONFIG_SMC91X=m # CONFIG_SM_CAMCC_6350 is not set # CONFIG_SM_CAMCC_8250 is not set # CONFIG_SM_CAMCC_8450 is not set +# CONFIG_SM_CAMCC_8550 is not set CONFIG_SMC_DIAG=m CONFIG_SMC=m # CONFIG_SM_DISPCC_8250 is not set CONFIG_SM_DISPCC_8450=m # CONFIG_SM_DISPCC_8550 is not set # CONFIG_SM_FTL is not set +# CONFIG_SM_GCC_4450 is not set # CONFIG_SM_GCC_6115 is not set # CONFIG_SM_GCC_6125 is not set # CONFIG_SM_GCC_6350 is not set @@ -7326,7 +7452,7 @@ CONFIG_SMS_USB_DRV=m # CONFIG_SM_TCSRCC_8550 is not set # CONFIG_SM_VIDEOCC_8150 is not set # CONFIG_SM_VIDEOCC_8250 is not set -# CONFIG_SM_VIDEOCC_8350 is not set +CONFIG_SM_VIDEOCC_8350=m # CONFIG_SM_VIDEOCC_8450 is not set # CONFIG_SM_VIDEOCC_8550 is not set CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 @@ -7395,6 +7521,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -7547,8 +7674,10 @@ CONFIG_SND_SOC_APQ8016_SBC=m CONFIG_SND_SOC_ARNDALE=m CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m 
CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -7754,6 +7883,7 @@ CONFIG_SND_SOC_PCM512x_SPI=m # CONFIG_SND_SOC_PEB2466 is not set CONFIG_SND_SOC_QCOM=m CONFIG_SND_SOC_QDSP6=m +# CONFIG_SND_SOC_RCAR is not set CONFIG_SND_SOC_RK3288_HDMI_ANALOG=m CONFIG_SND_SOC_RK3328=m CONFIG_SND_SOC_RK3399_GRU_SOUND=m @@ -7788,6 +7918,8 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m +# CONFIG_SND_SOC_RZ is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811=m @@ -7799,6 +7931,7 @@ CONFIG_SND_SOC_SC8280XP=m CONFIG_SND_SOC_SDM845=m # CONFIG_SND_SOC_SDW_MOCKUP is not set CONFIG_SND_SOC_SGTL5000=m +# CONFIG_SND_SOC_SH4_FSI is not set CONFIG_SND_SOC_SIMPLE_AMPLIFIER=m CONFIG_SND_SOC_SIMPLE_MUX=m # CONFIG_SND_SOC_SM8250 is not set @@ -8091,8 +8224,12 @@ CONFIG_SPI_QCOM_GENI=m CONFIG_SPI_QCOM_QSPI=m CONFIG_SPI_QUP=m CONFIG_SPI_ROCKCHIP=m -# CONFIG_SPI_ROCKCHIP_SFC is not set +CONFIG_SPI_ROCKCHIP_SFC=m +# CONFIG_SPI_RSPI is not set +# CONFIG_SPI_RZV2M_CSI is not set # CONFIG_SPI_SC18IS602 is not set +# CONFIG_SPI_SH_HSPI is not set +# CONFIG_SPI_SH_MSIOF is not set # CONFIG_SPI_SIFIVE is not set # CONFIG_SPI_SLAVE is not set CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m @@ -8325,6 +8462,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -8401,6 +8539,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -8473,7 +8612,7 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set +CONFIG_TI_ICSSG_PRUETH=m CONFIG_TI_ICSS_IEP=m CONFIG_TI_K3_AM65_CPSW_NUSS=m CONFIG_TI_K3_AM65_CPSW_SWITCHDEV=y @@ -8688,6 +8827,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m CONFIG_TYPEC_QCOM_PMIC=m # CONFIG_TYPEC_RT1711H is not set @@ -8699,7 +8839,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m -CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -8792,6 +8931,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y @@ -8856,6 +8996,7 @@ CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHSET_TEST_FIXTURE is not set CONFIG_USB_EMI26=m CONFIG_USB_EMI62=m +# CONFIG_USB_EMXX is not set CONFIG_USB_EPSON2888=y # CONFIG_USB_ETH is not set CONFIG_USB_EZUSB_FX2=m @@ -8968,6 +9109,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_M66592 is not set CONFIG_USB_MA901=m @@ -9028,6 +9170,7 @@ CONFIG_USB_OTG_FSM=m # CONFIG_USB_OTG_PRODUCTLIST is not set CONFIG_USB_OTG=y # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -9044,6 +9187,9 @@ CONFIG_USB_QCOM_EUD=m CONFIG_USB_RAINSHADOW_CEC=m # CONFIG_USB_RAREMONO is not set 
CONFIG_USB_RAW_GADGET=m +# CONFIG_USB_RENESAS_USB3 is not set +# CONFIG_USB_RENESAS_USBF is not set +# CONFIG_USB_RENESAS_USBHS is not set CONFIG_USB_ROLE_SWITCH=y CONFIG_USB_RTL8150=m CONFIG_USB_RTL8152=m @@ -9155,6 +9301,7 @@ CONFIG_USB_XHCI_MVEBU=m CONFIG_USB_XHCI_PCI_RENESAS=y CONFIG_USB_XHCI_PCI=y CONFIG_USB_XHCI_PLATFORM=m +CONFIG_USB_XHCI_RCAR=m CONFIG_USB_XHCI_TEGRA=m CONFIG_USB_XUSBATM=m CONFIG_USB=y @@ -9325,7 +9472,7 @@ CONFIG_VIDEO_IMX8_ISI=m CONFIG_VIDEO_IMX8_ISI_M2M=y CONFIG_VIDEO_IMX8_JPEG=m CONFIG_VIDEO_IMX8MQ_MIPI_CSI2=m -CONFIG_VIDEO_IMX_MEDIA=m +# CONFIG_VIDEO_IMX_MEDIA is not set CONFIG_VIDEO_IMX_MIPI_CSIS=m CONFIG_VIDEO_IMX_PXP=m # CONFIG_VIDEO_IPU3_CIO2 is not set @@ -9342,10 +9489,12 @@ CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set CONFIG_VIDEO_MESON_GE2D=m CONFIG_VIDEO_MESON_VDEC=m +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -9393,12 +9542,19 @@ CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_SYSFS=y CONFIG_VIDEO_QCOM_CAMSS=m CONFIG_VIDEO_QCOM_VENUS=m +# CONFIG_VIDEO_RCAR_CSI2 is not set +# CONFIG_VIDEO_RCAR_ISP is not set +# CONFIG_VIDEO_RCAR_VIN is not set CONFIG_VIDEO_RDACM20=m # CONFIG_VIDEO_RDACM21 is not set +# CONFIG_VIDEO_RENESAS_FCP is not set +# CONFIG_VIDEO_RENESAS_JPU is not set CONFIG_VIDEO_RJ54N1=m CONFIG_VIDEO_ROCKCHIP_ISP1=m CONFIG_VIDEO_ROCKCHIP_RGA=m CONFIG_VIDEO_ROCKCHIP_VDEC=m +# CONFIG_VIDEO_RZG2L_CRU is not set +# CONFIG_VIDEO_RZG2L_CSI2 is not set CONFIG_VIDEO_S5C73M3=m CONFIG_VIDEO_S5K4ECGX=m CONFIG_VIDEO_S5K5BAF=m @@ -9449,6 +9605,7 @@ CONFIG_VIDEO_THS7303=m CONFIG_VIDEO_THS8200=m CONFIG_VIDEO_TI_CAL=m CONFIG_VIDEO_TI_CAL_MC=y +CONFIG_VIDEO_TI_J721E_CSI2RX=m CONFIG_VIDEO_TLV320AIC23B=m CONFIG_VIDEO_TM6000_ALSA=m CONFIG_VIDEO_TM6000_DVB=m @@ -9654,6 +9811,7 @@ CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set # CONFIG_XEN is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -9772,19 +9930,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-rhel.config b/SOURCES/kernel-aarch64-rhel.config index 5a535cf..cfcc4f8 100644 --- a/SOURCES/kernel-aarch64-rhel.config +++ b/SOURCES/kernel-aarch64-rhel.config @@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m # CONFIG_AMD_XGBE_DCB is not set CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y # CONFIG_AMT is not set # CONFIG_ANDROID_BINDER_IPC 
is not set @@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_ARCH_MXC=y # CONFIG_ARCH_NPCM is not set CONFIG_ARCH_NXP=y +CONFIG_ARCH_PENSANDO=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_REALTEK is not set @@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y # CONFIG_ARM_QCOM_CPUFREQ_HW is not set CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y # CONFIG_ARM_SCMI_POWER_CONTROL is not set CONFIG_ARM_SCMI_POWER_DOMAIN=m CONFIG_ARM_SCMI_PROTOCOL=y @@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y # CONFIG_CFG80211_DEBUGFS is not set CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM_NS2=y CONFIG_CLK_BCM_SR=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1085,7 +1090,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_AES_ARM64=y -CONFIG_CRYPTO_AES_GCM_P10=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ANSI_CPRNG=m @@ -1102,7 +1106,6 @@ CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CHACHA20_NEON=y -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CMAC=y # CONFIG_CRYPTO_CRC32C_VPMSUM is not set @@ -1189,6 +1192,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1213,7 +1221,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m 
CONFIG_CRYPTO_POLY1305=m CONFIG_CRYPTO_POLY1305_NEON=y -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1477,6 +1484,7 @@ CONFIG_DPAA2_CONSOLE=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1549,6 +1557,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set CONFIG_DRM_IMX_DCSS=m # CONFIG_DRM_IMX_LCDC is not set # CONFIG_DRM_IMX_LCDIF is not set @@ -1578,38 +1587,92 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LG_LG4573 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is 
not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1629,7 +1692,8 @@ CONFIG_DRM_RADEON_USERPTR=y # CONFIG_DRM_SIMPLE_BRIDGE is not set CONFIG_DRM_SIMPLEDRM=y # CONFIG_DRM_SSD130X is not set -# CONFIG_DRM_TEGRA is not set +# CONFIG_DRM_TEGRA_DEBUG is not set +CONFIG_DRM_TEGRA=m # CONFIG_DRM_THINE_THC63LVD1024 is not set # CONFIG_DRM_TI_DLPC3433 is not set # CONFIG_DRM_TIDSS is not set @@ -1797,7 +1861,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1831,7 +1894,12 @@ CONFIG_ENIC=m # CONFIG_EPIC100 is not set CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1896,7 +1964,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2013,7 +2081,9 @@ CONFIG_FSL_PQ_MDIO=m # CONFIG_FSL_RCPM is not set CONFIG_FSL_XGMAC_MDIO=m CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -2021,6 +2091,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_FUJITSU_ES is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set @@ -2173,6 +2244,7 @@ CONFIG_GPIO_XLP=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# 
CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2356,6 +2428,7 @@ CONFIG_HNS_ENET=m CONFIG_HNS=m # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2519,6 +2592,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2532,6 +2606,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2601,7 +2676,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2735,6 +2809,7 @@ CONFIG_INPUT_SPARSEKMAP=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y # CONFIG_INPUT_YEALINK is not set +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2777,6 +2852,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2804,7 +2880,8 @@ CONFIG_IO_DELAY_0X80=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -3100,7 +3177,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3131,6 +3208,7 @@ CONFIG_KUNIT_TEST=m CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3298,6 +3376,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3312,6 +3391,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3320,6 +3400,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3386,6 +3467,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not 
set @@ -3423,6 +3505,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3489,7 +3572,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3630,6 +3713,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3746,6 +3830,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3818,6 +3905,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3895,7 +3984,6 @@ CONFIG_MWIFIEX_PCIE=m CONFIG_MWIFIEX_SDIO=m CONFIG_MWIFIEX_USB=m # CONFIG_MWL8K is not set -# CONFIG_MX3_IPU is not set # CONFIG_MXC4005 is not set # CONFIG_MXC6255 is not set # CONFIG_MXS_DMA is not set @@ -3940,9 +4028,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4064,6 +4149,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4076,15 +4162,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4113,6 +4196,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4217,7 +4301,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4415,9 +4499,11 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y +CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_BCM_OCOTP is not set # CONFIG_NVMEM_IMX_IIM is not set @@ -4444,7 +4530,9 @@ CONFIG_NVME_TARGET=m # 
CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4562,6 +4650,7 @@ CONFIG_PCC=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_AL is not set @@ -4627,6 +4716,7 @@ CONFIG_PCI_XGENE_MSI=y CONFIG_PCI_XGENE=y CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4706,6 +4796,7 @@ CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_AMD is not set # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4794,7 +4885,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_HISI=y @@ -4951,6 +5041,7 @@ CONFIG_QCOM_L3_PMU=y # CONFIG_QCOM_PDC is not set CONFIG_QCOM_QDF2400_ERRATUM_0065=y # CONFIG_QCOM_QFPROM is not set +# CONFIG_QCOM_QSEECOM is not set # CONFIG_QCOM_RAMP_CTRL is not set # CONFIG_QCOM_RMTFS_MEM is not set # CONFIG_QCOM_RPMH is not set @@ -4979,7 +5070,7 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set +CONFIG_QORIQ_THERMAL=m CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -5087,6 +5178,7 @@ CONFIG_REGULATOR_GPIO=y # CONFIG_REGULATOR_MAX1586 is not set # CONFIG_REGULATOR_MAX20086 is not set # CONFIG_REGULATOR_MAX20411 is not set +# CONFIG_REGULATOR_MAX77503 is not set CONFIG_REGULATOR_MAX77620=y CONFIG_REGULATOR_MAX77686=m # CONFIG_REGULATOR_MAX77826 is not set @@ -5191,6 +5283,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5231,7 +5324,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BBNSM=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5521,6 +5613,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5676,6 +5769,7 @@ CONFIG_SENSORS_LTC2945=m # CONFIG_SENSORS_LTC2978 is not set # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m # CONFIG_SENSORS_LTC4151 is not set @@ -5734,6 +5828,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set # CONFIG_SENSORS_PMBUS is not set +# CONFIG_SENSORS_POWERZ is not set CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -5910,7 +6005,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set 
CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5994,6 +6088,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6130,8 +6225,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6222,6 +6319,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6336,12 +6434,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -6364,6 +6456,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6559,7 +6652,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set CONFIG_SOC_IMX8M=y CONFIG_SOC_IMX9=m # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set @@ -6790,6 +6882,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6856,6 +6949,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7087,6 +7181,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7165,6 +7260,7 @@ CONFIG_USB_CHIPIDEA_HOST=y CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_TEGRA=m CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CONN_GPIO=m @@ -7268,6 +7364,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7311,6 
+7408,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7461,7 +7559,10 @@ CONFIG_VEXPRESS_CONFIG=y # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y CONFIG_VFIO_FSL_MC=m +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m # CONFIG_VFIO_MDEV is not set @@ -7576,11 +7677,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7915,19 +8018,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-aarch64-rt-debug-rhel.config b/SOURCES/kernel-aarch64-rt-debug-rhel.config index ef78813..9fbeebc 100644 --- a/SOURCES/kernel-aarch64-rt-debug-rhel.config +++ b/SOURCES/kernel-aarch64-rt-debug-rhel.config @@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m # CONFIG_AMD_XGBE_DCB is not set CONFIG_AMD_XGBE=m # CONFIG_AMIGA_PARTITION is not set +CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y # CONFIG_AMT is not set # CONFIG_ANDROID_BINDER_IPC is not set @@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_ARCH_MXC=y # CONFIG_ARCH_NPCM is not set CONFIG_ARCH_NXP=y +CONFIG_ARCH_PENSANDO=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_RANDOM=y # CONFIG_ARCH_REALTEK is not set @@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_ERRATUM_2645198=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_ERRATUM_2966298=y +CONFIG_ARM64_ERRATUM_3117295=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_826319=y @@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y # CONFIG_ARM_QCOM_CPUFREQ_HW is not set CONFIG_ARM_SBSA_WATCHDOG=m CONFIG_ARM_SCMI_CPUFREQ=m +CONFIG_ARM_SCMI_PERF_DOMAIN=y # CONFIG_ARM_SCMI_POWER_CONTROL is not set CONFIG_ARM_SCMI_POWER_DOMAIN=m CONFIG_ARM_SCMI_PROTOCOL=y @@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not 
set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y CONFIG_CLK_BCM_NS2=y CONFIG_CLK_BCM_SR=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1086,7 +1091,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_AES_ARM64=y -CONFIG_CRYPTO_AES_GCM_P10=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ANSI_CPRNG=m @@ -1103,7 +1107,6 @@ CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_CHACHA20_NEON=y -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CMAC=y # CONFIG_CRYPTO_CRC32C_VPMSUM is not set @@ -1190,6 +1193,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1214,7 +1222,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m CONFIG_CRYPTO_POLY1305_NEON=y -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1487,6 +1494,7 @@ CONFIG_DPAA2_CONSOLE=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1559,6 +1567,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set CONFIG_DRM_IMX_DCSS=m # CONFIG_DRM_IMX_LCDC is not set # CONFIG_DRM_IMX_LCDIF is not set @@ -1588,38 +1597,92 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set 
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
 # CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
 # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
 # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
 # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
 # CONFIG_DRM_PANEL_LG_LB035Q02 is not set
 # CONFIG_DRM_PANEL_LG_LG4573 is not set
 # CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
 # CONFIG_DRM_PANEL_MIPI_DBI is not set
 # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
 # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
 # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
 # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
 # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
 # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
 # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
 # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
 # CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
 # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
 # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
 # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
 # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
 # CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
 # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
 # CONFIG_DRM_PANFROST is not set
 # CONFIG_DRM_PARADE_PS8622 is not set
 # CONFIG_DRM_PARADE_PS8640 is not set
@@ -1639,7 +1702,8 @@ CONFIG_DRM_RADEON_USERPTR=y
 # CONFIG_DRM_SIMPLE_BRIDGE is not set
 CONFIG_DRM_SIMPLEDRM=y
 # CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
 # CONFIG_DRM_THINE_THC63LVD1024 is not set
 # CONFIG_DRM_TI_DLPC3433 is not set
 # CONFIG_DRM_TIDSS is not set
@@ -1843,7 +1907,6 @@ CONFIG_EEPROM_AT24=m
 # CONFIG_EEPROM_AT25 is not set
 CONFIG_EEPROM_EE1004=m
 # CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
 CONFIG_EEPROM_MAX6875=m
 # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
 # CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1877,7 +1940,12 @@ CONFIG_ENIC=m
 # CONFIG_EPIC100 is not set
 CONFIG_EPOLL=y
 # CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
 CONFIG_ETHERNET=y
 CONFIG_ETHOC=m
 CONFIG_ETHTOOL_NETLINK=y
@@ -1950,7 +2018,7 @@ CONFIG_FAULT_INJECTION=y
 # CONFIG_FB_CIRRUS is not set
 # CONFIG_FB_CYBER2000 is not set
 # CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
 CONFIG_FB_EFI=y
 # CONFIG_FB_FOREIGN_ENDIAN is not set
 # CONFIG_FB_GEODE is not set
@@ -2067,7 +2135,9 @@ CONFIG_FSL_PQ_MDIO=m
 # CONFIG_FSL_RCPM is not set
 CONFIG_FSL_XGMAC_MDIO=m
 CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
 # CONFIG_FTL is not set
 CONFIG_FTRACE_MCOUNT_RECORD=y
 # CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2075,6 +2145,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
 # CONFIG_FTRACE_STARTUP_TEST is not set
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
 CONFIG_FUJITSU_ERRATUM_010001=y
 # CONFIG_FUJITSU_ES is not set
 # CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2228,6 +2299,7 @@ CONFIG_GPIO_XLP=m
 # CONFIG_GREYBUS is not set
 # CONFIG_GS_FPGABOOT is not set
 # CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
 CONFIG_GUP_TEST=y
 CONFIG_GVE=m
 # CONFIG_HABANA_AI is not set
@@ -2412,6 +2484,7 @@ CONFIG_HNS_ENET=m
 CONFIG_HNS=m
 # CONFIG_HOLTEK_FF is not set
 CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
 CONFIG_HOTPLUG_PCI_ACPI_IBM=m
 CONFIG_HOTPLUG_PCI_ACPI=y
 # CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2575,6 +2648,7 @@ CONFIG_I40E=m
 CONFIG_I40EVF=m
 # CONFIG_I6300ESB_WDT is not set
 # CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
 # CONFIG_IAQCORE is not set
 CONFIG_IAVF=m
 # CONFIG_IB700_WDT is not set
@@ -2588,6 +2662,7 @@ CONFIG_ICPLUS_PHY=m
 # CONFIG_ICS932S401 is not set
 # CONFIG_IDLE_INJECT is not set
 CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
 CONFIG_IEEE802154_6LOWPAN=m
 # CONFIG_IEEE802154_ADF7242 is not set
 # CONFIG_IEEE802154_AT86RF230 is not set
@@ -2657,7 +2732,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
 CONFIG_IMA_READ_POLICY=y
 CONFIG_IMA_SIG_TEMPLATE=y
 # CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA=y
 # CONFIG_IMG_ASCII_LCD is not set
@@ -2791,6 +2865,7 @@ CONFIG_INPUT_SPARSEKMAP=m
 CONFIG_INPUT_UINPUT=m
 CONFIG_INPUT=y
 # CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
 # CONFIG_INT3406_THERMAL is not set
 CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
 CONFIG_INTEGRITY_AUDIT=y
@@ -2833,6 +2908,7 @@ CONFIG_INTEL_SDSI=m
 # CONFIG_INTEL_SOC_PMIC_CHTWC is not set
 # CONFIG_INTEL_SOC_PMIC is not set
 # CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
 # CONFIG_INTEL_TH is not set
 CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
 # CONFIG_INTEL_VSC is not set
@@ -2860,7 +2936,8 @@ CONFIG_IOMMU_DEBUGFS=y
 CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
 # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
 # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
 # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
 # CONFIG_IOMMU_IO_PGTABLE_DART is not set
 # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3162,7 +3239,7 @@ CONFIG_KEY_NOTIFICATIONS=y
 CONFIG_KEYS=y
 # CONFIG_KFENCE_DEFERRABLE is not set
 # CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
 CONFIG_KFENCE_NUM_OBJECTS=255
 CONFIG_KFENCE_SAMPLE_INTERVAL=100
 # CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3192,6 +3269,7 @@ CONFIG_KUNIT_TEST=m
 CONFIG_KVM_AMD_SEV=y
 # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
 # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
 CONFIG_KVM_PROVE_MMU=y
 CONFIG_KVM_SMM=y
 # CONFIG_KVM_WERROR is not set
@@ -3359,6 +3437,7 @@ CONFIG_LSI_ET1011C_PHY=m
 CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
 CONFIG_LSM_MMAP_MIN_ADDR=65535
 # CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
 # CONFIG_LTC2471 is not set
 # CONFIG_LTC2485 is not set
 # CONFIG_LTC2496 is not set
@@ -3373,6 +3452,7 @@ CONFIG_LTO_NONE=y
 # CONFIG_LTR501 is not set
 # CONFIG_LTRF216A is not set
 # CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
 CONFIG_LWTUNNEL_BPF=y
 CONFIG_LWTUNNEL=y
 CONFIG_LXT_PHY=m
@@ -3381,6 +3461,7 @@ CONFIG_LZ4_COMPRESS=m
 CONFIG_MAC80211_DEBUGFS=y
 # CONFIG_MAC80211_DEBUG_MENU is not set
 CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
 CONFIG_MAC80211_LEDS=y
 CONFIG_MAC80211=m
 # CONFIG_MAC80211_MESH is not set
@@ -3447,6 +3528,7 @@ CONFIG_MAX_SKB_FRAGS=17
 # CONFIG_MCORE2 is not set
 # CONFIG_MCP320X is not set
 # CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
 # CONFIG_MCP3911 is not set
 # CONFIG_MCP4018 is not set
 # CONFIG_MCP41010 is not set
@@ -3484,6 +3566,7 @@ CONFIG_MD_RAID10=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID456=m
 CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
 CONFIG_MEDIA_ALTERA_CI=m
 # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
 CONFIG_MEDIA_ATTACH=y
@@ -3555,7 +3638,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
 # CONFIG_MFD_BD9571MWV is not set
 # CONFIG_MFD_CPCAP is not set
 # CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
 # CONFIG_MFD_DA9052_I2C is not set
 # CONFIG_MFD_DA9052_SPI is not set
 # CONFIG_MFD_DA9055 is not set
@@ -3696,6 +3779,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
 CONFIG_MLX5_CORE_EN=y
 CONFIG_MLX5_CORE_IPOIB=y
 CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
 CONFIG_MLX5_EN_ARFS=y
 CONFIG_MLX5_EN_IPSEC=y
 CONFIG_MLX5_EN_MACSEC=y
@@ -3812,6 +3896,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
 # CONFIG_MODULE_SIG_SHA1 is not set
 # CONFIG_MODULE_SIG_SHA224 is not set
 # CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
 # CONFIG_MODULE_SIG_SHA384 is not set
 CONFIG_MODULE_SIG_SHA512=y
 CONFIG_MODULE_SIG=y
@@ -3884,6 +3971,8 @@ CONFIG_MT76x2U=m
 CONFIG_MT7921E=m
 # CONFIG_MT7921S is not set
 # CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
 # CONFIG_MT7996E is not set
 # CONFIG_MTD_ABSENT is not set
 # CONFIG_MTD_AFS_PARTS is not set
@@ -3961,7 +4050,6 @@ CONFIG_MWIFIEX_PCIE=m
 CONFIG_MWIFIEX_SDIO=m
 CONFIG_MWIFIEX_USB=m
 # CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
 # CONFIG_MXC4005 is not set
 # CONFIG_MXC6255 is not set
 # CONFIG_MXS_DMA is not set
@@ -4006,9 +4094,6 @@ CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_MATCHALL=m
 # CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
 CONFIG_NET_CLS_U32=m
 CONFIG_NET_CLS=y
 CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4130,6 +4215,7 @@ CONFIG_NET_IPIP=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_KEY=m
 CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_NETLABEL=y
 CONFIG_NETLINK_DIAG=y
@@ -4142,15 +4228,12 @@ CONFIG_NET_PKTGEN=m
 CONFIG_NET_POLL_CONTROLLER=y
 CONFIG_NET_RX_BUSY_POLL=y
 # CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
 CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
 CONFIG_NET_SCH_CBS=m
 # CONFIG_NET_SCH_CHOKE is not set
 # CONFIG_NET_SCH_CODEL is not set
 CONFIG_NET_SCH_DEFAULT=y
 # CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_ETS=m
@@ -4179,6 +4262,7 @@ CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SWITCHDEV=y
 CONFIG_NET_TC_SKB_EXT=y
 # CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
 # CONFIG_NET_TULIP is not set
 CONFIG_NET_UDP_TUNNEL=m
 # CONFIG_NET_VENDOR_3COM is not set
@@ -4283,7 +4367,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_CT_NETLINK_HELPER=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
 CONFIG_NF_CT_PROTO_SCTP=y
 CONFIG_NF_CT_PROTO_UDPLITE=y
 CONFIG_NF_DUP_NETDEV=m
@@ -4481,9 +4565,11 @@ CONFIG_NVDIMM_PFN=y
 # CONFIG_NVDIMM_SECURITY_TEST is not set
 # CONFIG_NVHE_EL2_DEBUG is not set
 CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
 # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
 CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
 # CONFIG_NVME_HWMON is not set
 # CONFIG_NVMEM_BCM_OCOTP is not set
 # CONFIG_NVMEM_IMX_IIM is not set
@@ -4510,7 +4596,9 @@ CONFIG_NVME_TARGET=m
 # CONFIG_NVME_TARGET_PASSTHRU is not set
 CONFIG_NVME_TARGET_RDMA=m
 CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
 CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
 # CONFIG_NVME_VERBOSE_ERRORS is not set
 # CONFIG_NVRAM is not set
 # CONFIG_NVSW_SN2201 is not set
@@ -4630,6 +4718,7 @@ CONFIG_PCC=y
 # CONFIG_PCI_CNB20LE_QUIRK is not set
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
 CONFIG_PCIEAER_INJECT=m
 CONFIG_PCIEAER=y
 # CONFIG_PCIE_AL is not set
@@ -4695,6 +4784,7 @@ CONFIG_PCI_XGENE_MSI=y
 CONFIG_PCI_XGENE=y
 CONFIG_PCI=y
 # CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
 CONFIG_PCPU_DEV_REFCNT=y
 CONFIG_PCSPKR_PLATFORM=y
 CONFIG_PCS_XPCS=m
@@ -4774,6 +4864,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
 # CONFIG_PINCTRL_AMD is not set
 # CONFIG_PINCTRL_BROXTON is not set
 # CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
 # CONFIG_PINCTRL_CY8C95X0 is not set
 CONFIG_PINCTRL_ELKHARTLAKE=m
 CONFIG_PINCTRL_EMMITSBURG=m
@@ -4862,7 +4953,6 @@ CONFIG_POSIX_TIMERS=y
 CONFIG_POWERNV_CPUFREQ=y
 CONFIG_POWERNV_OP_PANEL=m
 # CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
 CONFIG_POWER_RESET_GPIO_RESTART=y
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_POWER_RESET_HISI=y
@@ -5023,6 +5113,7 @@ CONFIG_QCOM_L3_PMU=y
 # CONFIG_QCOM_PDC is not set
 CONFIG_QCOM_QDF2400_ERRATUM_0065=y
 # CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
 # CONFIG_QCOM_RAMP_CTRL is not set
 # CONFIG_QCOM_RMTFS_MEM is not set
 # CONFIG_QCOM_RPMH is not set
@@ -5051,7 +5142,7 @@ CONFIG_QLA3XXX=m
 # CONFIG_QNX4FS_FS is not set
 # CONFIG_QNX6FS_FS is not set
 # CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
 CONFIG_QRTR=m
 CONFIG_QRTR_MHI=m
 # CONFIG_QRTR_SMD is not set
@@ -5162,6 +5253,7 @@ CONFIG_REGULATOR_GPIO=y
 # CONFIG_REGULATOR_MAX1586 is not set
 # CONFIG_REGULATOR_MAX20086 is not set
 # CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
 CONFIG_REGULATOR_MAX77620=y
 CONFIG_REGULATOR_MAX77686=m
 # CONFIG_REGULATOR_MAX77826 is not set
@@ -5266,6 +5358,7 @@ CONFIG_RMI4_SPI=m
 CONFIG_ROCKCHIP_PHY=m
 CONFIG_ROCKER=m
 CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
 # CONFIG_ROHM_BU27008 is not set
 # CONFIG_ROHM_BU27034 is not set
 # CONFIG_ROMFS_FS is not set
@@ -5306,7 +5399,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
 CONFIG_RTC_DRV_ABX80X=m
 CONFIG_RTC_DRV_BBNSM=m
 CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
 # CONFIG_RTC_DRV_CADENCE is not set
 CONFIG_RTC_DRV_CMOS=y
 CONFIG_RTC_DRV_DS1286=m
@@ -5596,6 +5688,7 @@ CONFIG_SDIO_UART=m
 # CONFIG_SDX_GCC_55 is not set
 # CONFIG_SECCOMP_CACHE_DEBUG is not set
 CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
 CONFIG_SECONDARY_TRUSTED_KEYRING=y
 CONFIG_SECRETMEM=y
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5751,6 +5844,7 @@ CONFIG_SENSORS_LTC2945=m
 # CONFIG_SENSORS_LTC2978 is not set
 # CONFIG_SENSORS_LTC2978_REGULATOR is not set
 # CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
 # CONFIG_SENSORS_LTC2992 is not set
 CONFIG_SENSORS_LTC3815=m
 # CONFIG_SENSORS_LTC4151 is not set
@@ -5809,6 +5903,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
 # CONFIG_SENSORS_PLI1209BC is not set
 # CONFIG_SENSORS_PM6764TR is not set
 # CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
 CONFIG_SENSORS_POWR1220=m
 CONFIG_SENSORS_PWM_FAN=m
 # CONFIG_SENSORS_PXE1610 is not set
@@ -5985,8 +6080,6 @@ CONFIG_SLIP_COMPRESSED=y
 CONFIG_SLIP=m
 # CONFIG_SLIP_MODE_SLIP6 is not set
 CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
-# CONFIG_SLOB is not set
 # CONFIG_SLUB_CPU_PARTIAL is not set
 # CONFIG_SLUB_DEBUG_ON is not set
 CONFIG_SLUB_DEBUG=y
@@ -6071,6 +6164,7 @@ CONFIG_SND_FIREWORKS=m
 # CONFIG_SND_FM801_TEA575X_BOOL is not set
 CONFIG_SND_GINA20=m
 CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
 CONFIG_SND_HDA_CODEC_ANALOG=m
 CONFIG_SND_HDA_CODEC_CA0110=m
 CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6208,8 +6302,10 @@ CONFIG_SND_SEQ_UMP=y
 # CONFIG_SND_SOC_ARNDALE is not set
 # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
 # CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
 # CONFIG_SND_SOC_AW88261 is not set
 # CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
 # CONFIG_SND_SOC_BD28623 is not set
 # CONFIG_SND_SOC_BT_SCO is not set
 # CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6300,6 +6396,7 @@ CONFIG_SND_SOC_CX2072X=m
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6414,12 +6511,6 @@ CONFIG_SND_SOC_MAX98927=m
 # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
 # CONFIG_SND_SOC_RK817 is not set
 # CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
 # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
 # CONFIG_SND_SOC_RT1308 is not set
 # CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6442,6 +6533,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
 # CONFIG_SND_SOC_RT715_SDW is not set
 CONFIG_SND_SOC_RT722_SDCA_SDW=m
 # CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
 # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
 # CONFIG_SND_SOC_SAMSUNG is not set
 # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6638,7 +6730,6 @@ CONFIG_SND_VX222=m
 # CONFIG_SND_XEN_FRONTEND is not set
 # CONFIG_SND_YMFPCI is not set
 # CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
 CONFIG_SOC_IMX8M=y
 CONFIG_SOC_IMX9=m
 # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6869,6 +6960,7 @@ CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_PSCSI=m
 # CONFIG_TCM_QLA2XXX is not set
 CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_BBR=m
 CONFIG_TCP_CONG_BIC=m
@@ -6935,6 +7027,7 @@ CONFIG_TEST_LIST_SORT=m
 # CONFIG_TEST_MEMINIT is not set
 CONFIG_TEST_MIN_HEAP=m
 # CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
 # CONFIG_TEST_OVERFLOW is not set
 # CONFIG_TEST_PARMAN is not set
 # CONFIG_TEST_POWER is not set
@@ -7166,6 +7259,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
 # CONFIG_TYPEC_MUX_GPIO_SBU is not set
 # CONFIG_TYPEC_MUX_NB7VPQ904M is not set
 CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
 # CONFIG_TYPEC_NVIDIA_ALTMODE is not set
 # CONFIG_TYPEC_QCOM_PMIC is not set
 # CONFIG_TYPEC_RT1711H is not set
@@ -7245,6 +7339,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_USB_CHIPIDEA_IMX=m
 CONFIG_USB_CHIPIDEA=m
 CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
 CONFIG_USB_CHIPIDEA_TEGRA=m
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CONN_GPIO=m
@@ -7348,6 +7443,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
 CONFIG_USB_LED_TRIG=y
 CONFIG_USB_LEGOTOWER=m
 # CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
 CONFIG_USB_M5602=m
 # CONFIG_USB_MA901 is not set
 # CONFIG_USB_MAX3421_HCD is not set
@@ -7391,6 +7487,7 @@ CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_PRODUCTLIST is not set
 # CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
 CONFIG_USB_PCI=y
 # CONFIG_USBPCWATCHDOG is not set
 CONFIG_USB_PEGASUS=m
@@ -7541,7 +7638,10 @@ CONFIG_VEXPRESS_CONFIG=y
 # CONFIG_VF610_DAC is not set
 CONFIG_VFAT_FS=m
 # CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
 CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
 CONFIG_VFIO_IOMMU_TYPE1=m
 CONFIG_VFIO=m
 # CONFIG_VFIO_MDEV is not set
@@ -7656,11 +7756,13 @@ CONFIG_VIDEO_IVTV=m
 # CONFIG_VIDEO_M5MOLS is not set
 # CONFIG_VIDEO_MAX9286 is not set
 # CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
 # CONFIG_VIDEO_ML86V7667 is not set
 # CONFIG_VIDEO_MSP3400 is not set
 # CONFIG_VIDEO_MT9M001 is not set
 # CONFIG_VIDEO_MT9M032 is not set
 # CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
 # CONFIG_VIDEO_MT9P031 is not set
 # CONFIG_VIDEO_MT9T001 is not set
 # CONFIG_VIDEO_MT9T112 is not set
@@ -8001,19 +8103,18 @@ CONFIG_ZENIFY=y
 CONFIG_WINESYNC=y
 CONFIG_USER_NS_UNPRIVILEGED=y
 CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
 CONFIG_HID_IPTS=m
 CONFIG_HID_ITHC=m
 CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
 CONFIG_IPC_CLASSES=y
 CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-aarch64-rt-rhel.config b/SOURCES/kernel-aarch64-rt-rhel.config
index e21fe86..77d7f10 100644
--- a/SOURCES/kernel-aarch64-rt-rhel.config
+++ b/SOURCES/kernel-aarch64-rt-rhel.config
@@ -220,6 +220,7 @@ CONFIG_AMD_PMC=m
 # CONFIG_AMD_XGBE_DCB is not set
 CONFIG_AMD_XGBE=m
 # CONFIG_AMIGA_PARTITION is not set
+CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
 CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y
 # CONFIG_AMT is not set
 # CONFIG_ANDROID_BINDER_IPC is not set
@@ -261,6 +262,7 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
 CONFIG_ARCH_MXC=y
 # CONFIG_ARCH_NPCM is not set
 CONFIG_ARCH_NXP=y
+CONFIG_ARCH_PENSANDO=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_RANDOM=y
 # CONFIG_ARCH_REALTEK is not set
@@ -324,6 +326,7 @@ CONFIG_ARM64_ERRATUM_2457168=y
 CONFIG_ARM64_ERRATUM_2645198=y
 CONFIG_ARM64_ERRATUM_2658417=y
 CONFIG_ARM64_ERRATUM_2966298=y
+CONFIG_ARM64_ERRATUM_3117295=y
 CONFIG_ARM64_ERRATUM_819472=y
 CONFIG_ARM64_ERRATUM_824069=y
 CONFIG_ARM64_ERRATUM_826319=y
@@ -375,6 +378,7 @@ CONFIG_ARM_PMU=y
 # CONFIG_ARM_QCOM_CPUFREQ_HW is not set
 CONFIG_ARM_SBSA_WATCHDOG=m
 CONFIG_ARM_SCMI_CPUFREQ=m
+CONFIG_ARM_SCMI_PERF_DOMAIN=y
 # CONFIG_ARM_SCMI_POWER_CONTROL is not set
 CONFIG_ARM_SCMI_POWER_DOMAIN=m
 CONFIG_ARM_SCMI_PROTOCOL=y
@@ -407,6 +411,7 @@ CONFIG_ARM_TI_CPUFREQ=y
 CONFIG_ASN1=y
 # CONFIG_ASUS_TF103C_DOCK is not set
 # CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_ASYNC_TX_DMA=y
@@ -520,6 +525,7 @@ CONFIG_BASE_FULL=y
 # CONFIG_BATTERY_SAMSUNG_SDI is not set
 # CONFIG_BATTERY_SBS is not set
 # CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
 # CONFIG_BCACHE is not set
 # CONFIG_BCM54140_PHY is not set
 CONFIG_BCM7XXX_PHY=m
@@ -655,7 +661,6 @@ CONFIG_BRCMFMAC_PCIE=y
 CONFIG_BRCMFMAC_SDIO=y
 CONFIG_BRCMFMAC_USB=y
 CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
 # CONFIG_BRCM_TRACING is not set
 # CONFIG_BRIDGE_CFM is not set
 CONFIG_BRIDGE_EBT_802_3=m
@@ -748,7 +753,6 @@ CONFIG_CACHESTAT_SYSCALL=y
 # CONFIG_CAIF is not set
 CONFIG_CAN_8DEV_USB=m
 CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
 CONFIG_CAN_CALC_BITTIMING=y
 # CONFIG_CAN_CAN327 is not set
 # CONFIG_CAN_CC770 is not set
@@ -828,6 +832,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
 # CONFIG_CFG80211_DEBUGFS is not set
 CONFIG_CFG80211_DEFAULT_PS=y
 # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
 CONFIG_CFG80211=m
 # CONFIG_CFG80211_WEXT is not set
 # CONFIG_CFI_CLANG is not set
@@ -909,6 +914,7 @@ CONFIG_CIFS_XATTR=y
 CONFIG_CLEANCACHE=y
 CONFIG_CLK_BCM_NS2=y
 CONFIG_CLK_BCM_SR=y
+CONFIG_CLK_FD_KUNIT_TEST=m
 CONFIG_CLK_GATE_KUNIT_TEST=m
 # CONFIG_CLK_GFM_LPASS_SM8250 is not set
 # CONFIG_CLK_ICST is not set
@@ -1001,7 +1007,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4
 CONFIG_CONSOLE_TRANSLATIONS=y
 CONFIG_CONTEXT_SWITCH_TRACER=y
 # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
 CONFIG_CORDIC=m
 CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_COREDUMP=y
@@ -1086,7 +1091,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y
 CONFIG_CRYPTO_AES_ARM64=y
-CONFIG_CRYPTO_AES_GCM_P10=y
 # CONFIG_CRYPTO_AES_TI is not set
 CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_ANSI_CPRNG=m
@@ -1103,7 +1107,6 @@ CONFIG_CRYPTO_CCM=y
 CONFIG_CRYPTO_CFB=y
 CONFIG_CRYPTO_CHACHA20=m
 CONFIG_CRYPTO_CHACHA20_NEON=y
-# CONFIG_CRYPTO_CHACHA20_P10 is not set
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_CMAC=y
 # CONFIG_CRYPTO_CRC32C_VPMSUM is not set
@@ -1190,6 +1193,11 @@ CONFIG_CRYPTO_GHASH=y
 # CONFIG_CRYPTO_HCTR2 is not set
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
 # CONFIG_CRYPTO_KEYWRAP is not set
 CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1214,7 +1222,6 @@ CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_POLY1305=m
 CONFIG_CRYPTO_POLY1305_NEON=y
-# CONFIG_CRYPTO_POLY1305_P10 is not set
 # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set
 # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set
 CONFIG_CRYPTO_RMD160=m
@@ -1479,6 +1486,7 @@ CONFIG_DPAA2_CONSOLE=m
 # CONFIG_DPOT_DAC is not set
 # CONFIG_DPS310 is not set
 # CONFIG_DRAGONRISE_FF is not set
+CONFIG_DRIVER_PE_KUNIT_TEST=m
 # CONFIG_DRM_ACCEL is not set
 CONFIG_DRM_AMD_ACP=y
 # CONFIG_DRM_AMD_DC_HDCP is not set
@@ -1551,6 +1559,7 @@ CONFIG_DRM_I915_USERPTR=y
 # CONFIG_DRM_IMX8QXP_LDB is not set
 # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set
 # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set
+# CONFIG_DRM_IMX93_MIPI_DSI is not set
 CONFIG_DRM_IMX_DCSS=m
 # CONFIG_DRM_IMX_LCDC is not set
 # CONFIG_DRM_IMX_LCDIF is not set
@@ -1580,38 +1589,92 @@ CONFIG_DRM_NOUVEAU=m
 # CONFIG_DRM_OFDRM is not set
 # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
 # CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
 # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set
+# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set
+# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
+# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
+# CONFIG_DRM_PANEL_DSI_CM is not set
+# CONFIG_DRM_PANEL_EBBG_FT8719 is not set
 # CONFIG_DRM_PANEL_EDP is not set
+# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
+# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
+# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set
 # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
 # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set
 # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set
+# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
+# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set
+# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set
+# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
+# CONFIG_DRM_PANEL_JDI_R63452 is not set
+# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set
+# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set
 # CONFIG_DRM_PANEL_LG_LB035Q02 is not set
 # CONFIG_DRM_PANEL_LG_LG4573 is not set
 # CONFIG_DRM_PANEL_LVDS is not set
+# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set
+# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
 # CONFIG_DRM_PANEL_MIPI_DBI is not set
 # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
+# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set
 # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set
+# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
 # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
 # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
 # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set
+# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
+# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set
+# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set
 # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set
+# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set
+# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
 # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set
+# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set
 # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set
+# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set
 # CONFIG_DRM_PANEL_SIMPLE is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set
+# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set
 # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
 # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
+# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set
+# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set
+# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set
+# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
 # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
 # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
 # CONFIG_DRM_PANEL_TPO_TPG110 is not set
+# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
+# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set
+# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
+# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set
 # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set
+# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set
 # CONFIG_DRM_PANFROST is not set
 # CONFIG_DRM_PARADE_PS8622 is not set
 # CONFIG_DRM_PARADE_PS8640 is not set
@@ -1631,7 +1694,8 @@ CONFIG_DRM_RADEON_USERPTR=y
 # CONFIG_DRM_SIMPLE_BRIDGE is not set
 CONFIG_DRM_SIMPLEDRM=y
 # CONFIG_DRM_SSD130X is not set
-# CONFIG_DRM_TEGRA is not set
+# CONFIG_DRM_TEGRA_DEBUG is not set
+CONFIG_DRM_TEGRA=m
 # CONFIG_DRM_THINE_THC63LVD1024 is not set
 # CONFIG_DRM_TI_DLPC3433 is not set
 # CONFIG_DRM_TIDSS is not set
@@ -1835,7 +1899,6 @@ CONFIG_EEPROM_AT24=m
 # CONFIG_EEPROM_AT25 is not set
 CONFIG_EEPROM_EE1004=m
 # CONFIG_EEPROM_IDT_89HPESX is not set
-CONFIG_EEPROM_LEGACY=m
 CONFIG_EEPROM_MAX6875=m
 # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set
 # CONFIG_EFI_BOOTLOADER_CONTROL is not set
@@ -1869,7 +1932,12 @@ CONFIG_ENIC=m
 # CONFIG_EPIC100 is not set
 CONFIG_EPOLL=y
 # CONFIG_EQUALIZER is not set
-# CONFIG_EROFS_FS is not set
+# CONFIG_EROFS_FS_DEBUG is not set
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_POSIX_ACL=y
+CONFIG_EROFS_FS_SECURITY=y
+CONFIG_EROFS_FS_XATTR=y
+# CONFIG_EROFS_FS_ZIP is not set
 CONFIG_ETHERNET=y
 CONFIG_ETHOC=m
 CONFIG_ETHTOOL_NETLINK=y
@@ -1934,7 +2002,7 @@ CONFIG_FAT_KUNIT_TEST=m
 # CONFIG_FB_CIRRUS is not set
 # CONFIG_FB_CYBER2000 is not set
 # CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
 CONFIG_FB_EFI=y
 # CONFIG_FB_FOREIGN_ENDIAN is not set
 # CONFIG_FB_GEODE is not set
@@ -2051,7 +2119,9 @@ CONFIG_FSL_PQ_MDIO=m
 # CONFIG_FSL_RCPM is not set
 CONFIG_FSL_XGMAC_MDIO=m
 CONFIG_FSNOTIFY=y
-# CONFIG_FS_VERITY is not set
+# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set
+# CONFIG_FS_VERITY_DEBUG is not set
+CONFIG_FS_VERITY=y
 # CONFIG_FTL is not set
 CONFIG_FTRACE_MCOUNT_RECORD=y
 # CONFIG_FTRACE_RECORD_RECURSION is not set
@@ -2059,6 +2129,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
 # CONFIG_FTRACE_STARTUP_TEST is not set
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_FTRACE=y
+# CONFIG_FUEL_GAUGE_MM8013 is not set
 CONFIG_FUJITSU_ERRATUM_010001=y
 # CONFIG_FUJITSU_ES is not set
 # CONFIG_FUNCTION_ERROR_INJECTION is not set
@@ -2212,6 +2283,7 @@ CONFIG_GPIO_XLP=m
 # CONFIG_GREYBUS is not set
 # CONFIG_GS_FPGABOOT is not set
 # CONFIG_GTP is not set
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
 # CONFIG_GUP_TEST is not set
 CONFIG_GVE=m
 # CONFIG_HABANA_AI is not set
@@ -2396,6 +2468,7 @@ CONFIG_HNS_ENET=m
 CONFIG_HNS=m
 # CONFIG_HOLTEK_FF is not set
 CONFIG_HOTPLUG_CPU=y
+# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set
 CONFIG_HOTPLUG_PCI_ACPI_IBM=m
 CONFIG_HOTPLUG_PCI_ACPI=y
 # CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -2559,6 +2632,7 @@ CONFIG_I40E=m
 CONFIG_I40EVF=m
 # CONFIG_I6300ESB_WDT is not set
 # CONFIG_I8K is not set
+# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set
 # CONFIG_IAQCORE is not set
 CONFIG_IAVF=m
 # CONFIG_IB700_WDT is not set
@@ -2572,6 +2646,7 @@ CONFIG_ICPLUS_PHY=m
 # CONFIG_ICS932S401 is not set
 # CONFIG_IDLE_INJECT is not set
 CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
 CONFIG_IEEE802154_6LOWPAN=m
 # CONFIG_IEEE802154_ADF7242 is not set
 # CONFIG_IEEE802154_AT86RF230 is not set
@@ -2641,7 +2716,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10
 CONFIG_IMA_READ_POLICY=y
 CONFIG_IMA_SIG_TEMPLATE=y
 # CONFIG_IMA_TEMPLATE is not set
-CONFIG_IMA_TRUSTED_KEYRING=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA=y
 # CONFIG_IMG_ASCII_LCD is not set
@@ -2775,6 +2849,7 @@ CONFIG_INPUT_SPARSEKMAP=m
 CONFIG_INPUT_UINPUT=m
 CONFIG_INPUT=y
 # CONFIG_INPUT_YEALINK is not set
+# CONFIG_INSPUR_PLATFORM_PROFILE is not set
 # CONFIG_INT3406_THERMAL is not set
 CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
 CONFIG_INTEGRITY_AUDIT=y
@@ -2817,6 +2892,7 @@ CONFIG_INTEL_SDSI=m
 # CONFIG_INTEL_SOC_PMIC_CHTWC is not set
 # CONFIG_INTEL_SOC_PMIC is not set
 # CONFIG_INTEL_TCC_COOLING is not set
+# CONFIG_INTEL_TDX_HOST is not set
 # CONFIG_INTEL_TH is not set
 CONFIG_INTEL_UNCORE_FREQ_CONTROL=m
 # CONFIG_INTEL_VSC is not set
@@ -2844,7 +2920,8 @@ CONFIG_IO_DELAY_0X80=y
 CONFIG_IOMMU_DEFAULT_DMA_LAZY=y
 # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set
 # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set
-# CONFIG_IOMMUFD is not set
+CONFIG_IOMMUFD=m
+# CONFIG_IOMMUFD_TEST is not set
 # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set
 # CONFIG_IOMMU_IO_PGTABLE_DART is not set
 # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set
@@ -3140,7 +3217,7 @@ CONFIG_KEY_NOTIFICATIONS=y
 # CONFIG_KEYS_REQUEST_CACHE is not set
 CONFIG_KEYS=y
 # CONFIG_KFENCE_DEFERRABLE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
 CONFIG_KFENCE_NUM_OBJECTS=255
 CONFIG_KFENCE_SAMPLE_INTERVAL=100
 # CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3171,6 +3248,7 @@ CONFIG_KUNIT_TEST=m
 CONFIG_KVM_AMD_SEV=y
 # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
 # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
+CONFIG_KVM_MAX_NR_VCPUS=4096
 # CONFIG_KVM_PROVE_MMU is not set
 CONFIG_KVM_SMM=y
 # CONFIG_KVM_WERROR is not set
@@ -3338,6 +3416,7 @@ CONFIG_LSI_ET1011C_PHY=m
 CONFIG_LSM="lockdown,yama,integrity,selinux,bpf"
 CONFIG_LSM_MMAP_MIN_ADDR=65535
 # CONFIG_LTC1660 is not set
+# CONFIG_LTC2309 is not set
 # CONFIG_LTC2471 is not set
 # CONFIG_LTC2485 is not set
 # CONFIG_LTC2496 is not set
@@ -3352,6 +3431,7 @@ CONFIG_LTO_NONE=y
 # CONFIG_LTR501 is not set
 # CONFIG_LTRF216A is not set
 # CONFIG_LV0104CS is not set
+# CONFIG_LWQ_TEST is not set
 CONFIG_LWTUNNEL_BPF=y
 CONFIG_LWTUNNEL=y
 CONFIG_LXT_PHY=m
@@ -3360,6 +3440,7 @@ CONFIG_LZ4_COMPRESS=m
 CONFIG_MAC80211_DEBUGFS=y
 # CONFIG_MAC80211_DEBUG_MENU is not set
 CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
 CONFIG_MAC80211_LEDS=y
 CONFIG_MAC80211=m
 # CONFIG_MAC80211_MESH is not set
@@ -3426,6 +3507,7 @@ CONFIG_MAX_SKB_FRAGS=17
 # CONFIG_MCORE2 is not set
 # CONFIG_MCP320X is not set
 # CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
 # CONFIG_MCP3911 is not set
 # CONFIG_MCP4018 is not set
 # CONFIG_MCP41010 is not set
@@ -3463,6 +3545,7 @@ CONFIG_MD_RAID10=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID456=m
 CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
 CONFIG_MEDIA_ALTERA_CI=m
 # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
 CONFIG_MEDIA_ATTACH=y
@@ -3534,7 +3617,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
 # CONFIG_MFD_BD9571MWV is not set
 # CONFIG_MFD_CPCAP is not set
 # CONFIG_MFD_CS42L43_I2C is not set
-# CONFIG_MFD_CS42L43_SDW is not set
+CONFIG_MFD_CS42L43_SDW=m
 # CONFIG_MFD_DA9052_I2C is not set
 # CONFIG_MFD_DA9052_SPI is not set
 # CONFIG_MFD_DA9055 is not set
@@ -3675,6 +3758,7 @@ CONFIG_MLX5_CORE_EN_DCB=y
 CONFIG_MLX5_CORE_EN=y
 CONFIG_MLX5_CORE_IPOIB=y
 CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
 CONFIG_MLX5_EN_ARFS=y
 CONFIG_MLX5_EN_IPSEC=y
 CONFIG_MLX5_EN_MACSEC=y
@@ -3791,6 +3875,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
 # CONFIG_MODULE_SIG_SHA1 is not set
 # CONFIG_MODULE_SIG_SHA224 is not set
 # CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
 # CONFIG_MODULE_SIG_SHA384 is not set
 CONFIG_MODULE_SIG_SHA512=y
 CONFIG_MODULE_SIG=y
@@ -3863,6 +3950,8 @@ CONFIG_MT76x2U=m
 CONFIG_MT7921E=m
 # CONFIG_MT7921S is not set
 # CONFIG_MT7921U is not set
+# CONFIG_MT7925E is not set
+# CONFIG_MT7925U is not set
 # CONFIG_MT7996E is not set
 # CONFIG_MTD_ABSENT is not set
 # CONFIG_MTD_AFS_PARTS is not set
@@ -3940,7 +4029,6 @@ CONFIG_MWIFIEX_PCIE=m
 CONFIG_MWIFIEX_SDIO=m
 CONFIG_MWIFIEX_USB=m
 # CONFIG_MWL8K is not set
-# CONFIG_MX3_IPU is not set
 # CONFIG_MXC4005 is not set
 # CONFIG_MXC6255 is not set
 # CONFIG_MXS_DMA is not set
@@ -3985,9 +4073,6 @@ CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_MATCHALL=m
 # CONFIG_NET_CLS_ROUTE4 is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_TCINDEX is not set
 CONFIG_NET_CLS_U32=m
 CONFIG_NET_CLS=y
 CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4109,6 +4194,7 @@ CONFIG_NET_IPIP=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_KEY=m
 CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NETKIT is not set
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_NETLABEL=y
 CONFIG_NETLINK_DIAG=y
@@ -4121,15 +4207,12 @@ CONFIG_NET_PKTGEN=m
 CONFIG_NET_POLL_CONTROLLER=y
 CONFIG_NET_RX_BUSY_POLL=y
 # CONFIG_NET_SB1000 is not set
-# CONFIG_NET_SCH_ATM is not set
 CONFIG_NET_SCH_CAKE=m
-# CONFIG_NET_SCH_CBQ is not set
 CONFIG_NET_SCH_CBS=m
 # CONFIG_NET_SCH_CHOKE is not set
 # CONFIG_NET_SCH_CODEL is not set
 CONFIG_NET_SCH_DEFAULT=y
 # CONFIG_NET_SCH_DRR is not set
-# CONFIG_NET_SCH_DSMARK is not set
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_ETS=m
@@ -4158,6 +4241,7 @@ CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SWITCHDEV=y
 CONFIG_NET_TC_SKB_EXT=y
 # CONFIG_NET_TEAM is not set
+CONFIG_NET_TEST=m
 # CONFIG_NET_TULIP is not set
 CONFIG_NET_UDP_TUNNEL=m
 # CONFIG_NET_VENDOR_3COM is not set
@@ -4262,7 +4346,7 @@ CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_CT_NETLINK_HELPER=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_CT_PROTO_DCCP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
 CONFIG_NF_CT_PROTO_SCTP=y
 CONFIG_NF_CT_PROTO_UDPLITE=y
 CONFIG_NF_DUP_NETDEV=m
@@ -4460,9 +4544,11 @@ CONFIG_NVDIMM_PFN=y
 # CONFIG_NVDIMM_SECURITY_TEST is not set
 # CONFIG_NVHE_EL2_DEBUG is not set
 CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y
+CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
 # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set
-CONFIG_NVME_AUTH=y
+CONFIG_NVME_AUTH=m
 CONFIG_NVME_FC=m
+CONFIG_NVME_HOST_AUTH=y
 # CONFIG_NVME_HWMON is not set
 # CONFIG_NVMEM_BCM_OCOTP is not set
 # CONFIG_NVMEM_IMX_IIM is not set
@@ -4489,7 +4575,9 @@ CONFIG_NVME_TARGET=m
 # CONFIG_NVME_TARGET_PASSTHRU is not set
 CONFIG_NVME_TARGET_RDMA=m
 CONFIG_NVME_TARGET_TCP=m
+CONFIG_NVME_TARGET_TCP_TLS=y
 CONFIG_NVME_TCP=m
+CONFIG_NVME_TCP_TLS=y
 # CONFIG_NVME_VERBOSE_ERRORS is not set
 # CONFIG_NVRAM is not set
 # CONFIG_NVSW_SN2201 is not set
@@ -4607,6 +4695,7 @@ CONFIG_PCC=y
 # CONFIG_PCI_CNB20LE_QUIRK is not set
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_DYNAMIC_OF_NODES is not set
+CONFIG_PCIEAER_CXL=y
 CONFIG_PCIEAER_INJECT=m
 CONFIG_PCIEAER=y
 # CONFIG_PCIE_AL is not set
@@ -4672,6 +4761,7 @@ CONFIG_PCI_XGENE_MSI=y
 CONFIG_PCI_XGENE=y
 CONFIG_PCI=y
 # CONFIG_PCNET32 is not set
+CONFIG_PCP_BATCH_SCALE_MAX=5
 CONFIG_PCPU_DEV_REFCNT=y
 CONFIG_PCSPKR_PLATFORM=y
 CONFIG_PCS_XPCS=m
@@ -4751,6 +4841,7 @@ CONFIG_PINCTRL_ALDERLAKE=m
 # CONFIG_PINCTRL_AMD is not set
 # CONFIG_PINCTRL_BROXTON is not set
 # CONFIG_PINCTRL_CHERRYVIEW is not set
+# CONFIG_PINCTRL_CS42L43 is not set
 # CONFIG_PINCTRL_CY8C95X0 is not set
 CONFIG_PINCTRL_ELKHARTLAKE=m
 CONFIG_PINCTRL_EMMITSBURG=m
@@ -4839,7 +4930,6 @@ CONFIG_POSIX_TIMERS=y
 CONFIG_POWERNV_CPUFREQ=y
 CONFIG_POWERNV_OP_PANEL=m
 # CONFIG_POWERPC64_CPU is not set
-# CONFIG_POWER_RESET_BRCMSTB is not set
 CONFIG_POWER_RESET_GPIO_RESTART=y
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_POWER_RESET_HISI=y
@@ -5000,6 +5090,7 @@ CONFIG_QCOM_L3_PMU=y
 # CONFIG_QCOM_PDC is not set
 CONFIG_QCOM_QDF2400_ERRATUM_0065=y
 # CONFIG_QCOM_QFPROM is not set
+# CONFIG_QCOM_QSEECOM is not set
 # CONFIG_QCOM_RAMP_CTRL is not set
 # CONFIG_QCOM_RMTFS_MEM is not set
 # CONFIG_QCOM_RPMH is not set
@@ -5028,7 +5119,7 @@ CONFIG_QLA3XXX=m
 # CONFIG_QNX4FS_FS is not set
 # CONFIG_QNX6FS_FS is not set
 # CONFIG_QORIQ_CPUFREQ is not set
-# CONFIG_QORIQ_THERMAL is not set
+CONFIG_QORIQ_THERMAL=m
 CONFIG_QRTR=m
 CONFIG_QRTR_MHI=m
 # CONFIG_QRTR_SMD is not set
@@ -5139,6 +5230,7 @@ CONFIG_REGULATOR_GPIO=y
 # CONFIG_REGULATOR_MAX1586 is not set
 # CONFIG_REGULATOR_MAX20086 is not set
 # CONFIG_REGULATOR_MAX20411 is not set
+# CONFIG_REGULATOR_MAX77503 is not set
 CONFIG_REGULATOR_MAX77620=y
 CONFIG_REGULATOR_MAX77686=m
 # CONFIG_REGULATOR_MAX77826 is not set
@@ -5243,6 +5335,7 @@ CONFIG_RMI4_SPI=m
 CONFIG_ROCKCHIP_PHY=m
 CONFIG_ROCKER=m
 CONFIG_RODATA_FULL_DEFAULT_ENABLED=y
+# CONFIG_ROHM_BM1390 is not set
 # CONFIG_ROHM_BU27008 is not set
 # CONFIG_ROHM_BU27034 is not set
 # CONFIG_ROMFS_FS is not set
@@ -5283,7 +5376,6 @@ CONFIG_RTC_DRV_ABB5ZES3=m
 CONFIG_RTC_DRV_ABX80X=m
 CONFIG_RTC_DRV_BBNSM=m
 CONFIG_RTC_DRV_BQ32K=m
-CONFIG_RTC_DRV_BQ4802=m
 # CONFIG_RTC_DRV_CADENCE is not set
 CONFIG_RTC_DRV_CMOS=y
 CONFIG_RTC_DRV_DS1286=m
@@ -5573,6 +5665,7 @@ CONFIG_SDIO_UART=m
 # CONFIG_SDX_GCC_55 is not set
 # CONFIG_SECCOMP_CACHE_DEBUG is not set
 CONFIG_SECCOMP=y
+# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set
 CONFIG_SECONDARY_TRUSTED_KEYRING=y
 CONFIG_SECRETMEM=y
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
@@ -5728,6 +5821,7 @@ CONFIG_SENSORS_LTC2945=m
 # CONFIG_SENSORS_LTC2978 is not set
 # CONFIG_SENSORS_LTC2978_REGULATOR is not set
 # CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2991 is not set
 # CONFIG_SENSORS_LTC2992 is not set
 CONFIG_SENSORS_LTC3815=m
 # CONFIG_SENSORS_LTC4151 is not set
@@ -5786,6 +5880,7 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
 # CONFIG_SENSORS_PLI1209BC is not set
 # CONFIG_SENSORS_PM6764TR is not set
 # CONFIG_SENSORS_PMBUS is not set
+# CONFIG_SENSORS_POWERZ is not set
 CONFIG_SENSORS_POWR1220=m
 CONFIG_SENSORS_PWM_FAN=m
 # CONFIG_SENSORS_PXE1610 is not set
@@ -5962,8 +6057,6 @@ CONFIG_SLIP_COMPRESSED=y
 CONFIG_SLIP=m
 # CONFIG_SLIP_MODE_SLIP6 is not set
 CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
-# CONFIG_SLOB is not set
 # CONFIG_SLUB_CPU_PARTIAL is not set
 # CONFIG_SLUB_DEBUG_ON is not set
 CONFIG_SLUB_DEBUG=y
@@ -6048,6 +6141,7 @@ CONFIG_SND_FIREWORKS=m
 # CONFIG_SND_FM801_TEA575X_BOOL is not set
 CONFIG_SND_GINA20=m
 CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
 CONFIG_SND_HDA_CODEC_ANALOG=m
 CONFIG_SND_HDA_CODEC_CA0110=m
 CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6184,8 +6278,10 @@ CONFIG_SND_SEQ_UMP=y
 # CONFIG_SND_SOC_ARNDALE is not set
 # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set
 # CONFIG_SND_SOC_AW8738 is not set
+# CONFIG_SND_SOC_AW87390 is not set
 # CONFIG_SND_SOC_AW88261 is not set
 # CONFIG_SND_SOC_AW88395 is not set
+# CONFIG_SND_SOC_AW88399 is not set
 # CONFIG_SND_SOC_BD28623 is not set
 # CONFIG_SND_SOC_BT_SCO is not set
 # CONFIG_SND_SOC_CHV3_CODEC is not set
@@ -6276,6 +6372,7 @@ CONFIG_SND_SOC_CX2072X=m
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set
+# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set
 # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set
@@ -6390,12 +6487,6 @@ CONFIG_SND_SOC_MAX98927=m
 # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
 # CONFIG_SND_SOC_RK817 is not set
 # CONFIG_SND_SOC_RL6231 is not set
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
 # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set
 # CONFIG_SND_SOC_RT1308 is not set
 # CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6418,6 +6509,7 @@ CONFIG_SND_SOC_RT1318_SDW=m
 # CONFIG_SND_SOC_RT715_SDW is not set
 CONFIG_SND_SOC_RT722_SDCA_SDW=m
 # CONFIG_SND_SOC_RT9120 is not set
+# CONFIG_SND_SOC_RTQ9128 is not set
 # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
 # CONFIG_SND_SOC_SAMSUNG is not set
 # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6613,7 +6705,6 @@ CONFIG_SND_VX222=m
 # CONFIG_SND_XEN_FRONTEND is not set
 # CONFIG_SND_YMFPCI is not set
 # CONFIG_SNET_VDPA is not set
-# CONFIG_SOC_BRCMSTB is not set
 CONFIG_SOC_IMX8M=y
 CONFIG_SOC_IMX9=m
 # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set
@@ -6844,6 +6935,7 @@ CONFIG_TCM_IBLOCK=m
 CONFIG_TCM_PSCSI=m
 # CONFIG_TCM_QLA2XXX is not set
 CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_BBR=m
 CONFIG_TCP_CONG_BIC=m
@@ -6910,6 +7002,7 @@ CONFIG_TEST_KSTRTOX=y
 # CONFIG_TEST_MEMINIT is not set
 # CONFIG_TEST_MIN_HEAP is not set
 # CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
 # CONFIG_TEST_OVERFLOW is not set
 # CONFIG_TEST_PARMAN is not set
 # CONFIG_TEST_POWER is not set
@@ -7141,6 +7234,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
 # CONFIG_TYPEC_MUX_GPIO_SBU is not set
 # CONFIG_TYPEC_MUX_NB7VPQ904M is not set
 CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
 # CONFIG_TYPEC_NVIDIA_ALTMODE is not set
 # CONFIG_TYPEC_QCOM_PMIC is not set
 # CONFIG_TYPEC_RT1711H is not set
@@ -7220,6 +7314,7 @@ CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_USB_CHIPIDEA_IMX=m
 CONFIG_USB_CHIPIDEA=m
 CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
 CONFIG_USB_CHIPIDEA_TEGRA=m
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CONN_GPIO=m
@@ -7323,6 +7418,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
 CONFIG_USB_LED_TRIG=y
 CONFIG_USB_LEGOTOWER=m
 # CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
 CONFIG_USB_M5602=m
 # CONFIG_USB_MA901 is not set
 # CONFIG_USB_MAX3421_HCD is not set
@@ -7366,6 +7462,7 @@ CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_PRODUCTLIST is not set
 # CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
 CONFIG_USB_PCI=y
 # CONFIG_USBPCWATCHDOG is not set
 CONFIG_USB_PEGASUS=m
@@ -7516,7 +7613,10 @@ CONFIG_VEXPRESS_CONFIG=y
 # CONFIG_VF610_DAC is not set
 CONFIG_VFAT_FS=m
 # CONFIG_VFIO_AMBA is not set
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
 CONFIG_VFIO_FSL_MC=m
+CONFIG_VFIO_GROUP=y
 CONFIG_VFIO_IOMMU_TYPE1=m
 CONFIG_VFIO=m
 # CONFIG_VFIO_MDEV is not set
@@ -7631,11 +7731,13 @@ CONFIG_VIDEO_IVTV=m
 # CONFIG_VIDEO_M5MOLS is not set
 # CONFIG_VIDEO_MAX9286 is not set
 # CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
 # CONFIG_VIDEO_ML86V7667 is not set
 # CONFIG_VIDEO_MSP3400 is not set
 # CONFIG_VIDEO_MT9M001 is not set
 # CONFIG_VIDEO_MT9M032 is not set
 # CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
 # CONFIG_VIDEO_MT9P031 is not set
 # CONFIG_VIDEO_MT9T001 is not set
 # CONFIG_VIDEO_MT9T112 is not set
@@ -7976,19 +8078,18 @@ CONFIG_ZENIFY=y
 CONFIG_WINESYNC=y
 CONFIG_USER_NS_UNPRIVILEGED=y
 CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
 CONFIG_HID_IPTS=m
 CONFIG_HID_ITHC=m
 CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
 CONFIG_IPC_CLASSES=y
 CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-ppc64le-debug-fedora.config b/SOURCES/kernel-ppc64le-debug-fedora.config
index 01ae224..2c014c2 100644
--- a/SOURCES/kernel-ppc64le-debug-fedora.config
+++ b/SOURCES/kernel-ppc64le-debug-fedora.config
@@ -481,6 +481,15 @@ CONFIG_BAYCOM_SER_HDX=m
 # CONFIG_BCACHE_ASYNC_REGISTRATION is not set
 # CONFIG_BCACHE_CLOSURES_DEBUG is not set
 # CONFIG_BCACHE_DEBUG is not set
+CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y
+CONFIG_BCACHEFS_DEBUG=y
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_LOCK_TIME_STATS=y
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y
+# CONFIG_BCACHEFS_TESTS is not set
 CONFIG_BCACHE=m
 CONFIG_BCM54140_PHY=m
 CONFIG_BCM7XXX_PHY=m
@@ -622,7 +631,6 @@ CONFIG_BRCMFMAC_PCIE=y
 CONFIG_BRCMFMAC_SDIO=y
 CONFIG_BRCMFMAC_USB=y
 CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
 CONFIG_BRCM_TRACING=y
 CONFIG_BRIDGE_CFM=y
 CONFIG_BRIDGE_EBT_802_3=m
@@ -730,7 +738,6 @@ CONFIG_CACHESTAT_SYSCALL=y
 # CONFIG_CAIF is not set
 CONFIG_CAN_8DEV_USB=m
 CONFIG_CAN_BCM=m
-CONFIG_CAN_BXCAN=m
 CONFIG_CAN_CALC_BITTIMING=y
 CONFIG_CAN_CAN327=m
 # CONFIG_CAN_CC770 is not set
@@ -810,6 +817,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
 CONFIG_CFG80211_DEBUGFS=y
 CONFIG_CFG80211_DEFAULT_PS=y
 # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
 CONFIG_CFG80211=m
 # CONFIG_CFI_CLANG is not set
 CONFIG_CFS_BANDWIDTH=y
@@ -898,6 +906,7 @@ CONFIG_CIFS_UPCALL=y
 CONFIG_CIFS_XATTR=y
 CONFIG_CIO2_BRIDGE=y
 CONFIG_CLEANCACHE=y
+CONFIG_CLK_FD_KUNIT_TEST=m
 CONFIG_CLK_GATE_KUNIT_TEST=m
 # CONFIG_CLK_GFM_LPASS_SM8250 is not set
 # CONFIG_CLK_ICST is not set
@@ -970,7 +979,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3
 CONFIG_CONSOLE_TRANSLATIONS=y
 CONFIG_CONTEXT_SWITCH_TRACER=y
 # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
-# CONFIG_COPS is not set
 CONFIG_CORDIC=m
 CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_COREDUMP=y
@@ -1036,6 +1044,7 @@ CONFIG_CROS_EC_TYPEC=m
 CONFIG_CROS_EC_UART=m
 CONFIG_CROS_HPS_I2C=m
 CONFIG_CROS_KBD_LED_BACKLIGHT=m
+CONFIG_CROS_KUNIT_EC_PROTO_TEST=m
 CONFIG_CROS_KUNIT=m
 CONFIG_CROSS_MEMORY_ATTACH=y
 CONFIG_CROS_TYPEC_SWITCH=m
@@ -1127,6 +1136,11 @@ CONFIG_CRYPTO_GHASH=y
 CONFIG_CRYPTO_HCTR2=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set
+CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y
+# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set
 CONFIG_CRYPTO_KEYWRAP=m
 CONFIG_CRYPTO_LIB_BLAKE2S=m
@@ -1225,6 +1239,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_CGROUP_REF is not set
+# CONFIG_DEBUG_CLOSURES is not set
 CONFIG_DEBUG_CREDENTIALS=y
 # CONFIG_DEBUG_DEVRES is not set
 # CONFIG_DEBUG_DRIVER is not set
@@ -1322,7 +1337,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
 CONFIG_DEFAULT_SECURITY_SELINUX=y
 # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEV_APPLETALK is not set
 CONFIG_DEV_DAX_CXL=m
 CONFIG_DEV_DAX_HMEM=m
 CONFIG_DEV_DAX_KMEM=m
@@ -1413,7 +1427,7 @@ CONFIG_DNS_RESOLVER=m
 CONFIG_DP83640_PHY=m
 CONFIG_DP83822_PHY=m
 CONFIG_DP83848_PHY=m
-# CONFIG_DP83867_PHY is not set
+CONFIG_DP83867_PHY=m
 CONFIG_DP83869_PHY=m
 # CONFIG_DP83TC811_PHY is not set
 # CONFIG_DP83TD510_PHY is not set
@@ -1422,6 +1436,7 @@ CONFIG_DPOT_DAC=m
 # CONFIG_DPS310 is not set
 CONFIG_DRAGONRISE_FF=y
 CONFIG_DRBD_FAULT_INJECTION=y
+CONFIG_DRIVER_PE_KUNIT_TEST=m
 # CONFIG_DRM_ACCEL_QAIC is not set
 CONFIG_DRM_ACCEL=y
 CONFIG_DRM_AMD_ACP=y
@@ -1528,9 +1543,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m
 # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
 # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set
 # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
+CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
 CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m
 # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
 CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m
+CONFIG_DRM_PANEL_JDI_LPM102A188A=m
 # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set
 CONFIG_DRM_PANEL_JDI_R63452=m
 # CONFIG_DRM_PANEL_KHADAS_TS050 is not set
@@ -1560,6 +1577,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m
 # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
 # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set
 # CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set
+CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
 CONFIG_DRM_PANEL_RONBO_RB070D30=m
 CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m
 CONFIG_DRM_PANEL_SAMSUNG_DB7430=m
@@ -1778,7 +1796,6 @@ CONFIG_EEPROM_AT24=m
 # CONFIG_EEPROM_AT25 is not set
 CONFIG_EEPROM_EE1004=m
 CONFIG_EEPROM_IDT_89HPESX=m
-CONFIG_EEPROM_LEGACY=m
 CONFIG_EEPROM_MAX6875=m
 CONFIG_EFI_COCO_SECRET=y
 CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y
@@ -1899,7 +1916,7 @@ CONFIG_FAULT_INJECTION=y
 # CONFIG_FB_CIRRUS is not set
 # CONFIG_FB_CYBER2000 is not set
 # CONFIG_FB_DA8XX is not set
-CONFIG_FB_DEVICE=y
+# CONFIG_FB_DEVICE is not set
 CONFIG_FB_EFI=y
 # CONFIG_FB_FOREIGN_ENDIAN is not set
 # CONFIG_FB_GEODE is not set
@@ -2039,6 +2056,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_FTRACE=y
 # CONFIG_FTR_FIXUP_SELFTEST is not set
+CONFIG_FUEL_GAUGE_MM8013=m
 # CONFIG_FUNCTION_ERROR_INJECTION is not set
 CONFIG_FUNCTION_GRAPH_RETVAL=y
 CONFIG_FUNCTION_GRAPH_TRACER=y
@@ -2177,6 +2195,7 @@ CONFIG_GREENASIA_FF=y
 # CONFIG_GREYBUS is not set
 # CONFIG_GS_FPGABOOT is not set
 CONFIG_GTP=m
+# CONFIG_GUEST_STATE_BUFFER_TEST is not set
 CONFIG_GUP_TEST=y
 CONFIG_GVE=m
 # CONFIG_HABANA_AI is not set
@@ -2522,6 +2541,7 @@ CONFIG_ICPLUS_PHY=m
 # CONFIG_ICS932S401 is not set
 # CONFIG_IDLE_INJECT is not set
 CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IDPF=m
 CONFIG_IEEE802154_6LOWPAN=m
 CONFIG_IEEE802154_ADF7242=m
 # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set
@@ -2608,7 +2628,6 @@ CONFIG_IMA_READ_POLICY=y
 CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y
 # CONFIG_IMA_SIG_TEMPLATE is not set
 # CONFIG_IMA_TEMPLATE is not set
-# CONFIG_IMA_TRUSTED_KEYRING is not set
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA=y
 # CONFIG_IMG_ASCII_LCD is not set
@@ -2814,8 +2833,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IPC_NS=y
 # CONFIG_IP_DCCP is not set
-CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP=m
 CONFIG_IP_FIB_TRIE_STATS=y
 # CONFIG_IPMB_DEVICE_INTERFACE is not set
 CONFIG_IPMI_DEVICE_INTERFACE=m
@@ -3131,7 +3148,7 @@ CONFIG_KEYS_REQUEST_CACHE=y
 CONFIG_KEYS=y
 # CONFIG_KFENCE_DEFERRABLE is not set
 # CONFIG_KFENCE is not set
-# CONFIG_KFENCE_KUNIT_TEST is not set
+CONFIG_KFENCE_KUNIT_TEST=m
 CONFIG_KFENCE_NUM_OBJECTS=255
 CONFIG_KFENCE_SAMPLE_INTERVAL=100
 # CONFIG_KFENCE_STATIC_KEYS is not set
@@ -3169,6 +3186,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y
 # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set
 # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set
 CONFIG_KVM_GUEST=y
+CONFIG_KVM_MAX_NR_VCPUS=4096
 CONFIG_KVM_PROVE_MMU=y
 CONFIG_KVM_SMM=y
 # CONFIG_KVM_WERROR is not set
@@ -3226,6 +3244,7 @@ CONFIG_LEDS_GPIO=m
 CONFIG_LEDS_GROUP_MULTICOLOR=m
 # CONFIG_LEDS_IS31FL319X is not set
 CONFIG_LEDS_IS31FL32XX=m
+CONFIG_LEDS_KTD202X=m
 # CONFIG_LEDS_KTD2692 is not set
 # CONFIG_LEDS_LGM is not set
 CONFIG_LEDS_LM3530=m
@@ -3353,6 +3372,7 @@ CONFIG_LSI_ET1011C_PHY=m
 CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock"
 CONFIG_LSM_MMAP_MIN_ADDR=65535
 CONFIG_LTC1660=m
+# CONFIG_LTC2309 is not set
 # CONFIG_LTC2471 is not set
 # CONFIG_LTC2485 is not set
 # CONFIG_LTC2496 is not set
@@ -3367,6 +3387,7 @@ CONFIG_LTO_NONE=y
 CONFIG_LTR501=m
 CONFIG_LTRF216A=m
 CONFIG_LV0104CS=m
+# CONFIG_LWQ_TEST is not set
 CONFIG_LWTUNNEL_BPF=y
 CONFIG_LWTUNNEL=y
 CONFIG_LXT_PHY=m
@@ -3375,6 +3396,7 @@ CONFIG_LZ4_COMPRESS=m
 CONFIG_MAC80211_DEBUGFS=y
 # CONFIG_MAC80211_DEBUG_MENU is not set
 CONFIG_MAC80211_HWSIM=m
+CONFIG_MAC80211_KUNIT_TEST=m
 CONFIG_MAC80211_LEDS=y
 CONFIG_MAC80211=m
 CONFIG_MAC80211_MESH=y
@@ -3442,6 +3464,7 @@ CONFIG_MB1232=m
 # CONFIG_MCORE2 is not set
 # CONFIG_MCP320X is not set
 # CONFIG_MCP3422 is not set
+# CONFIG_MCP3564 is not set
 CONFIG_MCP3911=m
 CONFIG_MCP4018=m
 CONFIG_MCP41010=m
@@ -3452,6 +3475,7 @@ CONFIG_MCP4728=m
 # CONFIG_MCP4922 is not set
 CONFIG_MCTP_SERIAL=m
 # CONFIG_MCTP_TRANSPORT_I2C is not set
+# CONFIG_MCTP_TRANSPORT_I3C is not set
 CONFIG_MCTP=y
 CONFIG_MD_AUTODETECT=y
 CONFIG_MD_BITMAP_FILE=y
@@ -3481,6 +3505,7 @@ CONFIG_MD_RAID10=m
 CONFIG_MD_RAID1=m
 CONFIG_MD_RAID456=m
 CONFIG_MD=y
+CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m
 CONFIG_MEDIA_ALTERA_CI=m
 CONFIG_MEDIA_ANALOG_TV_SUPPORT=y
 CONFIG_MEDIA_ATTACH=y
@@ -3692,18 +3717,22 @@ CONFIG_MLX4_DEBUG=y
 CONFIG_MLX4_EN_DCB=y
 CONFIG_MLX4_EN=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_ACCEL=y
 CONFIG_MLX5_CLS_ACT=y
 CONFIG_MLX5_CORE_EN_DCB=y
 CONFIG_MLX5_CORE_EN=y
 CONFIG_MLX5_CORE_IPOIB=y
 CONFIG_MLX5_CORE=m
+CONFIG_MLX5_DPLL=m
 CONFIG_MLX5_EN_ARFS=y
 CONFIG_MLX5_EN_IPSEC=y
 CONFIG_MLX5_EN_MACSEC=y
 CONFIG_MLX5_EN_RXNFC=y
 CONFIG_MLX5_EN_TLS=y
 CONFIG_MLX5_ESWITCH=y
-# CONFIG_MLX5_FPGA is not set
+# CONFIG_MLX5_FPGA_IPSEC is not set
+# CONFIG_MLX5_FPGA_TLS is not set
+CONFIG_MLX5_FPGA=y
 CONFIG_MLX5_INFINIBAND=m
 CONFIG_MLX5_IPSEC=y
 CONFIG_MLX5_MACSEC=y
@@ -3804,6 +3833,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y
 # CONFIG_MODULE_SIG_SHA1 is not set
 # CONFIG_MODULE_SIG_SHA224 is not set
 # CONFIG_MODULE_SIG_SHA256 is not set
+# CONFIG_MODULE_SIG_SHA3_256 is not set
+# CONFIG_MODULE_SIG_SHA3_384 is not set
+# CONFIG_MODULE_SIG_SHA3_512 is not set
 # CONFIG_MODULE_SIG_SHA384 is not set
 CONFIG_MODULE_SIG_SHA512=y
 CONFIG_MODULE_SIG=y
@@ -3881,6 +3913,8 @@ CONFIG_MT7915E=m
 CONFIG_MT7921E=m
 CONFIG_MT7921S=m
 CONFIG_MT7921U=m
+CONFIG_MT7925E=m
+CONFIG_MT7925U=m
 CONFIG_MT7996E=m
 # CONFIG_MTD_ABSENT is not set
 # CONFIG_MTD_AR7_PARTS is not set
@@ -4028,9 +4062,6 @@ CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_MATCHALL=m
 CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_U32=m
 CONFIG_NET_CLS=y
 CONFIG_NETCONSOLE_DYNAMIC=y
@@ -4103,12 +4134,12 @@ CONFIG_NETFILTER_EGRESS=y
 CONFIG_NETFILTER_INGRESS=y
 CONFIG_NETFILTER_NETLINK_ACCT=m
 # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
+CONFIG_NETFILTER_NETLINK_HOOK=m
 CONFIG_NETFILTER_NETLINK_LOG=m
 CONFIG_NETFILTER_NETLINK=m
 CONFIG_NETFILTER_NETLINK_OSF=m
 CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+# CONFIG_NETFILTER_XTABLES_COMPAT is not set
 CONFIG_NETFILTER_XTABLES=y
 CONFIG_NETFILTER_XT_CONNMARK=m
 CONFIG_NETFILTER_XT_MARK=m
@@ -4201,6 +4232,7 @@ CONFIG_NET_IPIP=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_KEY=m
 CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NETKIT=y
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_NETLABEL=y
 CONFIG_NETLINK_DIAG=y
@@ -4212,15 +4244,12 @@ CONFIG_NET_NS=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_POLL_CONTROLLER=y
 CONFIG_NETROM=m
-CONFIG_NET_SCH_ATM=m
 CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_CBS=m
 CONFIG_NET_SCH_CHOKE=m
 CONFIG_NET_SCH_CODEL=m
 # CONFIG_NET_SCH_DEFAULT is not set
 CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_ETS=m
@@ -4254,6 +4283,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m
CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m CONFIG_NET_TULIP=y CONFIG_NET_UDP_TUNNEL=m CONFIG_NET_VENDOR_3COM=y @@ -4388,7 +4418,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -4582,8 +4612,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_LAYOUT_ONIE_TLV=m CONFIG_NVMEM_LAYOUT_SL28_VPD=m @@ -4603,7 +4634,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=m # CONFIG_NVSW_SN2201 is not set @@ -4726,6 +4759,7 @@ CONFIG_PATA_WINBOND=m # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4774,6 +4808,7 @@ CONFIG_PCI_STUB=y CONFIG_PCI_SW_SWITCHTEC=m CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4798,16 +4833,12 @@ CONFIG_PHY_CADENCE_SIERRA=m CONFIG_PHY_CADENCE_TORRENT=m # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_HI3670_PCIE is not set # CONFIG_PHY_HI3670_USB is not set # CONFIG_PHY_LAN966X_SERDES is not set CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4901,7 +4932,6 @@ CONFIG_POWERNV_CPUIDLE=y # CONFIG_POWERNV_OP_PANEL is not set # CONFIG_POWERPC64_CPU is not set # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y # CONFIG_POWER_RESET_LINKSTATION is not set @@ -5184,7 +5214,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -5216,6 +5246,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77650=m # CONFIG_REGULATOR_MAX77826 is not set CONFIG_REGULATOR_MAX77857=m @@ -5320,6 +5351,7 @@ CONFIG_RMI4_SPI=m CONFIG_RMNET=m # CONFIG_ROCKCHIP_PHY is not set CONFIG_ROCKER=m +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -5374,7 +5406,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ABEOZ9=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5687,11 +5718,12 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# 
CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -5832,6 +5864,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -5893,6 +5926,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -6062,7 +6096,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -6152,6 +6185,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6296,8 +6330,10 @@ CONFIG_SND_SOC_AK5558=m # CONFIG_SND_SOC_ARNDALE is not set CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -6502,12 +6538,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set CONFIG_SND_SOC_RT1017_SDCA_SDW=m # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -6530,6 +6560,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6969,6 +7000,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -7023,6 +7055,7 @@ CONFIG_TEST_LOCKUP=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7087,8 +7120,6 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set -CONFIG_TI_ICSS_IEP=m CONFIG_TI_LMP92064=m CONFIG_TIME_KUNIT_TEST=m CONFIG_TIME_NS=y @@ -7276,6 +7307,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7287,7 +7319,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m 
-CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -7368,6 +7399,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CONFIGFS_F_MIDI2=y # CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set @@ -7498,6 +7530,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m CONFIG_USB_MA901=m # CONFIG_USB_MASS_STORAGE is not set @@ -7550,6 +7583,7 @@ CONFIG_USB_ONBOARD_HUB=m # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -7752,7 +7786,7 @@ CONFIG_VIDEO_BT819=m CONFIG_VIDEO_BT848=m CONFIG_VIDEO_BT856=m CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_CADENCE_CSI2RX=m +# CONFIG_VIDEO_CADENCE_CSI2RX is not set CONFIG_VIDEO_CADENCE_CSI2TX=m # CONFIG_VIDEO_CADENCE is not set # CONFIG_VIDEO_CAFE_CCIC is not set @@ -7829,10 +7863,12 @@ CONFIG_VIDEO_M52790=m CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MAX96712 is not set # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -8121,6 +8157,7 @@ CONFIG_XDP_SOCKETS_DIAG=m CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -8229,19 +8266,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-ppc64le-debug-rhel.config b/SOURCES/kernel-ppc64le-debug-rhel.config index 2499a4b..64f780e 100644 --- a/SOURCES/kernel-ppc64le-debug-rhel.config +++ b/SOURCES/kernel-ppc64le-debug-rhel.config @@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m CONFIG_ARCH_FORCE_MAX_ORDER=8 # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# CONFIG_ARCH_MA35 is not set CONFIG_ARCH_MEMORY_PROBE=y # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=14 @@ -276,6 +275,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -388,6 +388,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is 
not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -517,7 +518,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -610,7 +610,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -681,6 +680,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -758,6 +758,7 @@ CONFIG_CIFS_SMB_DIRECT=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -829,7 +830,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -979,6 +979,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1268,6 +1273,7 @@ CONFIG_DP83TC811_PHY=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1340,6 +1346,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1366,36 +1373,90 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# 
CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1572,7 +1633,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1602,7 +1662,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y 
+CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1676,7 +1741,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1774,7 +1839,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1783,6 +1850,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTR_FIXUP_SELFTEST is not set +# CONFIG_FUEL_GAUGE_MM8013 is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set # CONFIG_FUNCTION_GRAPH_RETVAL is not set CONFIG_FUNCTION_GRAPH_TRACER=y @@ -1921,6 +1989,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set CONFIG_GUP_TEST=y CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2093,6 +2162,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2243,6 +2313,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2260,6 +2331,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2330,7 +2402,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2444,6 +2515,7 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2486,6 +2558,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2508,7 +2581,8 @@ CONFIG_IOMMU_DEBUGFS=y # CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set CONFIG_IOMMU_DEFAULT_DMA_STRICT=y # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2806,7 +2880,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -2844,6 +2918,7 @@ 
CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set CONFIG_KVM_BOOK3S_PR_POSSIBLE=y CONFIG_KVM_GUEST=y +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3013,6 +3088,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3027,6 +3103,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3035,6 +3112,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3063,7 +3141,6 @@ CONFIG_MANTIS_CORE=m CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y CONFIG_MARVELL_PHY=m # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3097,6 +3174,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3122,7 +3200,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3131,6 +3209,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3197,7 +3276,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3333,6 +3412,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3442,6 +3522,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3516,6 +3599,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3622,9 +3707,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3746,6 +3828,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y 
CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3758,15 +3841,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3795,6 +3875,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -3895,7 +3976,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4089,8 +4170,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4112,7 +4194,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=m # CONFIG_NVSW_SN2201 is not set @@ -4219,6 +4303,7 @@ CONFIG_PATA_PLATFORM=m # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4266,6 +4351,7 @@ CONFIG_PCI_STUB=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4290,8 +4376,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4301,8 +4385,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_LAN966X_SERDES is not set CONFIG_PHYLIB=y # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4329,6 +4411,7 @@ CONFIG_PID_NS=y CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4399,7 +4482,6 @@ CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_CPUIDLE=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set CONFIG_POWER_RESET_GPIO=y # CONFIG_POWER_RESET_LTC2952 is not set @@ -4600,7 +4682,6 @@ CONFIG_QFMT_V2=y # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set # CONFIG_QRTR is not set 
CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4702,6 +4783,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4777,6 +4859,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -4818,7 +4901,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5102,6 +5184,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5256,6 +5339,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5314,6 +5398,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5482,7 +5567,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5562,6 +5646,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5699,8 +5784,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -5791,6 +5878,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -5905,12 +5993,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW 
is not set @@ -5933,6 +6015,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6129,7 +6212,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6331,6 +6413,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6384,6 +6467,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6604,6 +6688,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6680,6 +6765,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -6778,6 +6864,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m # CONFIG_USB_LED_TRIG is not set CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -6823,6 +6910,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -6967,6 +7055,9 @@ CONFIG_VETH=m # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m # CONFIG_VFIO_MDEV is not set @@ -7080,11 +7171,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7419,19 +7512,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set 
+CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-ppc64le-fedora.config b/SOURCES/kernel-ppc64le-fedora.config index 544c116..df182c6 100644 --- a/SOURCES/kernel-ppc64le-fedora.config +++ b/SOURCES/kernel-ppc64le-fedora.config @@ -479,6 +479,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM54140_PHY=m CONFIG_BCM7XXX_PHY=m @@ -620,7 +629,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set CONFIG_BRIDGE_CFM=y CONFIG_BRIDGE_EBT_802_3=m @@ -728,7 +736,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -808,6 +815,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -896,6 +904,7 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -968,7 +977,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1034,6 +1042,7 @@ CONFIG_CROS_EC_TYPEC=m CONFIG_CROS_EC_UART=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1125,6 +1134,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1223,6 +1237,7 @@ CONFIG_DE2104X=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set # CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1312,7 +1327,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1402,7 +1416,7 @@ CONFIG_DNS_RESOLVER=m CONFIG_DP83640_PHY=m CONFIG_DP83822_PHY=m CONFIG_DP83848_PHY=m -# 
CONFIG_DP83867_PHY is not set +CONFIG_DP83867_PHY=m CONFIG_DP83869_PHY=m # CONFIG_DP83TC811_PHY is not set # CONFIG_DP83TD510_PHY is not set @@ -1411,6 +1425,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y # CONFIG_DRBD_FAULT_INJECTION is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL_QAIC is not set CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1517,9 +1532,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m # CONFIG_DRM_PANEL_KHADAS_TS050 is not set @@ -1549,6 +1566,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set # CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -1767,7 +1785,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_COCO_SECRET=y CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y @@ -1880,7 +1897,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2020,6 +2037,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTR_FIXUP_SELFTEST is not set +CONFIG_FUEL_GAUGE_MM8013=m # CONFIG_FUNCTION_ERROR_INJECTION is not set CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FUNCTION_GRAPH_TRACER=y @@ -2158,6 +2176,7 @@ CONFIG_GREENASIA_FF=y # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set CONFIG_GTP=m +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2502,6 +2521,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -2588,7 +2608,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2794,8 +2813,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y # CONFIG_IPMB_DEVICE_INTERFACE is not set CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3104,7 +3121,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3143,6 +3160,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set CONFIG_KVM_GUEST=y +CONFIG_KVM_MAX_NR_VCPUS=4096 # 
CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -3200,6 +3218,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -3327,6 +3346,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3341,6 +3361,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3349,6 +3370,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -3415,6 +3437,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -3425,6 +3448,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -3454,6 +3478,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -3665,18 +3690,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -3776,6 +3805,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3853,6 +3885,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AR7_PARTS is not set @@ -4000,9 +4034,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4075,12 +4106,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m 
CONFIG_NETFILTER_XT_MARK=m @@ -4173,6 +4204,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4184,15 +4216,12 @@ CONFIG_NET_NS=y CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4226,6 +4255,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m CONFIG_NET_TULIP=y CONFIG_NET_UDP_TUNNEL=m CONFIG_NET_VENDOR_3COM=y @@ -4360,7 +4390,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -4554,8 +4584,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_LAYOUT_ONIE_TLV=m CONFIG_NVMEM_LAYOUT_SL28_VPD=m @@ -4575,7 +4606,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y # CONFIG_NVSW_SN2201 is not set @@ -4697,6 +4730,7 @@ CONFIG_PATA_WINBOND=m # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4745,6 +4779,7 @@ CONFIG_PCI_STUB=y CONFIG_PCI_SW_SWITCHTEC=m CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4769,16 +4804,12 @@ CONFIG_PHY_CADENCE_SIERRA=m CONFIG_PHY_CADENCE_TORRENT=m # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_HI3670_PCIE is not set # CONFIG_PHY_HI3670_USB is not set # CONFIG_PHY_LAN966X_SERDES is not set CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4872,7 +4903,6 @@ CONFIG_POWERNV_CPUIDLE=y # CONFIG_POWERNV_OP_PANEL is not set # CONFIG_POWERPC64_CPU is not set # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_GPIO=y # CONFIG_POWER_RESET_LINKSTATION is not set @@ -5155,7 +5185,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -5187,6 +5217,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77650=m # 
CONFIG_REGULATOR_MAX77826 is not set CONFIG_REGULATOR_MAX77857=m @@ -5291,6 +5322,7 @@ CONFIG_RMI4_SPI=m CONFIG_RMNET=m # CONFIG_ROCKCHIP_PHY is not set CONFIG_ROCKER=m +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -5345,7 +5377,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ABEOZ9=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5658,11 +5689,12 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -5803,6 +5835,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -5864,6 +5897,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -6033,7 +6067,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -6123,6 +6156,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6266,8 +6300,10 @@ CONFIG_SND_SOC_AK5558=m # CONFIG_SND_SOC_ARNDALE is not set CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -6472,12 +6508,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set CONFIG_SND_SOC_RT1017_SDCA_SDW=m # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -6500,6 +6530,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6938,6 +6969,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6992,6 +7024,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # 
CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7056,8 +7089,6 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set -CONFIG_TI_ICSS_IEP=m CONFIG_TI_LMP92064=m CONFIG_TIME_KUNIT_TEST=m CONFIG_TIME_NS=y @@ -7245,6 +7276,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7256,7 +7288,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m -CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -7337,6 +7368,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CONFIGFS_F_MIDI2=y # CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set @@ -7467,6 +7499,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m CONFIG_USB_MA901=m # CONFIG_USB_MASS_STORAGE is not set @@ -7519,6 +7552,7 @@ CONFIG_USB_ONBOARD_HUB=m # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -7721,7 +7755,7 @@ CONFIG_VIDEO_BT819=m CONFIG_VIDEO_BT848=m CONFIG_VIDEO_BT856=m CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_CADENCE_CSI2RX=m +# CONFIG_VIDEO_CADENCE_CSI2RX is not set CONFIG_VIDEO_CADENCE_CSI2TX=m # CONFIG_VIDEO_CADENCE is not set # CONFIG_VIDEO_CAFE_CCIC is not set @@ -7798,10 +7832,12 @@ CONFIG_VIDEO_M52790=m CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MAX96712 is not set # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -8090,6 +8126,7 @@ CONFIG_XDP_SOCKETS_DIAG=m CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -8198,19 +8235,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-ppc64le-rhel.config b/SOURCES/kernel-ppc64le-rhel.config index 252be59..40b9392 100644 --- a/SOURCES/kernel-ppc64le-rhel.config +++ b/SOURCES/kernel-ppc64le-rhel.config @@ -233,7 +233,6 @@ 
CONFIG_AQUANTIA_PHY=m CONFIG_ARCH_FORCE_MAX_ORDER=8 # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# CONFIG_ARCH_MA35 is not set CONFIG_ARCH_MEMORY_PROBE=y # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=14 @@ -276,6 +275,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -388,6 +388,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -517,7 +518,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -610,7 +610,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -681,6 +680,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y # CONFIG_CFG80211_DEBUGFS is not set CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -758,6 +758,7 @@ CONFIG_CIFS_SMB_DIRECT=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -829,7 +830,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -979,6 +979,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1260,6 +1265,7 @@ CONFIG_DP83TC811_PHY=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1332,6 +1338,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1358,36 +1365,90 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# 
CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not 
set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1564,7 +1625,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1594,7 +1654,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1660,7 +1725,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1758,7 +1823,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1767,6 +1834,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y # CONFIG_FTR_FIXUP_SELFTEST is not set +# CONFIG_FUEL_GAUGE_MM8013 is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set # CONFIG_FUNCTION_GRAPH_RETVAL is not set CONFIG_FUNCTION_GRAPH_TRACER=y @@ -1905,6 +1973,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2077,6 +2146,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2227,6 +2297,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2244,6 +2315,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2314,7 +2386,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2428,6 +2499,7 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2470,6 +2542,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2492,7 +2565,8 @@ CONFIG_IO_EVENT_IRQ=y # 
CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set CONFIG_IOMMU_DEFAULT_DMA_STRICT=y # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2785,7 +2859,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -2824,6 +2898,7 @@ CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND=y # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set CONFIG_KVM_BOOK3S_PR_POSSIBLE=y CONFIG_KVM_GUEST=y +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y # CONFIG_KVM_WERROR is not set @@ -2993,6 +3068,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3007,6 +3083,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3015,6 +3092,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3043,7 +3121,6 @@ CONFIG_MANTIS_CORE=m CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y CONFIG_MARVELL_PHY=m # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3077,6 +3154,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3102,7 +3180,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3111,6 +3189,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3177,7 +3256,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3313,6 +3392,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3422,6 +3502,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3496,6 +3579,8 @@ CONFIG_MT76x2U=m 
CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3602,9 +3687,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3726,6 +3808,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3738,15 +3821,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3775,6 +3855,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -3875,7 +3956,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4069,8 +4150,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4092,7 +4174,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=m # CONFIG_NVSW_SN2201 is not set @@ -4198,6 +4282,7 @@ CONFIG_PATA_PLATFORM=m # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4245,6 +4330,7 @@ CONFIG_PCI_STUB=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4269,8 +4355,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4280,8 +4364,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_LAN966X_SERDES is not set CONFIG_PHYLIB=y # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4308,6 +4390,7 @@ CONFIG_PID_NS=y CONFIG_PINCTRL_ALDERLAKE=m # 
CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4378,7 +4461,6 @@ CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_CPUIDLE=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set CONFIG_POWER_RESET_GPIO=y # CONFIG_POWER_RESET_LTC2952 is not set @@ -4579,7 +4661,6 @@ CONFIG_QFMT_V2=y # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set # CONFIG_QRTR is not set CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4681,6 +4762,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4756,6 +4838,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -4797,7 +4880,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5081,6 +5163,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5235,6 +5318,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5293,6 +5377,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5461,7 +5546,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5541,6 +5625,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5677,8 +5762,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -5769,6 +5856,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set 
# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -5883,12 +5971,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -5911,6 +5993,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6106,7 +6189,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6308,6 +6390,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6361,6 +6444,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6581,6 +6665,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6657,6 +6742,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -6755,6 +6841,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m # CONFIG_USB_LED_TRIG is not set CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -6800,6 +6887,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -6944,6 +7032,9 @@ CONFIG_VETH=m # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m # CONFIG_VFIO_MDEV is not set @@ -7057,11 +7148,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # 
CONFIG_VIDEO_MT9T112 is not set @@ -7396,19 +7489,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-s390x-debug-fedora.config b/SOURCES/kernel-s390x-debug-fedora.config index 2057166..e8b901f 100644 --- a/SOURCES/kernel-s390x-debug-fedora.config +++ b/SOURCES/kernel-s390x-debug-fedora.config @@ -484,6 +484,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y +CONFIG_BCACHEFS_DEBUG=y +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_LOCK_TIME_STATS=y +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM54140_PHY=m CONFIG_BCM7XXX_PHY=m @@ -624,7 +633,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y CONFIG_BRIDGE_CFM=y CONFIG_BRIDGE_EBT_802_3=m @@ -732,7 +740,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -815,6 +822,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -905,6 +913,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIO2_BRIDGE=y # CONFIG_CIO_INJECT is not set CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -976,7 +985,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1042,6 +1050,7 @@ CONFIG_CROS_EC_TYPEC=m CONFIG_CROS_EC_UART=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1128,6 +1137,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m 
@@ -1234,6 +1248,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set CONFIG_DEBUG_CREDENTIALS=y # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1329,7 +1344,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1418,7 +1432,7 @@ CONFIG_DNS_RESOLVER=m # CONFIG_DP83640_PHY is not set CONFIG_DP83822_PHY=m CONFIG_DP83848_PHY=m -# CONFIG_DP83867_PHY is not set +CONFIG_DP83867_PHY=m CONFIG_DP83869_PHY=m # CONFIG_DP83TC811_PHY is not set # CONFIG_DP83TD510_PHY is not set @@ -1427,6 +1441,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y CONFIG_DRBD_FAULT_INJECTION=y +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL_QAIC is not set CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1532,9 +1547,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m # CONFIG_DRM_PANEL_KHADAS_TS050 is not set @@ -1564,6 +1581,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set # CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -1778,7 +1796,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_COCO_SECRET=y CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y @@ -1901,7 +1918,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1912,6 +1929,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_IMX is not set +# CONFIG_FB is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_LE80578 is not set # CONFIG_FB_MATROX_G is not set @@ -1952,7 +1970,6 @@ CONFIG_FB_VESA=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_VT8623 is not set # CONFIG_FB_XILINX is not set -CONFIG_FB=y # CONFIG_FCOE is not set # CONFIG_FDDI is not set # CONFIG_FEALNX is not set @@ -2031,6 +2048,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +CONFIG_FUEL_GAUGE_MM8013=m # CONFIG_FUNCTION_ERROR_INJECTION is not set CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FUNCTION_GRAPH_TRACER=y @@ -2496,6 +2514,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -2581,7 +2600,6 @@ CONFIG_IMA_NG_TEMPLATE=y CONFIG_IMA_READ_POLICY=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set 
CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2785,8 +2803,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y # CONFIG_IPMB_DEVICE_INTERFACE is not set CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3103,7 +3119,7 @@ CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3134,6 +3150,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=m CONFIG_KUNIT=m CONFIG_KUNIT_TEST=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y # CONFIG_KVM_S390_UCONTROL is not set CONFIG_KVM_SMM=y @@ -3190,6 +3207,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -3315,6 +3333,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3329,6 +3348,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3338,6 +3358,7 @@ CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m # CONFIG_MAC80211 is not set +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211_MESH=y CONFIG_MAC80211_MESSAGE_TRACING=y @@ -3414,6 +3435,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -3424,6 +3446,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -3453,6 +3476,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -3664,18 +3688,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -3775,6 +3803,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3852,6 +3883,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # 
CONFIG_MTD_AR7_PARTS is not set @@ -3997,9 +4030,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4072,12 +4102,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4171,6 +4201,7 @@ CONFIG_NET_IPVTI=m # CONFIG_NETIUCV is not set CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4182,15 +4213,12 @@ CONFIG_NET_NS=y CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4224,6 +4252,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m CONFIG_NET_TULIP=y CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4357,7 +4386,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -4550,8 +4579,9 @@ CONFIG_NUMA=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_LAYOUT_ONIE_TLV=m CONFIG_NVMEM_LAYOUT_SL28_VPD=m @@ -4571,7 +4601,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4688,6 +4720,7 @@ CONFIG_PATA_WINBOND=m # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4737,6 +4770,7 @@ CONFIG_PCI_STUB=y CONFIG_PCI_SW_SWITCHTEC=m CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4762,16 +4796,12 @@ CONFIG_PHY_CADENCE_SIERRA=m CONFIG_PHY_CADENCE_TORRENT=m # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_HI3670_PCIE is not set # CONFIG_PHY_HI3670_USB is not set # CONFIG_PHY_LAN966X_SERDES is not set CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 
is not set @@ -4858,7 +4888,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET is not set # CONFIG_POWER_RESET_LINKSTATION is not set # CONFIG_POWER_RESET_LTC2952 is not set @@ -5106,7 +5135,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=m @@ -5138,6 +5167,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77650=m # CONFIG_REGULATOR_MAX77826 is not set CONFIG_REGULATOR_MAX77857=m @@ -5243,6 +5273,7 @@ CONFIG_RMI4_SPI=m CONFIG_RMNET=m # CONFIG_ROCKCHIP_PHY is not set CONFIG_ROCKER=m +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -5295,7 +5326,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ABEOZ9=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5619,11 +5649,12 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -5763,6 +5794,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -5823,6 +5855,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -5990,7 +6023,6 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_SLIP is not set # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -6082,6 +6114,7 @@ CONFIG_SND_FIREWORKS=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6226,8 +6259,10 @@ CONFIG_SND_SOC_AK5558=m # CONFIG_SND_SOC_ARNDALE is not set CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -6432,12 +6467,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set CONFIG_SND_SOC_RT1017_SDCA_SDW=m # CONFIG_SND_SOC_RT1308 is not set # 
CONFIG_SND_SOC_RT1308_SDW is not set @@ -6460,6 +6489,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6895,6 +6925,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6949,6 +6980,7 @@ CONFIG_TEST_LOCKUP=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7012,8 +7044,6 @@ CONFIG_TIFM_7XX1=m # CONFIG_TIFM_CORE is not set CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set -CONFIG_TI_ICSS_IEP=m CONFIG_TI_LMP92064=m CONFIG_TIME_KUNIT_TEST=m CONFIG_TIME_NS=y @@ -7216,6 +7246,7 @@ CONFIG_TYPEC_MUX_FSA4480=m CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7227,7 +7258,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_TPS6598X=m CONFIG_TYPEC_UCSI=m -CONFIG_TYPEC_WCOVE=m CONFIG_TYPEC_WUSB3801=m CONFIG_TYPHOON=m CONFIG_UACCE=m @@ -7307,6 +7337,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CONFIGFS_F_MIDI2=y # CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set @@ -7436,6 +7467,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m CONFIG_USB_MA901=m # CONFIG_USB_MASS_STORAGE is not set @@ -7485,6 +7517,7 @@ CONFIG_USB_ONBOARD_HUB=m # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -7689,7 +7722,7 @@ CONFIG_VIDEO_BT819=m CONFIG_VIDEO_BT848=m CONFIG_VIDEO_BT856=m CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_CADENCE_CSI2RX=m +# CONFIG_VIDEO_CADENCE_CSI2RX is not set CONFIG_VIDEO_CADENCE_CSI2TX=m # CONFIG_VIDEO_CADENCE is not set # CONFIG_VIDEO_CAFE_CCIC is not set @@ -7766,10 +7799,12 @@ CONFIG_VIDEO_M52790=m CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MAX96712 is not set # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -8059,6 +8094,7 @@ CONFIG_XDP_SOCKETS_DIAG=m CONFIG_XDP_SOCKETS=y # CONFIG_XEN_GRANT_DMA_ALLOC is not set CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_FRONTEND is not set @@ -8166,19 +8202,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# 
CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-s390x-debug-rhel.config b/SOURCES/kernel-s390x-debug-rhel.config index 03d4e91..4d051d7 100644 --- a/SOURCES/kernel-s390x-debug-rhel.config +++ b/SOURCES/kernel-s390x-debug-rhel.config @@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# CONFIG_ARCH_MA35 is not set # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=28 CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 @@ -275,6 +274,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y CONFIG_ASN1=y # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -387,6 +387,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -515,7 +516,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -608,7 +608,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -682,6 +681,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -761,6 +761,7 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y # CONFIG_CIO_INJECT is not set CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -830,7 +831,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -895,7 +895,6 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y CONFIG_CRYPTO_AES_S390=m # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -912,7 +911,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA_S390=y CONFIG_CRYPTO_CMAC=y @@ -977,6 +975,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# 
CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1000,7 +1003,6 @@ CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RSA=y @@ -1274,6 +1276,7 @@ CONFIG_DP83TC811_PHY=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1346,6 +1349,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1373,36 +1377,90 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not 
set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1577,7 +1635,6 @@ CONFIG_EDAC_PND2=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_EEPROM_MAX6875 is not set # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1607,7 +1664,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y # CONFIG_ETHOC is not set CONFIG_ETHTOOL_NETLINK=y @@ -1683,7 +1745,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1694,9 +1756,9 @@ CONFIG_FB_EFI=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_IMX is not set +# CONFIG_FB is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_LE80578 is not set -CONFIG_FB=m # CONFIG_FB_MATROX_G is not set # CONFIG_FB_MATROX_I2C is not set # CONFIG_FB_MATROX is not set @@ -1780,7 +1842,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1788,6 +1852,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set # CONFIG_FUNCTION_ERROR_INJECTION is not 
set # CONFIG_FUNCTION_GRAPH_RETVAL is not set CONFIG_FUNCTION_GRAPH_TRACER=y @@ -1924,6 +1989,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set CONFIG_GUP_TEST=y CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2095,6 +2161,7 @@ CONFIG_HMC_DRV=m # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2234,6 +2301,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2248,6 +2316,7 @@ CONFIG_ICE_SWITCHDEV=y # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2317,7 +2386,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2431,6 +2499,7 @@ CONFIG_INPUT_SPARSEKMAP=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2473,6 +2542,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2494,7 +2564,8 @@ CONFIG_IOMMU_DEBUGFS=y # CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set CONFIG_IOMMU_DEFAULT_DMA_STRICT=y # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2793,7 +2864,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -2824,6 +2895,7 @@ CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_PROVE_MMU=y # CONFIG_KVM_S390_UCONTROL is not set CONFIG_KVM_SMM=y @@ -2990,6 +3062,7 @@ CONFIG_LRU_GEN=y CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3004,6 +3077,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y # CONFIG_LXT_PHY is not set @@ -3013,6 +3087,7 @@ CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m # CONFIG_MAC80211 is not set +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_MESSAGE_TRACING=y @@ -3049,7 +3124,6 @@ 
CONFIG_MARCH_Z14=y CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y # CONFIG_MARVELL_PHY is not set # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3084,6 +3158,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3107,7 +3182,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3116,6 +3191,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3182,7 +3258,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3318,6 +3394,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3426,6 +3503,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3500,6 +3580,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3605,9 +3687,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3730,6 +3809,7 @@ CONFIG_NET_IPVTI=m # CONFIG_NETIUCV is not set CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3742,15 +3822,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3779,6 +3856,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -3878,7 +3956,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y 
CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4072,8 +4150,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4095,7 +4174,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4200,6 +4281,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set CONFIG_PCI_DEBUG=y # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4248,6 +4330,7 @@ CONFIG_PCI_QUIRKS=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4273,8 +4356,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4284,8 +4365,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_LAN966X_SERDES is not set # CONFIG_PHYLIB is not set # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4311,6 +4390,7 @@ CONFIG_PID_NS=y CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4378,7 +4458,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4542,7 +4621,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set # CONFIG_QRTR is not set CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4644,6 +4722,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4718,6 +4797,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -4757,7 +4837,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5084,6 +5163,7 @@ CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y # 
CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5238,6 +5318,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5296,6 +5377,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5454,7 +5536,6 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_SLIP is not set # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5537,6 +5618,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5674,8 +5756,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -5766,6 +5850,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -5880,12 +5965,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -5908,6 +5987,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6104,7 +6184,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6303,6 +6382,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6356,6 +6436,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not 
set
 CONFIG_TEST_MIN_HEAP=m
 # CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
 # CONFIG_TEST_OVERFLOW is not set
 # CONFIG_TEST_PARMAN is not set
 # CONFIG_TEST_POWER is not set
@@ -6590,6 +6671,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
 # CONFIG_TYPEC_MUX_GPIO_SBU is not set
 # CONFIG_TYPEC_MUX_NB7VPQ904M is not set
 CONFIG_TYPEC_MUX_PI3USB30532=m
+# CONFIG_TYPEC_MUX_PTN36502 is not set
 # CONFIG_TYPEC_NVIDIA_ALTMODE is not set
 # CONFIG_TYPEC_QCOM_PMIC is not set
 # CONFIG_TYPEC_RT1711H is not set
@@ -6665,6 +6747,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
 CONFIG_USB_CHIPIDEA_IMX=m
 # CONFIG_USB_CHIPIDEA is not set
 CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
 # CONFIG_USB_CONN_GPIO is not set
 CONFIG_USB_CXACRU=m
 # CONFIG_USB_CYPRESS_CY7C63 is not set
@@ -6762,6 +6845,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
 # CONFIG_USB_LED_TRIG is not set
 CONFIG_USB_LEGOTOWER=m
 # CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
 CONFIG_USB_M5602=m
 # CONFIG_USB_MA901 is not set
 # CONFIG_USB_MAX3421_HCD is not set
@@ -6804,6 +6888,7 @@ CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_PRODUCTLIST is not set
 # CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
 CONFIG_USB_PCI=y
 # CONFIG_USBPCWATCHDOG is not set
 CONFIG_USB_PEGASUS=m
@@ -6950,6 +7035,9 @@ CONFIG_VFAT_FS=m
 # CONFIG_VFIO_AMBA is not set
 CONFIG_VFIO_AP=m
 CONFIG_VFIO_CCW=m
+CONFIG_VFIO_CONTAINER=y
+CONFIG_VFIO_DEVICE_CDEV=y
+CONFIG_VFIO_GROUP=y
 CONFIG_VFIO_IOMMU_TYPE1=m
 CONFIG_VFIO=m
 CONFIG_VFIO_MDEV=m
@@ -7064,11 +7152,13 @@ CONFIG_VIDEO_IVTV=m
 # CONFIG_VIDEO_M5MOLS is not set
 # CONFIG_VIDEO_MAX9286 is not set
 # CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
 # CONFIG_VIDEO_ML86V7667 is not set
 # CONFIG_VIDEO_MSP3400 is not set
 # CONFIG_VIDEO_MT9M001 is not set
 # CONFIG_VIDEO_MT9M032 is not set
 # CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
 # CONFIG_VIDEO_MT9P031 is not set
 # CONFIG_VIDEO_MT9T001 is not set
 # CONFIG_VIDEO_MT9T112 is not set
@@ -7405,19 +7495,18 @@ CONFIG_ZENIFY=y
 CONFIG_WINESYNC=y
 CONFIG_USER_NS_UNPRIVILEGED=y
 CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
 CONFIG_HID_IPTS=m
 CONFIG_HID_ITHC=m
 CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
 CONFIG_IPC_CLASSES=y
 CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-fedora.config b/SOURCES/kernel-s390x-fedora.config
index 1c1503d..e26a697 100644
--- a/SOURCES/kernel-s390x-fedora.config
+++ b/SOURCES/kernel-s390x-fedora.config
@@ -482,6 +482,15 @@ CONFIG_BAYCOM_SER_HDX=m
 # CONFIG_BCACHE_ASYNC_REGISTRATION is not set
 # CONFIG_BCACHE_CLOSURES_DEBUG is not set
 # CONFIG_BCACHE_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG is not set
+# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
+# CONFIG_BCACHEFS_ERASURE_CODING is not set
+CONFIG_BCACHEFS_FS=m
+# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
+# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
+CONFIG_BCACHEFS_POSIX_ACL=y
+CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM54140_PHY=m CONFIG_BCM7XXX_PHY=m @@ -622,7 +631,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set CONFIG_BRIDGE_CFM=y CONFIG_BRIDGE_EBT_802_3=m @@ -730,7 +738,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -813,6 +820,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -903,6 +911,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIO2_BRIDGE=y # CONFIG_CIO_INJECT is not set CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -974,7 +983,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1040,6 +1048,7 @@ CONFIG_CROS_EC_TYPEC=m CONFIG_CROS_EC_UART=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1126,6 +1135,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1232,6 +1246,7 @@ CONFIG_DE2104X=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set # CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1319,7 +1334,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1407,7 +1421,7 @@ CONFIG_DNS_RESOLVER=m # CONFIG_DP83640_PHY is not set CONFIG_DP83822_PHY=m CONFIG_DP83848_PHY=m -# CONFIG_DP83867_PHY is not set +CONFIG_DP83867_PHY=m CONFIG_DP83869_PHY=m # CONFIG_DP83TC811_PHY is not set # CONFIG_DP83TD510_PHY is not set @@ -1416,6 +1430,7 @@ CONFIG_DPOT_DAC=m # CONFIG_DPS310 is not set CONFIG_DRAGONRISE_FF=y # CONFIG_DRBD_FAULT_INJECTION is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL_QAIC is not set CONFIG_DRM_ACCEL=y CONFIG_DRM_AMD_ACP=y @@ -1521,9 +1536,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m # CONFIG_DRM_PANEL_KHADAS_TS050 is not set @@ 
-1553,6 +1570,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set # CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -1767,7 +1785,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m CONFIG_EFI_COCO_SECRET=y CONFIG_EFI_CUSTOM_SSDT_OVERLAYS=y @@ -1882,7 +1899,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1893,6 +1910,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_IMX is not set +# CONFIG_FB is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_LE80578 is not set # CONFIG_FB_MATROX_G is not set @@ -1933,7 +1951,6 @@ CONFIG_FB_VESA=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_VT8623 is not set # CONFIG_FB_XILINX is not set -CONFIG_FB=y # CONFIG_FCOE is not set # CONFIG_FDDI is not set # CONFIG_FEALNX is not set @@ -2012,6 +2029,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +CONFIG_FUEL_GAUGE_MM8013=m # CONFIG_FUNCTION_ERROR_INJECTION is not set CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FUNCTION_GRAPH_TRACER=y @@ -2476,6 +2494,7 @@ CONFIG_ICPLUS_PHY=m # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m # CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set @@ -2561,7 +2580,6 @@ CONFIG_IMA_NG_TEMPLATE=y CONFIG_IMA_READ_POLICY=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2765,8 +2783,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y # CONFIG_IPMB_DEVICE_INTERFACE is not set CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3076,7 +3092,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3108,6 +3124,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=m CONFIG_KUNIT=m CONFIG_KUNIT_TEST=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set # CONFIG_KVM_S390_UCONTROL is not set CONFIG_KVM_SMM=y @@ -3164,6 +3181,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_GROUP_MULTICOLOR=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -3289,6 +3307,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3303,6 +3322,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m 
@@ -3312,6 +3332,7 @@ CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m # CONFIG_MAC80211 is not set +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211_MESH=y # CONFIG_MAC80211_MESSAGE_TRACING is not set @@ -3387,6 +3408,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -3397,6 +3419,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -3426,6 +3449,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -3637,18 +3661,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -3747,6 +3775,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3824,6 +3855,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AR7_PARTS is not set @@ -3969,9 +4002,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4044,12 +4074,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4143,6 +4173,7 @@ CONFIG_NET_IPVTI=m # CONFIG_NETIUCV is not set CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4154,15 +4185,12 @@ CONFIG_NET_NS=y CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4196,6 +4224,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m 
CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m CONFIG_NET_TULIP=y CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4329,7 +4358,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -4522,8 +4551,9 @@ CONFIG_NUMA=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_SHIELD_FF=y # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_LAYOUT_ONIE_TLV=m CONFIG_NVMEM_LAYOUT_SL28_VPD=m @@ -4543,7 +4573,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4659,6 +4691,7 @@ CONFIG_PATA_WINBOND=m # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4708,6 +4741,7 @@ CONFIG_PCI_STUB=y CONFIG_PCI_SW_SWITCHTEC=m CONFIG_PCI=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4733,16 +4767,12 @@ CONFIG_PHY_CADENCE_SIERRA=m CONFIG_PHY_CADENCE_TORRENT=m # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_HI3670_PCIE is not set # CONFIG_PHY_HI3670_USB is not set # CONFIG_PHY_LAN966X_SERDES is not set CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4829,7 +4859,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET is not set # CONFIG_POWER_RESET_LINKSTATION is not set # CONFIG_POWER_RESET_LTC2952 is not set @@ -5077,7 +5106,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=m @@ -5109,6 +5138,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77650=m # CONFIG_REGULATOR_MAX77826 is not set CONFIG_REGULATOR_MAX77857=m @@ -5214,6 +5244,7 @@ CONFIG_RMI4_SPI=m CONFIG_RMNET=m # CONFIG_ROCKCHIP_PHY is not set CONFIG_ROCKER=m +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -5266,7 +5297,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ABEOZ9=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5590,11 +5620,12 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y 
 CONFIG_SECRETMEM=y
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
 # CONFIG_SECURITY_APPARMOR is not set
-# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY_DMESG_RESTRICT=y
 CONFIG_SECURITYFS=y
 CONFIG_SECURITY_INFINIBAND=y
 CONFIG_SECURITY_LANDLOCK=y
@@ -5734,6 +5765,7 @@ CONFIG_SENSORS_LTC2947_SPI=m
 CONFIG_SENSORS_LTC2978=m
 # CONFIG_SENSORS_LTC2978_REGULATOR is not set
 CONFIG_SENSORS_LTC2990=m
+CONFIG_SENSORS_LTC2991=m
 # CONFIG_SENSORS_LTC2992 is not set
 CONFIG_SENSORS_LTC3815=m
 CONFIG_SENSORS_LTC4151=m
@@ -5794,6 +5826,7 @@ CONFIG_SENSORS_PLI1209BC=m
 CONFIG_SENSORS_PLI1209BC_REGULATOR=y
 CONFIG_SENSORS_PM6764TR=m
 CONFIG_SENSORS_PMBUS=m
+CONFIG_SENSORS_POWERZ=m
 CONFIG_SENSORS_POWR1220=m
 CONFIG_SENSORS_PWM_FAN=m
 # CONFIG_SENSORS_PXE1610 is not set
@@ -5961,7 +5994,6 @@ CONFIG_SLIP_COMPRESSED=y
 # CONFIG_SLIP is not set
 # CONFIG_SLIP_MODE_SLIP6 is not set
 CONFIG_SLIP_SMART=y
-# CONFIG_SLOB_DEPRECATED is not set
 CONFIG_SLUB_CPU_PARTIAL=y
 # CONFIG_SLUB_DEBUG_ON is not set
 CONFIG_SLUB_DEBUG=y
@@ -6053,6 +6085,7 @@ CONFIG_SND_FIREWORKS=m
 CONFIG_SND_FM801_TEA575X_BOOL=y
 CONFIG_SND_GINA20=m
 CONFIG_SND_GINA24=m
+CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m
 CONFIG_SND_HDA_CODEC_ANALOG=m
 CONFIG_SND_HDA_CODEC_CA0110=m
 CONFIG_SND_HDA_CODEC_CA0132_DSP=y
@@ -6196,8 +6229,10 @@ CONFIG_SND_SOC_AK5558=m
 # CONFIG_SND_SOC_ARNDALE is not set
 CONFIG_SND_SOC_AUDIO_IIO_AUX=m
 CONFIG_SND_SOC_AW8738=m
+CONFIG_SND_SOC_AW87390=m
 CONFIG_SND_SOC_AW88261=m
 CONFIG_SND_SOC_AW88395=m
+CONFIG_SND_SOC_AW88399=m
 CONFIG_SND_SOC_BD28623=m
 CONFIG_SND_SOC_BT_SCO=m
 CONFIG_SND_SOC_CHV3_CODEC=m
@@ -6402,12 +6437,6 @@ CONFIG_SND_SOC_PCM3060_SPI=m
 # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set
 # CONFIG_SND_SOC_RK817 is not set
 CONFIG_SND_SOC_RL6231=m
-# CONFIG_SND_SOC_ROCKCHIP_I2S is not set
-# CONFIG_SND_SOC_ROCKCHIP is not set
-# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set
-# CONFIG_SND_SOC_ROCKCHIP_PDM is not set
-# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set
-# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set
 CONFIG_SND_SOC_RT1017_SDCA_SDW=m
 # CONFIG_SND_SOC_RT1308 is not set
 # CONFIG_SND_SOC_RT1308_SDW is not set
@@ -6430,6 +6459,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m
 CONFIG_SND_SOC_RT715_SDW=m
 CONFIG_SND_SOC_RT722_SDCA_SDW=m
 # CONFIG_SND_SOC_RT9120 is not set
+CONFIG_SND_SOC_RTQ9128=m
 # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set
 # CONFIG_SND_SOC_SAMSUNG is not set
 # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set
@@ -6864,6 +6894,7 @@ CONFIG_TCM_PSCSI=m
 # CONFIG_TCM_QLA2XXX_DEBUG is not set
 CONFIG_TCM_QLA2XXX=m
 CONFIG_TCM_USER2=m
+CONFIG_TCP_AO=y
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_BBR=m
 CONFIG_TCP_CONG_BIC=m
@@ -6918,6 +6949,7 @@ CONFIG_TEST_KSTRTOX=y
 # CONFIG_TEST_MEMINIT is not set
 # CONFIG_TEST_MIN_HEAP is not set
 # CONFIG_TEST_OBJAGG is not set
+# CONFIG_TEST_OBJPOOL is not set
 # CONFIG_TEST_OVERFLOW is not set
 # CONFIG_TEST_PARMAN is not set
 # CONFIG_TEST_POWER is not set
@@ -6981,8 +7013,6 @@ CONFIG_TIFM_7XX1=m
 # CONFIG_TIFM_CORE is not set
 CONFIG_TIGON3_HWMON=y
 CONFIG_TIGON3=m
-# CONFIG_TI_ICSSG_PRUETH is not set
-CONFIG_TI_ICSS_IEP=m
 CONFIG_TI_LMP92064=m
 CONFIG_TIME_KUNIT_TEST=m
 CONFIG_TIME_NS=y
@@ -7185,6 +7215,7 @@ CONFIG_TYPEC_MUX_FSA4480=m
 CONFIG_TYPEC_MUX_GPIO_SBU=m
 CONFIG_TYPEC_MUX_NB7VPQ904M=m
 CONFIG_TYPEC_MUX_PI3USB30532=m
+CONFIG_TYPEC_MUX_PTN36502=m
 CONFIG_TYPEC_NVIDIA_ALTMODE=m
 # CONFIG_TYPEC_QCOM_PMIC is not set
 # CONFIG_TYPEC_RT1711H is not set
@@ -7196,7 +7227,6 @@ CONFIG_TYPEC_TCPCI_MT6370=m
 CONFIG_TYPEC_TCPM=m
 CONFIG_TYPEC_TPS6598X=m
 CONFIG_TYPEC_UCSI=m
-CONFIG_TYPEC_WCOVE=m
 CONFIG_TYPEC_WUSB3801=m
 CONFIG_TYPHOON=m
 CONFIG_UACCE=m
@@ -7276,6 +7306,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m
 CONFIG_USB_CHIPIDEA_IMX=m
 # CONFIG_USB_CHIPIDEA is not set
 CONFIG_USB_CHIPIDEA_MSM=m
+CONFIG_USB_CHIPIDEA_NPCM=m
 CONFIG_USB_CHIPIDEA_PCI=m
 CONFIG_USB_CONFIGFS_F_MIDI2=y
 # CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set
@@ -7405,6 +7436,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m
 CONFIG_USB_LED_TRIG=y
 CONFIG_USB_LEGOTOWER=m
 # CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_USB_LJCA is not set
 CONFIG_USB_M5602=m
 CONFIG_USB_MA901=m
 # CONFIG_USB_MASS_STORAGE is not set
@@ -7454,6 +7486,7 @@ CONFIG_USB_ONBOARD_HUB=m
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG_PRODUCTLIST is not set
 # CONFIG_USB_OXU210HP_HCD is not set
+CONFIG_USB_PCI_AMD=y
 CONFIG_USB_PCI=y
 CONFIG_USBPCWATCHDOG=m
 CONFIG_USB_PEGASUS=m
@@ -7658,7 +7691,7 @@ CONFIG_VIDEO_BT819=m
 CONFIG_VIDEO_BT848=m
 CONFIG_VIDEO_BT856=m
 CONFIG_VIDEO_BT866=m
-CONFIG_VIDEO_CADENCE_CSI2RX=m
+# CONFIG_VIDEO_CADENCE_CSI2RX is not set
 CONFIG_VIDEO_CADENCE_CSI2TX=m
 # CONFIG_VIDEO_CADENCE is not set
 # CONFIG_VIDEO_CAFE_CCIC is not set
@@ -7735,10 +7768,12 @@ CONFIG_VIDEO_M52790=m
 CONFIG_VIDEO_MAX9286=m
 # CONFIG_VIDEO_MAX96712 is not set
 # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set
+# CONFIG_VIDEO_MGB4 is not set
 CONFIG_VIDEO_ML86V7667=m
 CONFIG_VIDEO_MSP3400=m
 CONFIG_VIDEO_MT9M001=m
 # CONFIG_VIDEO_MT9M111 is not set
+CONFIG_VIDEO_MT9M114=m
 CONFIG_VIDEO_MT9P031=m
 CONFIG_VIDEO_MT9T112=m
 CONFIG_VIDEO_MT9V011=m
@@ -8028,6 +8063,7 @@ CONFIG_XDP_SOCKETS_DIAG=m
 CONFIG_XDP_SOCKETS=y
 # CONFIG_XEN_GRANT_DMA_ALLOC is not set
 CONFIG_XEN_MEMORY_HOTPLUG_LIMIT=512
+CONFIG_XEN_PRIVCMD_EVENTFD=y
 CONFIG_XEN_PRIVCMD_IRQFD=y
 CONFIG_XEN_PRIVCMD=m
 # CONFIG_XEN_PVCALLS_FRONTEND is not set
@@ -8135,19 +8171,18 @@ CONFIG_ZENIFY=y
 CONFIG_WINESYNC=y
 CONFIG_USER_NS_UNPRIVILEGED=y
 CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
 CONFIG_HID_IPTS=m
 CONFIG_HID_ITHC=m
 CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
 CONFIG_IPC_CLASSES=y
 CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-rhel.config b/SOURCES/kernel-s390x-rhel.config
index 1ee626a..c2c1ecf 100644
--- a/SOURCES/kernel-s390x-rhel.config
+++ b/SOURCES/kernel-s390x-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
 # CONFIG_ARCH_BITMAIN is not set
 # CONFIG_ARCH_KEEMBAY is not set
 # CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
 # CONFIG_ARCH_MESON is not set
 CONFIG_ARCH_MMAP_RND_BITS=28
 CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
@@ -275,6 +274,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
 CONFIG_ASN1=y
 # CONFIG_ASUS_TF103C_DOCK is not set
 # CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_ASYNC_TX_DMA=y
@@ -387,6 +387,7 @@ CONFIG_BASE_FULL=y
 # CONFIG_BATTERY_SAMSUNG_SDI is not set
 # CONFIG_BATTERY_SBS is not set
 # CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
 # CONFIG_BCACHE is not set
 # CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m @@ -515,7 +516,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -608,7 +608,6 @@ CONFIG_CACHESTAT_SYSCALL=y # CONFIG_CAIF is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -682,6 +681,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y # CONFIG_CFG80211_DEBUGFS is not set CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -761,6 +761,7 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y # CONFIG_CIO_INJECT is not set CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -830,7 +831,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -895,7 +895,6 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y CONFIG_CRYPTO_AES_S390=m # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -912,7 +911,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA_S390=y CONFIG_CRYPTO_CMAC=y @@ -977,6 +975,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1000,7 +1003,6 @@ CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_POLY1305_P10 is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RSA=y @@ -1266,6 +1268,7 @@ CONFIG_DP83TC811_PHY=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1338,6 +1341,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1365,36 +1369,90 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # 
CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # 
CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1569,7 +1627,6 @@ CONFIG_EDAC_PND2=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_EEPROM_MAX6875 is not set # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1599,7 +1656,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y # CONFIG_ETHOC is not set CONFIG_ETHTOOL_NETLINK=y @@ -1667,7 +1729,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1678,9 +1740,9 @@ CONFIG_FB_EFI=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_IMX is not set +# CONFIG_FB is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_LE80578 is not set -CONFIG_FB=m # CONFIG_FB_MATROX_G is not set # CONFIG_FB_MATROX_I2C is not set # CONFIG_FB_MATROX is not set @@ -1764,7 +1826,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1772,6 +1836,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set # CONFIG_FUNCTION_GRAPH_RETVAL is not set CONFIG_FUNCTION_GRAPH_TRACER=y @@ -1908,6 +1973,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2079,6 +2145,7 @@ CONFIG_HMC_DRV=m # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2218,6 +2285,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2232,6 +2300,7 @@ CONFIG_ICE_SWITCHDEV=y # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2301,7 +2370,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2415,6 +2483,7 @@ CONFIG_INPUT_SPARSEKMAP=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set 
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_INTEGRITY_AUDIT=y @@ -2457,6 +2526,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2478,7 +2548,8 @@ CONFIG_IO_DELAY_0X80=y # CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set CONFIG_IOMMU_DEFAULT_DMA_STRICT=y # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2772,7 +2843,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -2804,6 +2875,7 @@ CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set # CONFIG_KVM_S390_UCONTROL is not set CONFIG_KVM_SMM=y @@ -2970,6 +3042,7 @@ CONFIG_LRU_GEN=y CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -2984,6 +3057,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y # CONFIG_LXT_PHY is not set @@ -2993,6 +3067,7 @@ CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m # CONFIG_MAC80211 is not set +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_MESH is not set # CONFIG_MAC80211_MESSAGE_TRACING is not set @@ -3029,7 +3104,6 @@ CONFIG_MARCH_Z14=y CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y # CONFIG_MARVELL_PHY is not set # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3064,6 +3138,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3087,7 +3162,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3096,6 +3171,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3162,7 +3238,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3298,6 +3374,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y 
CONFIG_MLX5_EN_MACSEC=y @@ -3406,6 +3483,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3480,6 +3560,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3585,9 +3667,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3710,6 +3789,7 @@ CONFIG_NET_IPVTI=m # CONFIG_NETIUCV is not set CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3722,15 +3802,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3759,6 +3836,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -3858,7 +3936,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4052,8 +4130,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4075,7 +4154,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4179,6 +4260,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4227,6 +4309,7 @@ CONFIG_PCI_QUIRKS=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4252,8 +4335,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # 
CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4263,8 +4344,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_LAN966X_SERDES is not set # CONFIG_PHYLIB is not set # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4290,6 +4369,7 @@ CONFIG_PID_NS=y CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4357,7 +4437,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4521,7 +4600,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set # CONFIG_QRTR is not set CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4623,6 +4701,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4697,6 +4776,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -4736,7 +4816,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5063,6 +5142,7 @@ CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5217,6 +5297,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5275,6 +5356,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5433,7 +5515,6 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_SLIP is not set # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5516,6 +5597,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5652,8 +5734,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # 
CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -5744,6 +5828,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -5858,12 +5943,6 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -5886,6 +5965,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6081,7 +6161,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6280,6 +6359,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6333,6 +6413,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6567,6 +6648,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6642,6 +6724,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -6739,6 +6822,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m # CONFIG_USB_LED_TRIG is not set CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -6781,6 +6865,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -6927,6 +7012,9 @@ CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set CONFIG_VFIO_AP=m CONFIG_VFIO_CCW=m +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y 
+CONFIG_VFIO_GROUP=y
 CONFIG_VFIO_IOMMU_TYPE1=m
 CONFIG_VFIO=m
 CONFIG_VFIO_MDEV=m
@@ -7041,11 +7129,13 @@ CONFIG_VIDEO_IVTV=m
 # CONFIG_VIDEO_M5MOLS is not set
 # CONFIG_VIDEO_MAX9286 is not set
 # CONFIG_VIDEO_MEYE is not set
+# CONFIG_VIDEO_MGB4 is not set
 # CONFIG_VIDEO_ML86V7667 is not set
 # CONFIG_VIDEO_MSP3400 is not set
 # CONFIG_VIDEO_MT9M001 is not set
 # CONFIG_VIDEO_MT9M032 is not set
 # CONFIG_VIDEO_MT9M111 is not set
+# CONFIG_VIDEO_MT9M114 is not set
 # CONFIG_VIDEO_MT9P031 is not set
 # CONFIG_VIDEO_MT9T001 is not set
 # CONFIG_VIDEO_MT9T112 is not set
@@ -7382,19 +7472,18 @@ CONFIG_ZENIFY=y
 CONFIG_WINESYNC=y
 CONFIG_USER_NS_UNPRIVILEGED=y
 CONFIG_TCP_CONG_BBR2=m
-CONFIG_BCACHEFS_FS=m
-CONFIG_BCACHEFS_QUOTA=y
-CONFIG_BCACHEFS_POSIX_ACL=y
-# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set
-# CONFIG_BCACHEFS_DEBUG is not set
-# CONFIG_BCACHEFS_TESTS is not set
-# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set
-# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set
-# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set
-# CONFIG_DEBUG_CLOSURES is not set
 CONFIG_HID_IPTS=m
 CONFIG_HID_ITHC=m
 CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m
 CONFIG_IPC_CLASSES=y
 CONFIG_LEDS_TPS68470=m
-# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set
+CONFIG_SENSORS_SURFACE_FAN=m
+CONFIG_SENSORS_SURFACE_TEMP=m
+CONFIG_DRM_AMD_COLOR_STEAMDECK=y
+CONFIG_BMI323_I2C=m
+CONFIG_DRM_APPLETBDRM=m
+CONFIG_HID_APPLETB_BL=m
+CONFIG_HID_APPLETB_KBD=m
+CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m
+CONFIG_APPLE_BCE=m
+CONFIG_BMI323_SPI=m
diff --git a/SOURCES/kernel-s390x-zfcpdump-rhel.config b/SOURCES/kernel-s390x-zfcpdump-rhel.config
index 667bd35..c987d1e 100644
--- a/SOURCES/kernel-s390x-zfcpdump-rhel.config
+++ b/SOURCES/kernel-s390x-zfcpdump-rhel.config
@@ -233,7 +233,6 @@ CONFIG_AQUANTIA_PHY=m
 # CONFIG_ARCH_BITMAIN is not set
 # CONFIG_ARCH_KEEMBAY is not set
 # CONFIG_ARCH_LG1K is not set
-# CONFIG_ARCH_MA35 is not set
 # CONFIG_ARCH_MESON is not set
 CONFIG_ARCH_MMAP_RND_BITS=28
 CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
@@ -275,6 +274,7 @@ CONFIG_ARM_SMCCC_SOC_ID=y
 CONFIG_ASN1=y
 # CONFIG_ASUS_TF103C_DOCK is not set
 # CONFIG_ASUS_WIRELESS is not set
+CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m
 CONFIG_ASYNC_RAID6_TEST=m
 CONFIG_ASYNC_TX_DMA=y
@@ -389,6 +389,7 @@ CONFIG_BASE_SMALL=0
 # CONFIG_BATTERY_SAMSUNG_SDI is not set
 # CONFIG_BATTERY_SBS is not set
 # CONFIG_BATTERY_UG3105 is not set
+# CONFIG_BCACHEFS_FS is not set
 # CONFIG_BCACHE is not set
 # CONFIG_BCM54140_PHY is not set
 CONFIG_BCM7XXX_PHY=m
@@ -508,7 +509,7 @@ CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 # CONFIG_BPF_PRELOAD is not set
 CONFIG_BPF_STREAM_PARSER=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_BPF_SYSCALL is not set
 CONFIG_BPF_UNPRIV_DEFAULT_OFF=y
 CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_BRCMDBG is not set
@@ -517,7 +518,6 @@ CONFIG_BRCMFMAC_PCIE=y
 CONFIG_BRCMFMAC_SDIO=y
 CONFIG_BRCMFMAC_USB=y
 CONFIG_BRCMSMAC=m
-# CONFIG_BRCMSTB_GISB_ARB is not set
 # CONFIG_BRCM_TRACING is not set
 # CONFIG_BRIDGE_CFM is not set
 CONFIG_BRIDGE_EBT_802_3=m
@@ -610,7 +610,6 @@ CONFIG_CACHESTAT_SYSCALL=y
 # CONFIG_CAIF is not set
 CONFIG_CAN_8DEV_USB=m
 CONFIG_CAN_BCM=m
-# CONFIG_CAN_BXCAN is not set
 CONFIG_CAN_CALC_BITTIMING=y
 # CONFIG_CAN_CAN327 is not set
 # CONFIG_CAN_CC770 is not set
@@ -684,6 +683,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y
 # CONFIG_CFG80211_DEBUGFS is not set
 CONFIG_CFG80211_DEFAULT_PS=y
 # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set
+CONFIG_CFG80211_KUNIT_TEST=m
 CONFIG_CFG80211=m
 # CONFIG_CFG80211_WEXT is not set
 # CONFIG_CFI_CLANG is not set
@@ -763,6 +763,7 @@ CONFIG_CIFS_UPCALL=y
 CONFIG_CIFS_XATTR=y
 # CONFIG_CIO_INJECT is
not set CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -832,7 +833,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=y CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -897,7 +897,6 @@ CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_AEGIS128_AESNI_SSE2=m # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y CONFIG_CRYPTO_AES_S390=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -913,7 +912,6 @@ CONFIG_CRYPTO_CAST6=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_CCM is not set CONFIG_CRYPTO_CFB=y -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=y CONFIG_CRYPTO_CHACHA20=y CONFIG_CRYPTO_CHACHA_S390=y @@ -979,6 +977,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_LIB_BLAKE2S is not set @@ -1001,7 +1004,6 @@ CONFIG_CRYPTO_OFB=y CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_PCRYPT=y -# CONFIG_CRYPTO_POLY1305_P10 is not set CONFIG_CRYPTO_POLY1305=y # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=y @@ -1268,6 +1270,7 @@ CONFIG_DP83TC811_PHY=m # CONFIG_DPOT_DAC is not set # CONFIG_DPS310 is not set # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1340,6 +1343,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1367,36 +1371,90 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is 
not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1571,7 +1629,6 @@ CONFIG_EDAC_PND2=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_EEPROM_MAX6875 is not set # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1601,7 +1658,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y # 
CONFIG_ETHOC is not set CONFIG_ETHTOOL_NETLINK=y @@ -1670,7 +1732,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1681,9 +1743,9 @@ CONFIG_FB_EFI=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_IMX is not set +# CONFIG_FB is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_LE80578 is not set -CONFIG_FB=m # CONFIG_FB_MATROX_G is not set # CONFIG_FB_MATROX_I2C is not set # CONFIG_FB_MATROX is not set @@ -1768,7 +1830,9 @@ CONFIG_FSCACHE_STATS=y # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y # CONFIG_FS_POSIX_ACL is not set -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set # CONFIG_FTRACE is not set CONFIG_FTRACE_MCOUNT_RECORD=y @@ -1776,6 +1840,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_SORT_STARTUP_TEST is not set # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y +# CONFIG_FUEL_GAUGE_MM8013 is not set # CONFIG_FUNCTION_ERROR_INJECTION is not set # CONFIG_FUNCTION_GRAPH_RETVAL is not set CONFIG_FUNCTION_GRAPH_TRACER=y @@ -1914,6 +1979,7 @@ CONFIG_GPIO_SIM=y # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2085,6 +2151,7 @@ CONFIG_HMC_DRV=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2225,6 +2292,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set # CONFIG_IAQCORE is not set CONFIG_IAVF=m # CONFIG_IB700_WDT is not set @@ -2239,6 +2307,7 @@ CONFIG_ICE_SWITCHDEV=y # CONFIG_ICS932S401 is not set # CONFIG_IDLE_INJECT is not set CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set # CONFIG_IEEE802154_AT86RF230 is not set @@ -2308,7 +2377,6 @@ CONFIG_IMA_MEASURE_PCR_IDX=10 CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2423,6 +2491,7 @@ CONFIG_INPUT_POWERMATE=m CONFIG_INPUT_UINPUT=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set # CONFIG_INTEGRITY_ASYMMETRIC_KEYS is not set CONFIG_INTEGRITY_AUDIT=y @@ -2465,6 +2534,7 @@ CONFIG_INTEL_SDSI=m # CONFIG_INTEL_SOC_PMIC_CHTWC is not set # CONFIG_INTEL_SOC_PMIC is not set # CONFIG_INTEL_TCC_COOLING is not set +# CONFIG_INTEL_TDX_HOST is not set # CONFIG_INTEL_TH is not set CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # CONFIG_INTEL_VSC is not set @@ -2486,7 +2556,8 @@ CONFIG_IO_DELAY_0X80=y # CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set CONFIG_IOMMU_DEFAULT_DMA_STRICT=y # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2781,7 +2852,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS is not set # 
CONFIG_KEYS_REQUEST_CACHE is not set # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -2813,6 +2884,7 @@ CONFIG_KVM_AMD_SEV=y # CONFIG_KVM_BOOK3S_HV_P8_TIMING is not set # CONFIG_KVM_BOOK3S_HV_P9_TIMING is not set # CONFIG_KVM is not set +CONFIG_KVM_MAX_NR_VCPUS=4096 # CONFIG_KVM_PROVE_MMU is not set # CONFIG_KVM_S390_UCONTROL is not set CONFIG_KVM_SMM=y @@ -2980,6 +3052,7 @@ CONFIG_LRU_GEN=y CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -2994,6 +3067,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y # CONFIG_LXT_PHY is not set @@ -3003,6 +3077,7 @@ CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m # CONFIG_MAC80211 is not set +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_MESH is not set # CONFIG_MAC80211_MESSAGE_TRACING is not set @@ -3039,7 +3114,6 @@ CONFIG_MARCH_Z14=y CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y # CONFIG_MARVELL_PHY is not set # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3074,6 +3148,7 @@ CONFIG_MAX_SKB_FRAGS=17 # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3097,7 +3172,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m # CONFIG_MD is not set CONFIG_MD_LINEAR=m @@ -3106,6 +3181,7 @@ CONFIG_MD_RAID0=m CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3172,7 +3248,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3308,6 +3384,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3416,6 +3493,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3490,6 +3570,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3595,9 +3677,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# 
CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3720,6 +3799,7 @@ CONFIG_NET_IPVTI=m # CONFIG_NETIUCV is not set # CONFIG_NET_KEY is not set CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3733,15 +3813,12 @@ CONFIG_NET_NS=y # CONFIG_NETPOLL is not set CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCHED is not set CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3770,6 +3847,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -3870,7 +3948,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4064,8 +4142,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set # CONFIG_NVIDIA_WMI_EC_BACKLIGHT is not set -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m # CONFIG_NVME_FC is not set +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4087,7 +4166,9 @@ CONFIG_NVME_TARGET_LOOP=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set # CONFIG_NVRAM is not set # CONFIG_NVSW_SN2201 is not set @@ -4191,6 +4272,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4239,6 +4321,7 @@ CONFIG_PCI_QUIRKS=y # CONFIG_PCI_STUB is not set # CONFIG_PCI_SW_SWITCHTEC is not set # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4264,8 +4347,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4275,8 +4356,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_LAN966X_SERDES is not set # CONFIG_PHYLIB is not set # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4302,6 +4381,7 @@ CONFIG_PID_NS=y CONFIG_PINCTRL_ALDERLAKE=m # CONFIG_PINCTRL_BROXTON is not set # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_ELKHARTLAKE=m CONFIG_PINCTRL_EMMITSBURG=m @@ -4369,7 +4449,6 @@ CONFIG_POSIX_TIMERS=y 
CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4534,7 +4613,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set # CONFIG_QRTR is not set CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4636,6 +4714,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4710,6 +4789,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -4749,7 +4829,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5080,6 +5159,7 @@ CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5234,6 +5314,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5292,6 +5373,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5450,7 +5532,6 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_SLIP is not set # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set CONFIG_SLUB_DEBUG=y @@ -5533,6 +5614,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5669,8 +5751,10 @@ CONFIG_SND_SEQ_UMP=y # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -5761,6 +5845,7 @@ CONFIG_SND_SOC_CX2072X=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -5875,12 +5960,6 @@ CONFIG_SND_SOC_MAX98927=m # 
CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set # CONFIG_SND_SOC_RL6231 is not set -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set # CONFIG_SND_SOC_RT1308 is not set # CONFIG_SND_SOC_RT1308_SDW is not set @@ -5903,6 +5982,7 @@ CONFIG_SND_SOC_RT1318_SDW=m # CONFIG_SND_SOC_RT715_SDW is not set CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6098,7 +6178,6 @@ CONFIG_SND_VX222=m # CONFIG_SND_XEN_FRONTEND is not set # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6302,6 +6381,7 @@ CONFIG_TCM_IBLOCK=y CONFIG_TCM_PSCSI=y # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6355,6 +6435,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6590,6 +6671,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6665,6 +6747,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m # CONFIG_USB_CHIPIDEA is not set CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -6762,6 +6845,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m # CONFIG_USB_LED_TRIG is not set CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -6804,6 +6888,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -6950,6 +7035,9 @@ CONFIG_VETH=m # CONFIG_VFIO_AMBA is not set CONFIG_VFIO_AP=m CONFIG_VFIO_CCW=m +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m # CONFIG_VFIO is not set CONFIG_VFIO_MDEV=m @@ -7064,11 +7152,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7405,19 +7495,18 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y 
-CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -# CONFIG_DRM_AMD_COLOR_STEAMDECK is not set +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-x86_64-debug-fedora.config b/SOURCES/kernel-x86_64-debug-fedora.config index a3ad78d..ee8a2e4 100644 --- a/SOURCES/kernel-x86_64-debug-fedora.config +++ b/SOURCES/kernel-x86_64-debug-fedora.config @@ -188,16 +188,16 @@ CONFIG_ADVANTECH_EC_WDT=m # CONFIG_ADVANTECH_WDT is not set CONFIG_ADVISE_SYSCALLS=y CONFIG_ADV_SWBUTTON=m -CONFIG_ADXL313_I2C=m -CONFIG_ADXL313_SPI=m +# CONFIG_ADXL313_I2C is not set +# CONFIG_ADXL313_SPI is not set # CONFIG_ADXL345_I2C is not set # CONFIG_ADXL345_SPI is not set -CONFIG_ADXL355_I2C=m -CONFIG_ADXL355_SPI=m -CONFIG_ADXL367_I2C=m -CONFIG_ADXL367_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_ADXL372_SPI=m +# CONFIG_ADXL355_I2C is not set +# CONFIG_ADXL355_SPI is not set +# CONFIG_ADXL367_I2C is not set +# CONFIG_ADXL367_SPI is not set +# CONFIG_ADXL372_I2C is not set +# CONFIG_ADXL372_SPI is not set CONFIG_ADXRS290=m # CONFIG_ADXRS450 is not set # CONFIG_AFE4403 is not set @@ -345,6 +345,7 @@ CONFIG_ASUS_NB_WMI=m CONFIG_ASUS_TF103C_DOCK=m CONFIG_ASUS_WIRELESS=m CONFIG_ASUS_WMI=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -534,6 +535,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +CONFIG_BCACHEFS_DEBUG_TRANSACTIONS=y +CONFIG_BCACHEFS_DEBUG=y +# CONFIG_BCACHEFS_ERASURE_CODING is not set +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_LOCK_TIME_STATS=y +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM54140_PHY=m CONFIG_BCM7XXX_PHY=m @@ -676,7 +686,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y CONFIG_BRIDGE_CFM=y CONFIG_BRIDGE_EBT_802_3=m @@ -788,7 +797,6 @@ CONFIG_CALL_DEPTH_TRACKING=y # CONFIG_CALL_THUNKS_DEBUG is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -868,6 +876,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -960,6 +969,7 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1039,7 +1049,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set 
CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1117,6 +1126,7 @@ CONFIG_CROS_EC_TYPEC=m CONFIG_CROS_EC_UART=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1224,6 +1234,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1315,7 +1330,7 @@ CONFIG_CXL_PMU=m # CONFIG_CXL_REGION_INVALIDATION_TEST is not set CONFIG_CXL_REGION=y CONFIG_DA280=m -CONFIG_DA311=m +# CONFIG_DA311 is not set CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_LRU_SORT is not set CONFIG_DAMON_PADDR=y @@ -1334,6 +1349,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set CONFIG_DEBUG_CREDENTIALS=y # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1446,7 +1462,6 @@ CONFIG_DELL_WMI=m CONFIG_DELL_WMI_PRIVACY=y CONFIG_DELL_WMI_SYSMAN=m CONFIG_DETECT_HUNG_TASK=y -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1495,7 +1510,7 @@ CONFIG_DMA_NUMA_CMA=y # CONFIG_DMAPOOL_TEST is not set # CONFIG_DMARD06 is not set # CONFIG_DMARD09 is not set -CONFIG_DMARD10=m +# CONFIG_DMARD10 is not set # CONFIG_DMA_RESTRICTED_POOL is not set # CONFIG_DMATEST is not set CONFIG_DM_CACHE=m @@ -1543,7 +1558,7 @@ CONFIG_DNS_RESOLVER=m CONFIG_DP83640_PHY=m CONFIG_DP83822_PHY=m CONFIG_DP83848_PHY=m -# CONFIG_DP83867_PHY is not set +CONFIG_DP83867_PHY=m CONFIG_DP83869_PHY=m # CONFIG_DP83TC811_PHY is not set # CONFIG_DP83TD510_PHY is not set @@ -1554,6 +1569,7 @@ CONFIG_DPTF_PCH_FIVR=m CONFIG_DPTF_POWER=m CONFIG_DRAGONRISE_FF=y CONFIG_DRBD_FAULT_INJECTION=y +CONFIG_DRIVER_PE_KUNIT_TEST=m CONFIG_DRM_ACCEL_HABANALABS=m CONFIG_DRM_ACCEL_IVPU=m CONFIG_DRM_ACCEL_QAIC=m @@ -1672,9 +1688,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m # CONFIG_DRM_PANEL_KHADAS_TS050 is not set @@ -1704,6 +1722,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set # CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -1940,7 +1959,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_BOOTLOADER_CONTROL is not set # CONFIG_EFI_CAPSULE_LOADER is not set @@ -2078,7 +2096,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y 
# CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2216,6 +2234,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +CONFIG_FUEL_GAUGE_MM8013=m CONFIG_FUJITSU_ES=m CONFIG_FUJITSU_LAPTOP=m CONFIG_FUJITSU_TABLET=m @@ -2324,6 +2343,7 @@ CONFIG_GPIO_IT87=m # CONFIG_GPIO_LATCH is not set CONFIG_GPIOLIB_FASTPATH_LIMIT=512 CONFIG_GPIOLIB=y +CONFIG_GPIO_LJCA=m # CONFIG_GPIO_LOGICVC is not set # CONFIG_GPIO_MAX3191X is not set # CONFIG_GPIO_MAX7300 is not set @@ -2668,6 +2688,7 @@ CONFIG_I2C_HID=y CONFIG_I2C_I801=m CONFIG_I2C_ISCH=m CONFIG_I2C_ISMT=m +CONFIG_I2C_LJCA=m CONFIG_I2C_MLXBF=m CONFIG_I2C_MLXCPLD=m CONFIG_I2C_MULTI_INSTANTIATE=m @@ -2720,6 +2741,7 @@ CONFIG_I40EVF=m CONFIG_I6300ESB_WDT=m CONFIG_I82092=m # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set CONFIG_IA32_EMULATION=y # CONFIG_IAQCORE is not set CONFIG_IB700_WDT=m @@ -2735,6 +2757,7 @@ CONFIG_ICPLUS_PHY=m CONFIG_IDEAPAD_LAPTOP=m CONFIG_IDLE_INJECT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IE6XX_WDT=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m @@ -2776,8 +2799,8 @@ CONFIG_IIO_FORMAT_KUNIT_TEST=m CONFIG_IIO_HRTIMER_TRIGGER=m CONFIG_IIO_INTERRUPT_TRIGGER=m CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_KX022A_I2C=m -CONFIG_IIO_KX022A_SPI=m +# CONFIG_IIO_KX022A_I2C is not set +# CONFIG_IIO_KX022A_SPI is not set CONFIG_IIO=m CONFIG_IIO_MUX=m CONFIG_IIO_RESCALE_KUNIT_TEST=m @@ -2786,7 +2809,7 @@ CONFIG_IIO_RESCALE=m # CONFIG_IIO_SSP_SENSORHUB is not set CONFIG_IIO_ST_ACCEL_3AXIS=m CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m +# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set CONFIG_IIO_ST_GYRO_3AXIS=m CONFIG_IIO_ST_GYRO_I2C_3AXIS=m CONFIG_IIO_ST_GYRO_SPI_3AXIS=m @@ -2826,7 +2849,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2955,6 +2977,7 @@ CONFIG_INPUT_WISTRON_BTNS=m CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +CONFIG_INSPUR_PLATFORM_PROFILE=m CONFIG_INT3406_THERMAL=m CONFIG_INT340X_THERMAL=m CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y @@ -3037,6 +3060,7 @@ CONFIG_INTEL_SOC_PMIC=y CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TCC_COOLING=m CONFIG_INTEL_TDX_GUEST=y +CONFIG_INTEL_TDX_HOST=y CONFIG_INTEL_TELEMETRY=m CONFIG_INTEL_TH_ACPI=m # CONFIG_INTEL_TH_DEBUG is not set @@ -3109,8 +3133,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y # CONFIG_IPMB_DEVICE_INTERFACE is not set CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3432,7 +3454,7 @@ CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3470,6 +3492,7 @@ CONFIG_KVM_AMD_SEV=y CONFIG_KVM_GUEST=y CONFIG_KVM_INTEL=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_MMU_AUDIT=y CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y @@ -3528,6 +3551,7 @@ CONFIG_LEDS_GROUP_MULTICOLOR=m CONFIG_LEDS_INTEL_SS4200=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -3657,6 +3681,7 @@ 
CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3671,6 +3696,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3679,6 +3705,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -3747,6 +3774,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -3757,6 +3785,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -3786,6 +3815,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -4013,18 +4043,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -4126,6 +4160,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -4181,7 +4218,7 @@ CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y # CONFIG_MS5611 is not set # CONFIG_MS5637 is not set -CONFIG_MSA311=m +# CONFIG_MSA311 is not set # CONFIG_MS_BLOCK is not set CONFIG_MSDOS_FS=m CONFIG_MSDOS_PARTITION=y @@ -4207,6 +4244,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AR7_PARTS is not set @@ -4273,10 +4312,11 @@ CONFIG_MTD_RAW_NAND=m # CONFIG_MTD_SHARPSL_PARTS is not set # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_SPI_NAND is not set -# CONFIG_MTD_SPI_NOR is not set +CONFIG_MTD_SPI_NOR=m # CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y # CONFIG_MTD_SPI_NOR_SWP_KEEP is not set +CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y # CONFIG_MTD_SST25L is not set # CONFIG_MTD_SWAP is not set # CONFIG_MTD_TESTS is not set @@ -4358,9 +4398,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4433,12 +4470,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # 
CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4531,6 +4568,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4543,15 +4581,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m # CONFIG_NET_SB1000 is not set -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4585,6 +4620,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m CONFIG_NET_TULIP=y CONFIG_NET_UDP_TUNNEL=m CONFIG_NET_VENDOR_3COM=y @@ -4721,7 +4757,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -4927,8 +4963,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_SHIELD_FF=y CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_LAYOUT_ONIE_TLV=m CONFIG_NVMEM_LAYOUT_SL28_VPD=m @@ -4948,7 +4985,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y CONFIG_NVSW_SN2201=m @@ -5085,6 +5124,7 @@ CONFIG_PCI_BIOS=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -5148,6 +5188,7 @@ CONFIG_PCMCIA_XIRC2PS=m CONFIG_PCMCIA_XIRCOM=m CONFIG_PCMCIA=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -5179,8 +5220,6 @@ CONFIG_PHY_CADENCE_SIERRA=m CONFIG_PHY_CADENCE_TORRENT=m # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_HI3670_PCIE is not set # CONFIG_PHY_HI3670_USB is not set # CONFIG_PHY_INTEL_LGM_COMBO is not set @@ -5189,8 +5228,6 @@ CONFIG_PHY_CADENCE_TORRENT=m CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -5295,7 +5332,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_LINKSTATION is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -5550,7 +5586,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # 
CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -5585,6 +5621,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77650=m # CONFIG_REGULATOR_MAX77826 is not set CONFIG_REGULATOR_MAX77857=m @@ -5692,6 +5729,7 @@ CONFIG_RMI4_SPI=m CONFIG_RMNET=m # CONFIG_ROCKCHIP_PHY is not set CONFIG_ROCKER=m +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -5744,7 +5782,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ABEOZ9=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_CROS_EC=m @@ -5928,7 +5965,7 @@ CONFIG_SBP_TARGET=m # CONFIG_SC1200_WDT is not set CONFIG_SC92031=m # CONFIG_SCA3000 is not set -CONFIG_SCA3300=m +# CONFIG_SCA3300 is not set CONFIG_SCD30_CORE=m CONFIG_SCD30_I2C=m CONFIG_SCD30_SERIAL=m @@ -6060,11 +6097,12 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -6219,6 +6257,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -6280,6 +6319,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 is not set @@ -6462,7 +6502,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLS=y CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set @@ -6499,6 +6538,9 @@ CONFIG_SND_ALOOP=m CONFIG_SND_ALS300=m CONFIG_SND_ALS4000=m CONFIG_SND_AMD_ACP_CONFIG=m +# CONFIG_SND_AMD_ASOC_ACP63 is not set +# CONFIG_SND_AMD_ASOC_REMBRANDT is not set +# CONFIG_SND_AMD_ASOC_RENOIR is not set CONFIG_SND_ASIHPI=m CONFIG_SND_ATIIXP=m CONFIG_SND_ATIIXP_MODEM=m @@ -6558,6 +6600,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6689,14 +6732,18 @@ CONFIG_SND_SOC_AMD_ACP5x=m CONFIG_SND_SOC_AMD_ACP6x=m CONFIG_SND_SOC_AMD_ACP_COMMON=m CONFIG_SND_SOC_AMD_ACP=m +# CONFIG_SND_SOC_AMD_ACP_PCI is not set CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m +# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set +CONFIG_SND_SOC_AMD_MACH_COMMON=m CONFIG_SND_SOC_AMD_PS=m CONFIG_SND_SOC_AMD_PS_MACH=m CONFIG_SND_SOC_AMD_RENOIR=m CONFIG_SND_SOC_AMD_RENOIR_MACH=m CONFIG_SND_SOC_AMD_RPL_ACP6x=m CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m +CONFIG_SND_SOC_AMD_SOF_MACH=m CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m CONFIG_SND_SOC_AMD_VANGOGH_MACH=m CONFIG_SND_SOC_AMD_YC_MACH=m @@ -6704,8 
+6751,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m # CONFIG_SND_SOC_ARNDALE is not set CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -6800,6 +6849,7 @@ CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m @@ -6840,6 +6890,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE=m CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m +CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m @@ -6915,12 +6966,6 @@ CONFIG_SND_SOC_PCM512x=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set CONFIG_SND_SOC_RT1017_SDCA_SDW=m CONFIG_SND_SOC_RT1308=m CONFIG_SND_SOC_RT1308_SDW=m @@ -6943,6 +6988,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6960,6 +7006,7 @@ CONFIG_SND_SOC_SMA1303=m # CONFIG_SND_SOC_SNOW is not set CONFIG_SND_SOC_SOF_ACPI=m CONFIG_SND_SOC_SOF_ALDERLAKE=m +CONFIG_SND_SOC_SOF_AMD_ACP63=m CONFIG_SND_SOC_SOF_AMD_REMBRANDT=m CONFIG_SND_SOC_SOF_AMD_RENOIR=m CONFIG_SND_SOC_SOF_AMD_TOPLEVEL=m @@ -7200,11 +7247,15 @@ CONFIG_SPI_FSL_LPSPI=m # CONFIG_SPI_GPIO is not set # CONFIG_SPI_HISI_KUNPENG is not set # CONFIG_SPI_HISI_SFC_V3XX is not set +CONFIG_SPI_INTEL=m +CONFIG_SPI_INTEL_PCI=m +# CONFIG_SPI_INTEL_PLATFORM is not set # CONFIG_SPI_LANTIQ_SSC is not set +CONFIG_SPI_LJCA=m # CONFIG_SPI_LM70_LLP is not set # CONFIG_SPI_LOOPBACK_TEST is not set CONFIG_SPI_MASTER=y -# CONFIG_SPI_MEM is not set +CONFIG_SPI_MEM=y CONFIG_SPI_MICROCHIP_CORE=m CONFIG_SPI_MICROCHIP_CORE_QSPI=m CONFIG_SPI_MUX=m @@ -7402,6 +7453,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -7456,6 +7508,7 @@ CONFIG_TEST_LOCKUP=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7526,8 +7579,6 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set -CONFIG_TI_ICSS_IEP=m CONFIG_TI_LMP92064=m CONFIG_TIME_KUNIT_TEST=m CONFIG_TIME_NS=y @@ -7720,6 +7771,7 @@ CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_INTEL_PMC=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7813,6 +7865,7 @@ 
CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CONFIGFS_F_MIDI2=y # CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set @@ -7945,6 +7998,7 @@ CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LGM_PHY is not set # CONFIG_USB_LINK_LAYER_TEST is not set +CONFIG_USB_LJCA=m CONFIG_USB_M5602=m CONFIG_USB_MA901=m # CONFIG_USB_MASS_STORAGE is not set @@ -7994,6 +8048,7 @@ CONFIG_USB_ONBOARD_HUB=m # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -8206,7 +8261,7 @@ CONFIG_VIDEO_BT819=m CONFIG_VIDEO_BT848=m CONFIG_VIDEO_BT856=m CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_CADENCE_CSI2RX=m +# CONFIG_VIDEO_CADENCE_CSI2RX is not set CONFIG_VIDEO_CADENCE_CSI2TX=m # CONFIG_VIDEO_CADENCE is not set # CONFIG_VIDEO_CAFE_CCIC is not set @@ -8285,10 +8340,12 @@ CONFIG_VIDEO_M52790=m CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MAX96712 is not set # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -8666,6 +8723,7 @@ CONFIG_XEN_NETDEV_BACKEND=m CONFIG_XEN_NETDEV_FRONTEND=m CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_FRONTEND=m +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_BACKEND is not set @@ -8792,22 +8850,13 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m CONFIG_LEGION_LAPTOP=m CONFIG_ACPI_CALL=m CONFIG_MFD_STEAMDECK=m @@ -8843,16 +8892,17 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # CONFIG_USB_DUMMY_HCD is not set # CONFIG_USB_CONFIGFS is not set # CONFIG_PHY_SAMSUNG_USB2 is not set -CONFIG_SND_SOC_AMD_SOF_MACH=m -CONFIG_SND_SOC_AMD_MACH_COMMON=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y CONFIG_SND_SOC_SOF=m CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y CONFIG_SND_SOC_SOF_IPC3=y CONFIG_SND_SOC_SOF_INTEL_IPC4=y CONFIG_SND_SOC_SOF_AMD_COMMON=m -CONFIG_SND_SOC_SOF_AMD_ACP63=m -# CONFIG_SND_SOC_AMD_ACP_PCI is not set -# CONFIG_SND_AMD_ASOC_RENOIR is not set -# CONFIG_SND_AMD_ASOC_REMBRANDT is not set -# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set CONFIG_SND_SOC_TOPOLOGY=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-x86_64-debug-rhel.config b/SOURCES/kernel-x86_64-debug-rhel.config index 9ad5728..46a8f09 100644 --- a/SOURCES/kernel-x86_64-debug-rhel.config +++ b/SOURCES/kernel-x86_64-debug-rhel.config @@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# 
CONFIG_ARCH_MA35 is not set # CONFIG_ARCH_MEMORY_PROBE is not set # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=28 @@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set CONFIG_ASUS_WMI=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y # CONFIG_CALL_THUNKS_DEBUG is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -936,7 +936,6 @@ CONFIG_CRYPTO_ADIANTUM=m # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y CONFIG_CRYPTO_AES_NI_INTEL=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -963,7 +962,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20_X86_64=y CONFIG_CRYPTO_CMAC=y @@ -1036,6 +1034,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1060,7 +1063,6 @@ CONFIG_CRYPTO_OFB=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_POLY1305_P10 is not set CONFIG_CRYPTO_POLY1305_X86_64=y # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1356,6 +1358,7 @@ CONFIG_DP83TC811_PHY=m CONFIG_DPTF_PCH_FIVR=m CONFIG_DPTF_POWER=m # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1431,6 +1434,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set 
# CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1457,36 +1461,90 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # 
CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1681,7 +1739,6 @@ CONFIG_EEPROM_93CX6=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1721,7 +1778,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1792,7 +1854,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1891,7 +1953,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1899,6 +1963,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ES=m CONFIG_FUJITSU_LAPTOP=m CONFIG_FUJITSU_TABLET=m @@ -2041,6 +2106,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set CONFIG_GUP_TEST=y CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2226,6 +2292,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2381,6 +2448,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set CONFIG_IA32_EMULATION=y # CONFIG_IAQCORE is not set CONFIG_IAVF=m @@ -2397,6 +2465,7 @@ CONFIG_ICPLUS_PHY=m CONFIG_IDEAPAD_LAPTOP=m CONFIG_IDLE_INJECT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m # CONFIG_IE6XX_WDT is not set CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set @@ -2468,7 +2537,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2586,6 +2654,7 @@ 
CONFIG_INPUT_UINPUT=m CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INT340X_THERMAL=m CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y @@ -2654,6 +2723,7 @@ CONFIG_INTEL_SDSI=m CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TCC_COOLING=m CONFIG_INTEL_TDX_GUEST=y +# CONFIG_INTEL_TDX_HOST is not set CONFIG_INTEL_TH_ACPI=m # CONFIG_INTEL_TH_DEBUG is not set CONFIG_INTEL_TH_GTH=m @@ -2688,7 +2758,8 @@ CONFIG_IOMMU_DEBUGFS=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2989,7 +3060,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3023,6 +3094,7 @@ CONFIG_KVM_AMD_SEV=y CONFIG_KVM_GUEST=y CONFIG_KVM_INTEL=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_MMU_AUDIT=y CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y @@ -3190,6 +3262,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3204,6 +3277,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3212,6 +3286,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3241,7 +3316,6 @@ CONFIG_MANTIS_CORE=m CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y CONFIG_MARVELL_PHY=m # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3277,6 +3351,7 @@ CONFIG_MAXSMP=y # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3300,7 +3375,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3309,6 +3384,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3375,7 +3451,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3520,6 +3596,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y 
CONFIG_MLX5_EN_MACSEC=y @@ -3629,6 +3706,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3706,6 +3786,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3816,9 +3898,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3940,6 +4019,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3952,15 +4032,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3989,6 +4066,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4088,7 +4166,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4285,8 +4363,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4308,7 +4387,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y CONFIG_NVSW_SN2201=m @@ -4424,6 +4505,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4472,6 +4554,7 @@ CONFIG_PCI_STUB=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4501,8 +4584,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # 
CONFIG_PHY_HI3670_PCIE is not set @@ -4514,8 +4595,6 @@ CONFIG_PHY_BCM_SR_USB=m CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4547,6 +4626,7 @@ CONFIG_PINCTRL_BROXTON=m CONFIG_PINCTRL_CANNONLAKE=m CONFIG_PINCTRL_CEDARFORK=m # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_DENVERTON=m CONFIG_PINCTRL_ELKHARTLAKE=m @@ -4624,7 +4704,6 @@ CONFIG_POWERCAP=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4788,7 +4867,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4890,6 +4968,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4965,6 +5044,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5004,7 +5084,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5316,6 +5395,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5472,6 +5552,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5530,6 +5611,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5699,7 +5781,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLS=y CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set @@ -5781,6 +5862,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5923,8 +6005,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # 
CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6016,6 +6100,7 @@ CONFIG_SND_SOC_INTEL_AVS=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6056,6 +6141,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE=m CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m +CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m @@ -6131,12 +6217,6 @@ CONFIG_SND_SOC_PCM512x=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set CONFIG_SND_SOC_RT1308=m CONFIG_SND_SOC_RT1308_SDW=m @@ -6159,6 +6239,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6356,7 +6437,6 @@ CONFIG_SND_X86=y CONFIG_SND_XEN_FRONTEND=m # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6574,6 +6654,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6627,6 +6708,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6856,6 +6938,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6931,6 +7014,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -7029,6 +7113,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LGM_PHY is not set # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7071,6 +7156,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ 
-7220,6 +7306,9 @@ CONFIG_VETH=m # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m CONFIG_VFIO_MDEV=m @@ -7335,11 +7424,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7754,22 +7845,13 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m CONFIG_LEGION_LAPTOP=m CONFIG_ACPI_CALL=m CONFIG_IIO_HRTIMER_TRIGGER=m @@ -7811,6 +7893,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # CONFIG_USB_DUMMY_HCD is not set # CONFIG_USB_CONFIGFS is not set # CONFIG_PHY_SAMSUNG_USB2 is not set +CONFIG_DRM_AMD_COLOR_STEAMDECK=y CONFIG_SND_SOC_AMD_SOF_MACH=m CONFIG_SND_SOC_AMD_MACH_COMMON=m CONFIG_SND_SOC_SOF=m @@ -7824,3 +7907,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m # CONFIG_SND_AMD_ASOC_REMBRANDT is not set # CONFIG_SND_SOC_AMD_LEGACY_MACH is not set CONFIG_SND_SOC_TOPOLOGY=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-x86_64-fedora.config b/SOURCES/kernel-x86_64-fedora.config index 212554b..2b338b9 100644 --- a/SOURCES/kernel-x86_64-fedora.config +++ b/SOURCES/kernel-x86_64-fedora.config @@ -188,16 +188,16 @@ CONFIG_ADVANTECH_EC_WDT=m # CONFIG_ADVANTECH_WDT is not set CONFIG_ADVISE_SYSCALLS=y CONFIG_ADV_SWBUTTON=m -CONFIG_ADXL313_I2C=m -CONFIG_ADXL313_SPI=m +# CONFIG_ADXL313_I2C is not set +# CONFIG_ADXL313_SPI is not set # CONFIG_ADXL345_I2C is not set # CONFIG_ADXL345_SPI is not set -CONFIG_ADXL355_I2C=m -CONFIG_ADXL355_SPI=m -CONFIG_ADXL367_I2C=m -CONFIG_ADXL367_SPI=m -CONFIG_ADXL372_I2C=m -CONFIG_ADXL372_SPI=m +# CONFIG_ADXL355_I2C is not set +# CONFIG_ADXL355_SPI is not set +# CONFIG_ADXL367_I2C is not set +# CONFIG_ADXL367_SPI is not set +# CONFIG_ADXL372_I2C is not set +# CONFIG_ADXL372_SPI is not set CONFIG_ADXRS290=m # CONFIG_ADXRS450 is not set # CONFIG_AFE4403 is not set @@ -343,6 +343,7 @@ CONFIG_ASUS_NB_WMI=m CONFIG_ASUS_TF103C_DOCK=m CONFIG_ASUS_WIRELESS=m CONFIG_ASUS_WMI=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -532,6 +533,15 @@ CONFIG_BAYCOM_SER_HDX=m # CONFIG_BCACHE_ASYNC_REGISTRATION is not set # CONFIG_BCACHE_CLOSURES_DEBUG is not set # CONFIG_BCACHE_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG is not set +# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set +# CONFIG_BCACHEFS_ERASURE_CODING is not set 
+CONFIG_BCACHEFS_FS=m +# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set +# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set +CONFIG_BCACHEFS_POSIX_ACL=y +CONFIG_BCACHEFS_QUOTA=y +# CONFIG_BCACHEFS_TESTS is not set CONFIG_BCACHE=m CONFIG_BCM54140_PHY=m CONFIG_BCM7XXX_PHY=m @@ -674,7 +684,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set CONFIG_BRIDGE_CFM=y CONFIG_BRIDGE_EBT_802_3=m @@ -786,7 +795,6 @@ CONFIG_CALL_DEPTH_TRACKING=y # CONFIG_CALL_THUNKS_DEBUG is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -CONFIG_CAN_BXCAN=m CONFIG_CAN_CALC_BITTIMING=y CONFIG_CAN_CAN327=m # CONFIG_CAN_CC770 is not set @@ -866,6 +874,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFI_CLANG is not set CONFIG_CFS_BANDWIDTH=y @@ -958,6 +967,7 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIO2_BRIDGE=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -1037,7 +1047,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=3 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -1115,6 +1124,7 @@ CONFIG_CROS_EC_TYPEC=m CONFIG_CROS_EC_UART=m CONFIG_CROS_HPS_I2C=m CONFIG_CROS_KBD_LED_BACKLIGHT=m +CONFIG_CROS_KUNIT_EC_PROTO_TEST=m CONFIG_CROS_KUNIT=m CONFIG_CROSS_MEMORY_ATTACH=y CONFIG_CROS_TYPEC_SWITCH=m @@ -1222,6 +1232,11 @@ CONFIG_CRYPTO_GHASH=y CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1313,7 +1328,7 @@ CONFIG_CXL_PMU=m # CONFIG_CXL_REGION_INVALIDATION_TEST is not set CONFIG_CXL_REGION=y CONFIG_DA280=m -CONFIG_DA311=m +# CONFIG_DA311 is not set CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_LRU_SORT is not set CONFIG_DAMON_PADDR=y @@ -1332,6 +1347,7 @@ CONFIG_DE2104X=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_CGROUP_REF is not set +# CONFIG_DEBUG_CLOSURES is not set # CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_DRIVER is not set @@ -1436,7 +1452,6 @@ CONFIG_DELL_WMI=m CONFIG_DELL_WMI_PRIVACY=y CONFIG_DELL_WMI_SYSMAN=m # CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_DEV_APPLETALK is not set CONFIG_DEV_DAX_CXL=m CONFIG_DEV_DAX_HMEM=m CONFIG_DEV_DAX_KMEM=m @@ -1484,7 +1499,7 @@ CONFIG_DMA_NUMA_CMA=y # CONFIG_DMAPOOL_TEST is not set # CONFIG_DMARD06 is not set # CONFIG_DMARD09 is not set -CONFIG_DMARD10=m +# CONFIG_DMARD10 is not set # CONFIG_DMA_RESTRICTED_POOL is not set # CONFIG_DMATEST is not set CONFIG_DM_CACHE=m @@ -1532,7 +1547,7 @@ CONFIG_DNS_RESOLVER=m CONFIG_DP83640_PHY=m CONFIG_DP83822_PHY=m CONFIG_DP83848_PHY=m -# CONFIG_DP83867_PHY is not set +CONFIG_DP83867_PHY=m CONFIG_DP83869_PHY=m # CONFIG_DP83TC811_PHY is not set # CONFIG_DP83TD510_PHY is not set @@ -1543,6 +1558,7 @@ CONFIG_DPTF_PCH_FIVR=m CONFIG_DPTF_POWER=m CONFIG_DRAGONRISE_FF=y # CONFIG_DRBD_FAULT_INJECTION is not set 
+CONFIG_DRIVER_PE_KUNIT_TEST=m CONFIG_DRM_ACCEL_HABANALABS=m CONFIG_DRM_ACCEL_IVPU=m CONFIG_DRM_ACCEL_QAIC=m @@ -1661,9 +1677,11 @@ CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D=m # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +CONFIG_DRM_PANEL_ILITEK_ILI9882T=m CONFIG_DRM_PANEL_INNOLUX_EJ030NA=m # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set CONFIG_DRM_PANEL_JADARD_JD9365DA_H3=m +CONFIG_DRM_PANEL_JDI_LPM102A188A=m # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set CONFIG_DRM_PANEL_JDI_R63452=m # CONFIG_DRM_PANEL_KHADAS_TS050 is not set @@ -1693,6 +1711,7 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set # CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m CONFIG_DRM_PANEL_RONBO_RB070D30=m CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=m CONFIG_DRM_PANEL_SAMSUNG_DB7430=m @@ -1929,7 +1948,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_EEPROM_AT25 is not set CONFIG_EEPROM_EE1004=m CONFIG_EEPROM_IDT_89HPESX=m -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_BOOTLOADER_CONTROL is not set # CONFIG_EFI_CAPSULE_LOADER is not set @@ -2059,7 +2077,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -2197,6 +2215,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +CONFIG_FUEL_GAUGE_MM8013=m CONFIG_FUJITSU_ES=m CONFIG_FUJITSU_LAPTOP=m CONFIG_FUJITSU_TABLET=m @@ -2305,6 +2324,7 @@ CONFIG_GPIO_IT87=m # CONFIG_GPIO_LATCH is not set CONFIG_GPIOLIB_FASTPATH_LIMIT=512 CONFIG_GPIOLIB=y +CONFIG_GPIO_LJCA=m # CONFIG_GPIO_LOGICVC is not set # CONFIG_GPIO_MAX3191X is not set # CONFIG_GPIO_MAX7300 is not set @@ -2648,6 +2668,7 @@ CONFIG_I2C_HID=y CONFIG_I2C_I801=m CONFIG_I2C_ISCH=m CONFIG_I2C_ISMT=m +CONFIG_I2C_LJCA=m CONFIG_I2C_MLXBF=m CONFIG_I2C_MLXCPLD=m CONFIG_I2C_MULTI_INSTANTIATE=m @@ -2700,6 +2721,7 @@ CONFIG_I40EVF=m CONFIG_I6300ESB_WDT=m CONFIG_I82092=m # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set CONFIG_IA32_EMULATION=y # CONFIG_IAQCORE is not set CONFIG_IB700_WDT=m @@ -2715,6 +2737,7 @@ CONFIG_ICPLUS_PHY=m CONFIG_IDEAPAD_LAPTOP=m CONFIG_IDLE_INJECT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m CONFIG_IE6XX_WDT=m CONFIG_IEEE802154_6LOWPAN=m CONFIG_IEEE802154_ADF7242=m @@ -2756,8 +2779,8 @@ CONFIG_IIO_FORMAT_KUNIT_TEST=m CONFIG_IIO_HRTIMER_TRIGGER=m CONFIG_IIO_INTERRUPT_TRIGGER=m CONFIG_IIO_KFIFO_BUF=m -CONFIG_IIO_KX022A_I2C=m -CONFIG_IIO_KX022A_SPI=m +# CONFIG_IIO_KX022A_I2C is not set +# CONFIG_IIO_KX022A_SPI is not set CONFIG_IIO=m CONFIG_IIO_MUX=m CONFIG_IIO_RESCALE_KUNIT_TEST=m @@ -2766,7 +2789,7 @@ CONFIG_IIO_RESCALE=m # CONFIG_IIO_SSP_SENSORHUB is not set CONFIG_IIO_ST_ACCEL_3AXIS=m CONFIG_IIO_ST_ACCEL_I2C_3AXIS=m -CONFIG_IIO_ST_ACCEL_SPI_3AXIS=m +# CONFIG_IIO_ST_ACCEL_SPI_3AXIS is not set CONFIG_IIO_ST_GYRO_3AXIS=m CONFIG_IIO_ST_GYRO_I2C_3AXIS=m CONFIG_IIO_ST_GYRO_SPI_3AXIS=m @@ -2806,7 +2829,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y # CONFIG_IMA_SIG_TEMPLATE is not set # CONFIG_IMA_TEMPLATE is not set -# CONFIG_IMA_TRUSTED_KEYRING is not set CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2935,6 +2957,7 @@ CONFIG_INPUT_WISTRON_BTNS=m 
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +CONFIG_INSPUR_PLATFORM_PROFILE=m CONFIG_INT3406_THERMAL=m CONFIG_INT340X_THERMAL=m CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y @@ -3017,6 +3040,7 @@ CONFIG_INTEL_SOC_PMIC=y CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TCC_COOLING=m CONFIG_INTEL_TDX_GUEST=y +CONFIG_INTEL_TDX_HOST=y CONFIG_INTEL_TELEMETRY=m CONFIG_INTEL_TH_ACPI=m # CONFIG_INTEL_TH_DEBUG is not set @@ -3089,8 +3113,6 @@ CONFIG_IP6_NF_TARGET_SYNPROXY=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IPC_NS=y # CONFIG_IP_DCCP is not set -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP=m CONFIG_IP_FIB_TRIE_STATS=y # CONFIG_IPMB_DEVICE_INTERFACE is not set CONFIG_IPMI_DEVICE_INTERFACE=m @@ -3405,7 +3427,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS_REQUEST_CACHE=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3444,6 +3466,7 @@ CONFIG_KVM_AMD_SEV=y CONFIG_KVM_GUEST=y CONFIG_KVM_INTEL=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_MMU_AUDIT=y # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y @@ -3502,6 +3525,7 @@ CONFIG_LEDS_GROUP_MULTICOLOR=m CONFIG_LEDS_INTEL_SS4200=m # CONFIG_LEDS_IS31FL319X is not set CONFIG_LEDS_IS31FL32XX=m +CONFIG_LEDS_KTD202X=m # CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_LGM is not set CONFIG_LEDS_LM3530=m @@ -3631,6 +3655,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf,landlock" CONFIG_LSM_MMAP_MIN_ADDR=65535 CONFIG_LTC1660=m +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3645,6 +3670,7 @@ CONFIG_LTO_NONE=y CONFIG_LTR501=m CONFIG_LTRF216A=m CONFIG_LV0104CS=m +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3653,6 +3679,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y @@ -3721,6 +3748,7 @@ CONFIG_MB1232=m # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set CONFIG_MCP3911=m CONFIG_MCP4018=m CONFIG_MCP41010=m @@ -3731,6 +3759,7 @@ CONFIG_MCP4728=m # CONFIG_MCP4922 is not set CONFIG_MCTP_SERIAL=m # CONFIG_MCTP_TRANSPORT_I2C is not set +# CONFIG_MCTP_TRANSPORT_I3C is not set CONFIG_MCTP=y CONFIG_MD_AUTODETECT=y CONFIG_MD_BITMAP_FILE=y @@ -3760,6 +3789,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_ATTACH=y @@ -3987,18 +4017,22 @@ CONFIG_MLX4_DEBUG=y CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_EN=m CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_ACCEL=y CONFIG_MLX5_CLS_ACT=y CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y CONFIG_MLX5_EN_RXNFC=y CONFIG_MLX5_EN_TLS=y CONFIG_MLX5_ESWITCH=y -# CONFIG_MLX5_FPGA is not set +# CONFIG_MLX5_FPGA_IPSEC is not set +# CONFIG_MLX5_FPGA_TLS is not set +CONFIG_MLX5_FPGA=y CONFIG_MLX5_INFINIBAND=m CONFIG_MLX5_IPSEC=y CONFIG_MLX5_MACSEC=y @@ -4100,6 +4134,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# 
CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -4155,7 +4192,7 @@ CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y # CONFIG_MS5611 is not set # CONFIG_MS5637 is not set -CONFIG_MSA311=m +# CONFIG_MSA311 is not set # CONFIG_MS_BLOCK is not set CONFIG_MSDOS_FS=m CONFIG_MSDOS_PARTITION=y @@ -4181,6 +4218,8 @@ CONFIG_MT7915E=m CONFIG_MT7921E=m CONFIG_MT7921S=m CONFIG_MT7921U=m +CONFIG_MT7925E=m +CONFIG_MT7925U=m CONFIG_MT7996E=m # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AR7_PARTS is not set @@ -4247,10 +4286,11 @@ CONFIG_MTD_RAW_NAND=m # CONFIG_MTD_SHARPSL_PARTS is not set # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_SPI_NAND is not set -# CONFIG_MTD_SPI_NOR is not set +CONFIG_MTD_SPI_NOR=m # CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y # CONFIG_MTD_SPI_NOR_SWP_KEEP is not set +CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y # CONFIG_MTD_SST25L is not set # CONFIG_MTD_SWAP is not set # CONFIG_MTD_TESTS is not set @@ -4332,9 +4372,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -4407,12 +4444,12 @@ CONFIG_NETFILTER_EGRESS=y CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK_ACCT=m # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set -# CONFIG_NETFILTER_NETLINK_HOOK is not set +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_XTABLES_COMPAT=y +# CONFIG_NETFILTER_XTABLES_COMPAT is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_CONNMARK=m CONFIG_NETFILTER_XT_MARK=m @@ -4505,6 +4542,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +CONFIG_NETKIT=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -4517,15 +4555,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NETROM=m # CONFIG_NET_SB1000 is not set -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m # CONFIG_NET_SCH_DEFAULT is not set CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4559,6 +4594,7 @@ CONFIG_NET_TEAM_MODE_BROADCAST=m CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_NET_TEAM_MODE_RANDOM=m CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEST=m CONFIG_NET_TULIP=y CONFIG_NET_UDP_TUNNEL=m CONFIG_NET_VENDOR_3COM=y @@ -4695,7 +4731,7 @@ CONFIG_NFC_ST21NFCA=m # CONFIG_NFC_ST_NCI_I2C is not set # CONFIG_NFC_ST_NCI_SPI is not set CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NFC_TRF7970A=m @@ -4901,8 +4937,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_SHIELD_FF=y CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y CONFIG_NVME_HWMON=y CONFIG_NVMEM_LAYOUT_ONIE_TLV=m CONFIG_NVMEM_LAYOUT_SL28_VPD=m @@ -4922,7 +4959,9 @@ CONFIG_NVME_TARGET=m CONFIG_NVME_TARGET_PASSTHRU=y CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y CONFIG_NVSW_SN2201=m @@ -5058,6 +5097,7 @@ 
CONFIG_PCI_BIOS=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -5121,6 +5161,7 @@ CONFIG_PCMCIA_XIRC2PS=m CONFIG_PCMCIA_XIRCOM=m CONFIG_PCMCIA=y CONFIG_PCNET32=m +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -5152,8 +5193,6 @@ CONFIG_PHY_CADENCE_SIERRA=m CONFIG_PHY_CADENCE_TORRENT=m # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_HI3670_PCIE is not set # CONFIG_PHY_HI3670_USB is not set # CONFIG_PHY_INTEL_LGM_COMBO is not set @@ -5162,8 +5201,6 @@ CONFIG_PHY_CADENCE_TORRENT=m CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -5267,7 +5304,6 @@ CONFIG_POSIX_TIMERS=y CONFIG_POWERCAP=y CONFIG_POWER_MLXBF=m # CONFIG_POWER_RESET_BRCMKONA is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_LINKSTATION is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -5522,7 +5558,7 @@ CONFIG_RD_ZSTD=y # CONFIG_READABLE_ASM is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_REALTEK_AUTOPM=y -CONFIG_REALTEK_PHY=y +CONFIG_REALTEK_PHY=m # CONFIG_REED_SOLOMON_TEST is not set # CONFIG_REGMAP_BUILD is not set CONFIG_REGMAP_I2C=y @@ -5557,6 +5593,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m CONFIG_REGULATOR_MAX20411=m CONFIG_REGULATOR_MAX5970=m CONFIG_REGULATOR_MAX597X=m +CONFIG_REGULATOR_MAX77503=m CONFIG_REGULATOR_MAX77650=m # CONFIG_REGULATOR_MAX77826 is not set CONFIG_REGULATOR_MAX77857=m @@ -5664,6 +5701,7 @@ CONFIG_RMI4_SPI=m CONFIG_RMNET=m # CONFIG_ROCKCHIP_PHY is not set CONFIG_ROCKER=m +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set CONFIG_ROHM_BU27034=m CONFIG_ROMFS_BACKED_BY_BLOCK=y @@ -5716,7 +5754,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ABEOZ9=m CONFIG_RTC_DRV_ABX80X=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_CROS_EC=m @@ -5900,7 +5937,7 @@ CONFIG_SBP_TARGET=m # CONFIG_SC1200_WDT is not set CONFIG_SC92031=m # CONFIG_SCA3000 is not set -CONFIG_SCA3300=m +# CONFIG_SCA3300 is not set CONFIG_SCD30_CORE=m CONFIG_SCD30_I2C=m CONFIG_SCD30_SERIAL=m @@ -6032,11 +6069,12 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y # CONFIG_SECURITY_APPARMOR is not set -# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY_DMESG_RESTRICT=y CONFIG_SECURITYFS=y CONFIG_SECURITY_INFINIBAND=y CONFIG_SECURITY_LANDLOCK=y @@ -6191,6 +6229,7 @@ CONFIG_SENSORS_LTC2947_SPI=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set CONFIG_SENSORS_LTC2990=m +CONFIG_SENSORS_LTC2991=m # CONFIG_SENSORS_LTC2992 is not set CONFIG_SENSORS_LTC3815=m CONFIG_SENSORS_LTC4151=m @@ -6252,6 +6291,7 @@ CONFIG_SENSORS_PLI1209BC=m CONFIG_SENSORS_PLI1209BC_REGULATOR=y CONFIG_SENSORS_PM6764TR=m CONFIG_SENSORS_PMBUS=m +CONFIG_SENSORS_POWERZ=m CONFIG_SENSORS_POWR1220=m CONFIG_SENSORS_PWM_FAN=m # CONFIG_SENSORS_PXE1610 
is not set @@ -6434,7 +6474,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLS=y CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set @@ -6471,6 +6510,9 @@ CONFIG_SND_ALOOP=m CONFIG_SND_ALS300=m CONFIG_SND_ALS4000=m CONFIG_SND_AMD_ACP_CONFIG=m +# CONFIG_SND_AMD_ASOC_ACP63 is not set +# CONFIG_SND_AMD_ASOC_REMBRANDT is not set +# CONFIG_SND_AMD_ASOC_RENOIR is not set CONFIG_SND_ASIHPI=m CONFIG_SND_ATIIXP=m CONFIG_SND_ATIIXP_MODEM=m @@ -6530,6 +6572,7 @@ CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -6660,14 +6703,18 @@ CONFIG_SND_SOC_AMD_ACP5x=m CONFIG_SND_SOC_AMD_ACP6x=m CONFIG_SND_SOC_AMD_ACP_COMMON=m CONFIG_SND_SOC_AMD_ACP=m +# CONFIG_SND_SOC_AMD_ACP_PCI is not set CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m +# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set +CONFIG_SND_SOC_AMD_MACH_COMMON=m CONFIG_SND_SOC_AMD_PS=m CONFIG_SND_SOC_AMD_PS_MACH=m CONFIG_SND_SOC_AMD_RENOIR=m CONFIG_SND_SOC_AMD_RENOIR_MACH=m CONFIG_SND_SOC_AMD_RPL_ACP6x=m CONFIG_SND_SOC_AMD_RV_RT5682_MACH=m +CONFIG_SND_SOC_AMD_SOF_MACH=m CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m CONFIG_SND_SOC_AMD_VANGOGH_MACH=m CONFIG_SND_SOC_AMD_YC_MACH=m @@ -6675,8 +6722,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m # CONFIG_SND_SOC_ARNDALE is not set CONFIG_SND_SOC_AUDIO_IIO_AUX=m CONFIG_SND_SOC_AW8738=m +CONFIG_SND_SOC_AW87390=m CONFIG_SND_SOC_AW88261=m CONFIG_SND_SOC_AW88395=m +CONFIG_SND_SOC_AW88399=m CONFIG_SND_SOC_BD28623=m CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CHV3_CODEC=m @@ -6771,6 +6820,7 @@ CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663=m CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m @@ -6811,6 +6861,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE=m CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m +CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m @@ -6886,12 +6937,6 @@ CONFIG_SND_SOC_PCM512x=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set CONFIG_SND_SOC_RT1017_SDCA_SDW=m CONFIG_SND_SOC_RT1308=m CONFIG_SND_SOC_RT1308_SDW=m @@ -6914,6 +6959,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +CONFIG_SND_SOC_RTQ9128=m # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6931,6 +6977,7 @@ CONFIG_SND_SOC_SMA1303=m # CONFIG_SND_SOC_SNOW is not set CONFIG_SND_SOC_SOF_ACPI=m CONFIG_SND_SOC_SOF_ALDERLAKE=m +CONFIG_SND_SOC_SOF_AMD_ACP63=m CONFIG_SND_SOC_SOF_AMD_REMBRANDT=m CONFIG_SND_SOC_SOF_AMD_RENOIR=m CONFIG_SND_SOC_SOF_AMD_TOPLEVEL=m @@ -7170,11 +7217,15 @@ 
CONFIG_SPI_FSL_LPSPI=m # CONFIG_SPI_GPIO is not set # CONFIG_SPI_HISI_KUNPENG is not set # CONFIG_SPI_HISI_SFC_V3XX is not set +CONFIG_SPI_INTEL=m +CONFIG_SPI_INTEL_PCI=m +# CONFIG_SPI_INTEL_PLATFORM is not set # CONFIG_SPI_LANTIQ_SSC is not set +CONFIG_SPI_LJCA=m # CONFIG_SPI_LM70_LLP is not set # CONFIG_SPI_LOOPBACK_TEST is not set CONFIG_SPI_MASTER=y -# CONFIG_SPI_MEM is not set +CONFIG_SPI_MEM=y CONFIG_SPI_MICROCHIP_CORE=m CONFIG_SPI_MICROCHIP_CORE_QSPI=m CONFIG_SPI_MUX=m @@ -7372,6 +7423,7 @@ CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX_DEBUG is not set CONFIG_TCM_QLA2XXX=m CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -7426,6 +7478,7 @@ CONFIG_TEST_KSTRTOX=y # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -7496,8 +7549,6 @@ CONFIG_TIFM_7XX1=m CONFIG_TIFM_CORE=m CONFIG_TIGON3_HWMON=y CONFIG_TIGON3=m -# CONFIG_TI_ICSSG_PRUETH is not set -CONFIG_TI_ICSS_IEP=m CONFIG_TI_LMP92064=m CONFIG_TIME_KUNIT_TEST=m CONFIG_TIME_NS=y @@ -7690,6 +7741,7 @@ CONFIG_TYPEC_MUX_GPIO_SBU=m CONFIG_TYPEC_MUX_INTEL_PMC=m CONFIG_TYPEC_MUX_NB7VPQ904M=m CONFIG_TYPEC_MUX_PI3USB30532=m +CONFIG_TYPEC_MUX_PTN36502=m CONFIG_TYPEC_NVIDIA_ALTMODE=m # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -7783,6 +7835,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CONFIGFS_F_MIDI2=y # CONFIG_USB_CONFIGFS_F_UAC1_LEGACY is not set @@ -7915,6 +7968,7 @@ CONFIG_USB_LED_TRIG=y CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LGM_PHY is not set # CONFIG_USB_LINK_LAYER_TEST is not set +CONFIG_USB_LJCA=m CONFIG_USB_M5602=m CONFIG_USB_MA901=m # CONFIG_USB_MASS_STORAGE is not set @@ -7964,6 +8018,7 @@ CONFIG_USB_ONBOARD_HUB=m # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y CONFIG_USBPCWATCHDOG=m CONFIG_USB_PEGASUS=m @@ -8176,7 +8231,7 @@ CONFIG_VIDEO_BT819=m CONFIG_VIDEO_BT848=m CONFIG_VIDEO_BT856=m CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_CADENCE_CSI2RX=m +# CONFIG_VIDEO_CADENCE_CSI2RX is not set CONFIG_VIDEO_CADENCE_CSI2TX=m # CONFIG_VIDEO_CADENCE is not set # CONFIG_VIDEO_CAFE_CCIC is not set @@ -8255,10 +8310,12 @@ CONFIG_VIDEO_M52790=m CONFIG_VIDEO_MAX9286=m # CONFIG_VIDEO_MAX96712 is not set # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set +# CONFIG_VIDEO_MGB4 is not set CONFIG_VIDEO_ML86V7667=m CONFIG_VIDEO_MSP3400=m CONFIG_VIDEO_MT9M001=m # CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M114=m CONFIG_VIDEO_MT9P031=m CONFIG_VIDEO_MT9T112=m CONFIG_VIDEO_MT9V011=m @@ -8636,6 +8693,7 @@ CONFIG_XEN_NETDEV_BACKEND=m CONFIG_XEN_NETDEV_FRONTEND=m CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_FRONTEND=m +CONFIG_XEN_PRIVCMD_EVENTFD=y CONFIG_XEN_PRIVCMD_IRQFD=y CONFIG_XEN_PRIVCMD=m # CONFIG_XEN_PVCALLS_BACKEND is not set @@ -8762,22 +8820,13 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# 
CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m CONFIG_LEGION_LAPTOP=m CONFIG_ACPI_CALL=m CONFIG_MFD_STEAMDECK=m @@ -8813,16 +8862,17 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # CONFIG_USB_DUMMY_HCD is not set # CONFIG_USB_CONFIGFS is not set # CONFIG_PHY_SAMSUNG_USB2 is not set -CONFIG_SND_SOC_AMD_SOF_MACH=m -CONFIG_SND_SOC_AMD_MACH_COMMON=m +CONFIG_DRM_AMD_COLOR_STEAMDECK=y CONFIG_SND_SOC_SOF=m CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE=y CONFIG_SND_SOC_SOF_IPC3=y CONFIG_SND_SOC_SOF_INTEL_IPC4=y CONFIG_SND_SOC_SOF_AMD_COMMON=m -CONFIG_SND_SOC_SOF_AMD_ACP63=m -# CONFIG_SND_SOC_AMD_ACP_PCI is not set -# CONFIG_SND_AMD_ASOC_RENOIR is not set -# CONFIG_SND_AMD_ASOC_REMBRANDT is not set -# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set CONFIG_SND_SOC_TOPOLOGY=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-x86_64-rhel.config b/SOURCES/kernel-x86_64-rhel.config index 981f729..304cfeb 100644 --- a/SOURCES/kernel-x86_64-rhel.config +++ b/SOURCES/kernel-x86_64-rhel.config @@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# CONFIG_ARCH_MA35 is not set # CONFIG_ARCH_MEMORY_PROBE is not set # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=28 @@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set CONFIG_ASUS_WMI=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y # CONFIG_CALL_THUNKS_DEBUG is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y # CONFIG_CFG80211_DEBUGFS is not set CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -936,7 +936,6 @@ CONFIG_CRYPTO_ADIANTUM=m # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y 
CONFIG_CRYPTO_AES_NI_INTEL=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -963,7 +962,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20_X86_64=y CONFIG_CRYPTO_CMAC=y @@ -1036,6 +1034,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1060,7 +1063,6 @@ CONFIG_CRYPTO_OFB=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_POLY1305_P10 is not set CONFIG_CRYPTO_POLY1305_X86_64=y # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1348,6 +1350,7 @@ CONFIG_DP83TC811_PHY=m CONFIG_DPTF_PCH_FIVR=m CONFIG_DPTF_POWER=m # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1423,6 +1426,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1449,36 +1453,90 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# 
CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1673,7 +1731,6 @@ CONFIG_EEPROM_93CX6=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1713,7 +1770,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1776,7 +1838,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1875,7 +1937,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # 
CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1883,6 +1947,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ES=m CONFIG_FUJITSU_LAPTOP=m CONFIG_FUJITSU_TABLET=m @@ -2025,6 +2090,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2210,6 +2276,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2365,6 +2432,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set CONFIG_IA32_EMULATION=y # CONFIG_IAQCORE is not set CONFIG_IAVF=m @@ -2381,6 +2449,7 @@ CONFIG_ICPLUS_PHY=m CONFIG_IDEAPAD_LAPTOP=m CONFIG_IDLE_INJECT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m # CONFIG_IE6XX_WDT is not set CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set @@ -2452,7 +2521,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2570,6 +2638,7 @@ CONFIG_INPUT_UINPUT=m CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INT340X_THERMAL=m CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y @@ -2638,6 +2707,7 @@ CONFIG_INTEL_SDSI=m CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TCC_COOLING=m CONFIG_INTEL_TDX_GUEST=y +# CONFIG_INTEL_TDX_HOST is not set CONFIG_INTEL_TH_ACPI=m # CONFIG_INTEL_TH_DEBUG is not set CONFIG_INTEL_TH_GTH=m @@ -2672,7 +2742,8 @@ CONFIG_IO_DELAY_0X80=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -2968,7 +3039,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3003,6 +3074,7 @@ CONFIG_KVM_AMD_SEV=y CONFIG_KVM_GUEST=y CONFIG_KVM_INTEL=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_MMU_AUDIT=y # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y @@ -3170,6 +3242,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3184,6 +3257,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3192,6 +3266,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m 
CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3221,7 +3296,6 @@ CONFIG_MANTIS_CORE=m CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y CONFIG_MARVELL_PHY=m # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3257,6 +3331,7 @@ CONFIG_MAXSMP=y # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3280,7 +3355,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3289,6 +3364,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3355,7 +3431,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3500,6 +3576,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3609,6 +3686,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3686,6 +3766,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3796,9 +3878,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3920,6 +3999,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3932,15 +4012,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -3969,6 +4046,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4068,7 +4146,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# 
CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4265,8 +4343,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4288,7 +4367,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y CONFIG_NVSW_SN2201=m @@ -4403,6 +4484,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4451,6 +4533,7 @@ CONFIG_PCI_STUB=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4480,8 +4563,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4493,8 +4574,6 @@ CONFIG_PHY_BCM_SR_USB=m CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4526,6 +4605,7 @@ CONFIG_PINCTRL_BROXTON=m CONFIG_PINCTRL_CANNONLAKE=m CONFIG_PINCTRL_CEDARFORK=m # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_DENVERTON=m CONFIG_PINCTRL_ELKHARTLAKE=m @@ -4602,7 +4682,6 @@ CONFIG_POWERCAP=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4766,7 +4845,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4868,6 +4946,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4943,6 +5022,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -4982,7 +5062,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5294,6 +5373,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # 
CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5450,6 +5530,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5508,6 +5589,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5677,7 +5759,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set CONFIG_SLS=y CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SLUB_DEBUG_ON is not set @@ -5759,6 +5840,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5900,8 +5982,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -5993,6 +6077,7 @@ CONFIG_SND_SOC_INTEL_AVS=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6033,6 +6118,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE=m CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m +CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m @@ -6108,12 +6194,6 @@ CONFIG_SND_SOC_PCM512x=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set CONFIG_SND_SOC_RT1308=m CONFIG_SND_SOC_RT1308_SDW=m @@ -6136,6 +6216,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6332,7 +6413,6 @@ CONFIG_SND_X86=y CONFIG_SND_XEN_FRONTEND=m # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6550,6 
+6630,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6603,6 +6684,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6832,6 +6914,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6907,6 +6990,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -7005,6 +7089,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LGM_PHY is not set # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7047,6 +7132,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7196,6 +7282,9 @@ CONFIG_VETH=m # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m CONFIG_VFIO_MDEV=m @@ -7311,11 +7400,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7730,22 +7821,13 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m CONFIG_LEGION_LAPTOP=m CONFIG_ACPI_CALL=m CONFIG_IIO_HRTIMER_TRIGGER=m @@ -7787,6 +7869,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # CONFIG_USB_DUMMY_HCD is not set # CONFIG_USB_CONFIGFS is not set # CONFIG_PHY_SAMSUNG_USB2 is not set +CONFIG_DRM_AMD_COLOR_STEAMDECK=y CONFIG_SND_SOC_AMD_SOF_MACH=m CONFIG_SND_SOC_AMD_MACH_COMMON=m CONFIG_SND_SOC_SOF=m @@ -7800,3 +7883,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m # CONFIG_SND_AMD_ASOC_REMBRANDT is not set # CONFIG_SND_SOC_AMD_LEGACY_MACH is not set CONFIG_SND_SOC_TOPOLOGY=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m 
+CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-x86_64-rt-debug-rhel.config b/SOURCES/kernel-x86_64-rt-debug-rhel.config index 3c168ab..985beda 100644 --- a/SOURCES/kernel-x86_64-rt-debug-rhel.config +++ b/SOURCES/kernel-x86_64-rt-debug-rhel.config @@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# CONFIG_ARCH_MA35 is not set # CONFIG_ARCH_MEMORY_PROBE is not set # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=28 @@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set CONFIG_ASUS_WMI=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is not set CONFIG_BCM7XXX_PHY=m @@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set CONFIG_BRCM_TRACING=y # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y # CONFIG_CALL_THUNKS_DEBUG is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y CONFIG_CFG80211_DEBUGFS=y CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -937,7 +937,6 @@ CONFIG_CRYPTO_ADIANTUM=m # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y CONFIG_CRYPTO_AES_NI_INTEL=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -964,7 +963,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20_X86_64=y CONFIG_CRYPTO_CMAC=y @@ -1037,6 +1035,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1061,7 +1064,6 @@ CONFIG_CRYPTO_OFB=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_POLY1305_P10 is not set CONFIG_CRYPTO_POLY1305_X86_64=y # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set 
CONFIG_CRYPTO_RMD160=m @@ -1358,6 +1360,7 @@ CONFIG_DP83TC811_PHY=m CONFIG_DPTF_PCH_FIVR=m CONFIG_DPTF_POWER=m # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1433,6 +1436,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1459,36 +1463,90 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # 
CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1719,7 +1777,6 @@ CONFIG_EEPROM_93CX6=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1759,7 +1816,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1830,7 +1892,7 @@ CONFIG_FAULT_INJECTION=y # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1929,7 +1991,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1937,6 +2001,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ES=m CONFIG_FUJITSU_LAPTOP=m CONFIG_FUJITSU_TABLET=m @@ -2080,6 +2145,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set CONFIG_GUP_TEST=y CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2266,6 +2332,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2421,6 +2488,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set CONFIG_IA32_EMULATION=y # CONFIG_IAQCORE is not set CONFIG_IAVF=m @@ -2437,6 +2505,7 @@ 
CONFIG_ICPLUS_PHY=m CONFIG_IDEAPAD_LAPTOP=m CONFIG_IDLE_INJECT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m # CONFIG_IE6XX_WDT is not set CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set @@ -2508,7 +2577,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2626,6 +2694,7 @@ CONFIG_INPUT_UINPUT=m CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INT340X_THERMAL=m CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y @@ -2694,6 +2763,7 @@ CONFIG_INTEL_SDSI=m CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TCC_COOLING=m CONFIG_INTEL_TDX_GUEST=y +# CONFIG_INTEL_TDX_HOST is not set CONFIG_INTEL_TH_ACPI=m # CONFIG_INTEL_TH_DEBUG is not set CONFIG_INTEL_TH_GTH=m @@ -2728,7 +2798,8 @@ CONFIG_IOMMU_DEBUGFS=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -3029,7 +3100,7 @@ CONFIG_KEY_NOTIFICATIONS=y CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set # CONFIG_KFENCE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3063,6 +3134,7 @@ CONFIG_KVM_AMD_SEV=y CONFIG_KVM_GUEST=y CONFIG_KVM_INTEL=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_MMU_AUDIT=y CONFIG_KVM_PROVE_MMU=y CONFIG_KVM_SMM=y @@ -3230,6 +3302,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3244,6 +3317,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3252,6 +3326,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3281,7 +3356,6 @@ CONFIG_MANTIS_CORE=m CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y CONFIG_MARVELL_PHY=m # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3317,6 +3391,7 @@ CONFIG_MAXSMP=y # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3340,7 +3415,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3349,6 +3424,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3420,7 +3496,7 @@ 
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3565,6 +3641,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3674,6 +3751,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3751,6 +3831,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3861,9 +3943,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3985,6 +4064,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3997,15 +4077,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4034,6 +4111,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4133,7 +4211,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4330,8 +4408,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4353,7 +4432,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y CONFIG_NVSW_SN2201=m @@ -4469,6 +4550,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4517,6 +4599,7 @@ CONFIG_PCI_STUB=y # CONFIG_PCI_SW_SWITCHTEC is not 
set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4546,8 +4629,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4559,8 +4640,6 @@ CONFIG_PHY_BCM_SR_USB=m CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4592,6 +4671,7 @@ CONFIG_PINCTRL_BROXTON=m CONFIG_PINCTRL_CANNONLAKE=m CONFIG_PINCTRL_CEDARFORK=m # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set CONFIG_PINCTRL_DENVERTON=m CONFIG_PINCTRL_ELKHARTLAKE=m @@ -4669,7 +4749,6 @@ CONFIG_POWERCAP=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4837,7 +4916,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4942,6 +5020,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -5017,6 +5096,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5056,7 +5136,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5368,6 +5447,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5524,6 +5604,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5582,6 +5663,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5751,8 +5833,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set -# CONFIG_SLOB is not set CONFIG_SLS=y # CONFIG_SLUB_CPU_PARTIAL is not set # CONFIG_SLUB_DEBUG_ON is not set @@ -5835,6 +5915,7 @@ 
CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5977,8 +6058,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6070,6 +6153,7 @@ CONFIG_SND_SOC_INTEL_AVS=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6110,6 +6194,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE=m CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m +CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m @@ -6185,12 +6270,6 @@ CONFIG_SND_SOC_PCM512x=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set CONFIG_SND_SOC_RT1308=m CONFIG_SND_SOC_RT1308_SDW=m @@ -6213,6 +6292,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6410,7 +6490,6 @@ CONFIG_SND_X86=y CONFIG_SND_XEN_FRONTEND=m # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6628,6 +6707,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6681,6 +6761,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set CONFIG_TEST_MIN_HEAP=m # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6910,6 +6991,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6986,6 +7068,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -7084,6 +7167,7 @@ 
CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LGM_PHY is not set # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7126,6 +7210,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7275,6 +7360,9 @@ CONFIG_VETH=m # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m CONFIG_VFIO_MDEV=m @@ -7390,11 +7478,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7816,22 +7906,13 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m CONFIG_LEGION_LAPTOP=m CONFIG_ACPI_CALL=m CONFIG_IIO_HRTIMER_TRIGGER=m @@ -7873,6 +7954,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # CONFIG_USB_DUMMY_HCD is not set # CONFIG_USB_CONFIGFS is not set # CONFIG_PHY_SAMSUNG_USB2 is not set +CONFIG_DRM_AMD_COLOR_STEAMDECK=y CONFIG_SND_SOC_AMD_SOF_MACH=m CONFIG_SND_SOC_AMD_MACH_COMMON=m CONFIG_SND_SOC_SOF=m @@ -7886,3 +7968,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m # CONFIG_SND_AMD_ASOC_REMBRANDT is not set # CONFIG_SND_SOC_AMD_LEGACY_MACH is not set CONFIG_SND_SOC_TOPOLOGY=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel-x86_64-rt-rhel.config b/SOURCES/kernel-x86_64-rt-rhel.config index 998a273..bce617a 100644 --- a/SOURCES/kernel-x86_64-rt-rhel.config +++ b/SOURCES/kernel-x86_64-rt-rhel.config @@ -258,7 +258,6 @@ CONFIG_AQUANTIA_PHY=m # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_LG1K is not set -# CONFIG_ARCH_MA35 is not set # CONFIG_ARCH_MEMORY_PROBE is not set # CONFIG_ARCH_MESON is not set CONFIG_ARCH_MMAP_RND_BITS=28 @@ -304,6 +303,7 @@ CONFIG_ASUS_NB_WMI=m # CONFIG_ASUS_TF103C_DOCK is not set # CONFIG_ASUS_WIRELESS is not set CONFIG_ASUS_WMI=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_ASYNC_TX_DMA=y @@ -417,6 +417,7 @@ CONFIG_BASE_FULL=y # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_SBS is not set # CONFIG_BATTERY_UG3105 is not set +# CONFIG_BCACHEFS_FS is not set # CONFIG_BCACHE is not set # CONFIG_BCM54140_PHY is 
not set CONFIG_BCM7XXX_PHY=m @@ -545,7 +546,6 @@ CONFIG_BRCMFMAC_PCIE=y CONFIG_BRCMFMAC_SDIO=y CONFIG_BRCMFMAC_USB=y CONFIG_BRCMSMAC=m -# CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_BRCM_TRACING is not set # CONFIG_BRIDGE_CFM is not set CONFIG_BRIDGE_EBT_802_3=m @@ -640,7 +640,6 @@ CONFIG_CALL_DEPTH_TRACKING=y # CONFIG_CALL_THUNKS_DEBUG is not set CONFIG_CAN_8DEV_USB=m CONFIG_CAN_BCM=m -# CONFIG_CAN_BXCAN is not set CONFIG_CAN_CALC_BITTIMING=y # CONFIG_CAN_CAN327 is not set # CONFIG_CAN_CC770 is not set @@ -710,6 +709,7 @@ CONFIG_CFG80211_CRDA_SUPPORT=y # CONFIG_CFG80211_DEBUGFS is not set CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +CONFIG_CFG80211_KUNIT_TEST=m CONFIG_CFG80211=m # CONFIG_CFG80211_WEXT is not set # CONFIG_CFI_CLANG is not set @@ -787,6 +787,7 @@ CONFIG_CIFS_SMB_DIRECT=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CLEANCACHE=y +CONFIG_CLK_FD_KUNIT_TEST=m CONFIG_CLK_GATE_KUNIT_TEST=m # CONFIG_CLK_GFM_LPASS_SM8250 is not set # CONFIG_CLK_ICST is not set @@ -862,7 +863,6 @@ CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_CONTEXT_SWITCH_TRACER=y # CONFIG_CONTEXT_TRACKING_USER_FORCE is not set -# CONFIG_COPS is not set CONFIG_CORDIC=m CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_COREDUMP=y @@ -937,7 +937,6 @@ CONFIG_CRYPTO_ADIANTUM=m # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AES_ARM64 is not set -CONFIG_CRYPTO_AES_GCM_P10=y CONFIG_CRYPTO_AES_NI_INTEL=y # CONFIG_CRYPTO_AES_TI is not set CONFIG_CRYPTO_AES=y @@ -964,7 +963,6 @@ CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_CFB=y CONFIG_CRYPTO_CHACHA20=m -# CONFIG_CRYPTO_CHACHA20_P10 is not set CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_CHACHA20_X86_64=y CONFIG_CRYPTO_CMAC=y @@ -1037,6 +1035,11 @@ CONFIG_CRYPTO_GHASH=y # CONFIG_CRYPTO_HCTR2 is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_1024 is not set +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_128 is not set +CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_2=y +# CONFIG_CRYPTO_JITTERENTROPY_MEMSIZE_8192 is not set +CONFIG_CRYPTO_JITTERENTROPY_OSR=1 # CONFIG_CRYPTO_JITTERENTROPY_TESTINTERFACE is not set # CONFIG_CRYPTO_KEYWRAP is not set CONFIG_CRYPTO_LIB_BLAKE2S=m @@ -1061,7 +1064,6 @@ CONFIG_CRYPTO_OFB=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_POLY1305=m -# CONFIG_CRYPTO_POLY1305_P10 is not set CONFIG_CRYPTO_POLY1305_X86_64=y # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set CONFIG_CRYPTO_RMD160=m @@ -1350,6 +1352,7 @@ CONFIG_DP83TC811_PHY=m CONFIG_DPTF_PCH_FIVR=m CONFIG_DPTF_POWER=m # CONFIG_DRAGONRISE_FF is not set +CONFIG_DRIVER_PE_KUNIT_TEST=m # CONFIG_DRM_ACCEL is not set CONFIG_DRM_AMD_ACP=y # CONFIG_DRM_AMD_DC_HDCP is not set @@ -1425,6 +1428,7 @@ CONFIG_DRM_I915_USERPTR=y # CONFIG_DRM_IMX8QXP_LDB is not set # CONFIG_DRM_IMX8QXP_PIXEL_COMBINER is not set # CONFIG_DRM_IMX8QXP_PIXEL_LINK_TO_DPI is not set +# CONFIG_DRM_IMX93_MIPI_DSI is not set # CONFIG_DRM_IMX_LCDIF is not set # CONFIG_DRM_ITE_IT6505 is not set # CONFIG_DRM_ITE_IT66121 is not set @@ -1451,36 +1455,90 @@ CONFIG_DRM_NOUVEAU=m # CONFIG_DRM_OFDRM is not set # CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set # CONFIG_DRM_PANEL_AUO_A030JTN01 is not set +# CONFIG_DRM_PANEL_BOE_BF060Y8M_AJ0 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +# 
CONFIG_DRM_PANEL_EBBG_FT8719 is not set # CONFIG_DRM_PANEL_EDP is not set +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_HIMAX_HX8394 is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9882T is not set # CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set +# CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set +# CONFIG_DRM_PANEL_JADARD_JD9365DA_H3 is not set +# CONFIG_DRM_PANEL_JDI_LPM102A188A is not set +# CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_JDI_R63452 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LVDS is not set +# CONFIG_DRM_PANEL_MAGNACHIP_D53E6EA8966 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_MIPI_DBI is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NEWVISION_NV3051D is not set # CONFIG_DRM_PANEL_NEWVISION_NV3052C is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35560 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35950 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36523 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set # CONFIG_DRM_PANEL_ORISETECH_OTA5601A is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +# CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00 is not set # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM692E5 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set # CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D27A1 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D7AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +# CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +# CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set +# CONFIG_DRM_PANEL_SHARP_LS060T1SX01 is not set # CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_SONY_TD4353_JDI is not set +# CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521 is not set +# CONFIG_DRM_PANEL_STARTEK_KD070FHFID015 is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_R66451 is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# 
CONFIG_DRM_PANEL_VISIONOX_VTDR6130 is not set # CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # CONFIG_DRM_PANFROST is not set # CONFIG_DRM_PARADE_PS8622 is not set # CONFIG_DRM_PARADE_PS8640 is not set @@ -1711,7 +1769,6 @@ CONFIG_EEPROM_93CX6=m # CONFIG_EEPROM_AT25 is not set # CONFIG_EEPROM_EE1004 is not set # CONFIG_EEPROM_IDT_89HPESX is not set -CONFIG_EEPROM_LEGACY=m CONFIG_EEPROM_MAX6875=m # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set # CONFIG_EFI_BOOTLOADER_CONTROL is not set @@ -1751,7 +1808,12 @@ CONFIG_ENIC=m CONFIG_EPIC100=m CONFIG_EPOLL=y # CONFIG_EQUALIZER is not set -# CONFIG_EROFS_FS is not set +# CONFIG_EROFS_FS_DEBUG is not set +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_POSIX_ACL=y +CONFIG_EROFS_FS_SECURITY=y +CONFIG_EROFS_FS_XATTR=y +# CONFIG_EROFS_FS_ZIP is not set CONFIG_ETHERNET=y CONFIG_ETHOC=m CONFIG_ETHTOOL_NETLINK=y @@ -1814,7 +1876,7 @@ CONFIG_FAT_KUNIT_TEST=m # CONFIG_FB_CIRRUS is not set # CONFIG_FB_CYBER2000 is not set # CONFIG_FB_DA8XX is not set -CONFIG_FB_DEVICE=y +# CONFIG_FB_DEVICE is not set CONFIG_FB_EFI=y # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_GEODE is not set @@ -1913,7 +1975,9 @@ CONFIG_FS_DAX=y # CONFIG_FSL_QDMA is not set # CONFIG_FSL_RCPM is not set CONFIG_FSNOTIFY=y -# CONFIG_FS_VERITY is not set +# CONFIG_FS_VERITY_BUILTIN_SIGNATURES is not set +# CONFIG_FS_VERITY_DEBUG is not set +CONFIG_FS_VERITY=y # CONFIG_FTL is not set CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_RECORD_RECURSION is not set @@ -1921,6 +1985,7 @@ CONFIG_FTRACE_MCOUNT_RECORD=y # CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE=y +# CONFIG_FUEL_GAUGE_MM8013 is not set CONFIG_FUJITSU_ES=m CONFIG_FUJITSU_LAPTOP=m CONFIG_FUJITSU_TABLET=m @@ -2064,6 +2129,7 @@ CONFIG_GPIO_SIM=m # CONFIG_GREYBUS is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_GTP is not set +# CONFIG_GUEST_STATE_BUFFER_TEST is not set # CONFIG_GUP_TEST is not set CONFIG_GVE=m # CONFIG_HABANA_AI is not set @@ -2250,6 +2316,7 @@ CONFIG_HMM_MIRROR=y # CONFIG_HNS3_PMU is not set # CONFIG_HOLTEK_FF is not set CONFIG_HOTPLUG_CPU=y +# CONFIG_HOTPLUG_PCI_ACPI_AMPERE_ALTRA is not set CONFIG_HOTPLUG_PCI_ACPI_IBM=m CONFIG_HOTPLUG_PCI_ACPI=y # CONFIG_HOTPLUG_PCI_CPCI is not set @@ -2405,6 +2472,7 @@ CONFIG_I40E=m CONFIG_I40EVF=m # CONFIG_I6300ESB_WDT is not set # CONFIG_I8K is not set +# CONFIG_IA32_EMULATION_DEFAULT_DISABLED is not set CONFIG_IA32_EMULATION=y # CONFIG_IAQCORE is not set CONFIG_IAVF=m @@ -2421,6 +2489,7 @@ CONFIG_ICPLUS_PHY=m CONFIG_IDEAPAD_LAPTOP=m CONFIG_IDLE_INJECT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IDPF=m # CONFIG_IE6XX_WDT is not set CONFIG_IEEE802154_6LOWPAN=m # CONFIG_IEEE802154_ADF7242 is not set @@ -2492,7 +2561,6 @@ CONFIG_IMA_READ_POLICY=y CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT=y CONFIG_IMA_SIG_TEMPLATE=y # CONFIG_IMA_TEMPLATE is not set -CONFIG_IMA_TRUSTED_KEYRING=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA=y # CONFIG_IMG_ASCII_LCD is not set @@ -2610,6 +2678,7 @@ CONFIG_INPUT_UINPUT=m CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m CONFIG_INPUT=y CONFIG_INPUT_YEALINK=m +# CONFIG_INSPUR_PLATFORM_PROFILE is not set # CONFIG_INT3406_THERMAL is not set CONFIG_INT340X_THERMAL=m CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y @@ -2678,6 +2747,7 @@ CONFIG_INTEL_SDSI=m CONFIG_INTEL_SPEED_SELECT_INTERFACE=m CONFIG_INTEL_TCC_COOLING=m CONFIG_INTEL_TDX_GUEST=y +# CONFIG_INTEL_TDX_HOST is not set CONFIG_INTEL_TH_ACPI=m # CONFIG_INTEL_TH_DEBUG is not set CONFIG_INTEL_TH_GTH=m @@ -2712,7 +2782,8 @@ CONFIG_IO_DELAY_0X80=y CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # 
CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set -# CONFIG_IOMMUFD is not set +CONFIG_IOMMUFD=m +# CONFIG_IOMMUFD_TEST is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST is not set @@ -3008,7 +3079,7 @@ CONFIG_KEY_NOTIFICATIONS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_KEYS=y # CONFIG_KFENCE_DEFERRABLE is not set -# CONFIG_KFENCE_KUNIT_TEST is not set +CONFIG_KFENCE_KUNIT_TEST=m CONFIG_KFENCE_NUM_OBJECTS=255 CONFIG_KFENCE_SAMPLE_INTERVAL=100 # CONFIG_KFENCE_STATIC_KEYS is not set @@ -3043,6 +3114,7 @@ CONFIG_KVM_AMD_SEV=y CONFIG_KVM_GUEST=y CONFIG_KVM_INTEL=m CONFIG_KVM=m +CONFIG_KVM_MAX_NR_VCPUS=4096 CONFIG_KVM_MMU_AUDIT=y # CONFIG_KVM_PROVE_MMU is not set CONFIG_KVM_SMM=y @@ -3210,6 +3282,7 @@ CONFIG_LSI_ET1011C_PHY=m CONFIG_LSM="lockdown,yama,integrity,selinux,bpf" CONFIG_LSM_MMAP_MIN_ADDR=65535 # CONFIG_LTC1660 is not set +# CONFIG_LTC2309 is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set # CONFIG_LTC2496 is not set @@ -3224,6 +3297,7 @@ CONFIG_LTO_NONE=y # CONFIG_LTR501 is not set # CONFIG_LTRF216A is not set # CONFIG_LV0104CS is not set +# CONFIG_LWQ_TEST is not set CONFIG_LWTUNNEL_BPF=y CONFIG_LWTUNNEL=y CONFIG_LXT_PHY=m @@ -3232,6 +3306,7 @@ CONFIG_LZ4_COMPRESS=m CONFIG_MAC80211_DEBUGFS=y # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_HWSIM=m +CONFIG_MAC80211_KUNIT_TEST=m CONFIG_MAC80211_LEDS=y CONFIG_MAC80211=m # CONFIG_MAC80211_MESH is not set @@ -3261,7 +3336,6 @@ CONFIG_MANTIS_CORE=m CONFIG_MARVELL_10G_PHY=m # CONFIG_MARVELL_88Q2XXX_PHY is not set # CONFIG_MARVELL_88X2222_PHY is not set -CONFIG_MARVELL_GTI_WDT=y CONFIG_MARVELL_PHY=m # CONFIG_MATOM is not set # CONFIG_MAX1027 is not set @@ -3297,6 +3371,7 @@ CONFIG_MAXSMP=y # CONFIG_MCORE2 is not set # CONFIG_MCP320X is not set # CONFIG_MCP3422 is not set +# CONFIG_MCP3564 is not set # CONFIG_MCP3911 is not set # CONFIG_MCP4018 is not set # CONFIG_MCP41010 is not set @@ -3320,7 +3395,7 @@ CONFIG_MDIO_HISI_FEMAC=m # CONFIG_MDIO_IPQ8064 is not set CONFIG_MDIO_MSCC_MIIM=m # CONFIG_MDIO_MVUSB is not set -CONFIG_MDIO_OCTEON=m +# CONFIG_MDIO_OCTEON is not set CONFIG_MDIO_THUNDER=m CONFIG_MD_LINEAR=m # CONFIG_MD_MULTIPATH is not set @@ -3329,6 +3404,7 @@ CONFIG_MD_RAID10=m CONFIG_MD_RAID1=m CONFIG_MD_RAID456=m CONFIG_MD=y +CONFIG_MEAN_AND_VARIANCE_UNIT_TEST=m CONFIG_MEDIA_ALTERA_CI=m # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set CONFIG_MEDIA_ATTACH=y @@ -3400,7 +3476,7 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_MFD_BD9571MWV is not set # CONFIG_MFD_CPCAP is not set # CONFIG_MFD_CS42L43_I2C is not set -# CONFIG_MFD_CS42L43_SDW is not set +CONFIG_MFD_CS42L43_SDW=m # CONFIG_MFD_DA9052_I2C is not set # CONFIG_MFD_DA9052_SPI is not set # CONFIG_MFD_DA9055 is not set @@ -3545,6 +3621,7 @@ CONFIG_MLX5_CORE_EN_DCB=y CONFIG_MLX5_CORE_EN=y CONFIG_MLX5_CORE_IPOIB=y CONFIG_MLX5_CORE=m +CONFIG_MLX5_DPLL=m CONFIG_MLX5_EN_ARFS=y CONFIG_MLX5_EN_IPSEC=y CONFIG_MLX5_EN_MACSEC=y @@ -3654,6 +3731,9 @@ CONFIG_MODULE_SIG_KEY_TYPE_RSA=y # CONFIG_MODULE_SIG_SHA1 is not set # CONFIG_MODULE_SIG_SHA224 is not set # CONFIG_MODULE_SIG_SHA256 is not set +# CONFIG_MODULE_SIG_SHA3_256 is not set +# CONFIG_MODULE_SIG_SHA3_384 is not set +# CONFIG_MODULE_SIG_SHA3_512 is not set # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG=y @@ -3731,6 +3811,8 @@ CONFIG_MT76x2U=m CONFIG_MT7921E=m # CONFIG_MT7921S is not set # CONFIG_MT7921U is not set +# CONFIG_MT7925E is not set +# 
CONFIG_MT7925U is not set # CONFIG_MT7996E is not set # CONFIG_MTD_ABSENT is not set # CONFIG_MTD_AFS_PARTS is not set @@ -3841,9 +3923,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_MATCHALL=m # CONFIG_NET_CLS_ROUTE4 is not set -# CONFIG_NET_CLS_RSVP6 is not set -# CONFIG_NET_CLS_RSVP is not set -# CONFIG_NET_CLS_TCINDEX is not set CONFIG_NET_CLS_U32=m CONFIG_NET_CLS=y CONFIG_NETCONSOLE_DYNAMIC=y @@ -3965,6 +4044,7 @@ CONFIG_NET_IPIP=m CONFIG_NET_IPVTI=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y +# CONFIG_NETKIT is not set CONFIG_NET_L3_MASTER_DEV=y CONFIG_NETLABEL=y CONFIG_NETLINK_DIAG=y @@ -3977,15 +4057,12 @@ CONFIG_NET_PKTGEN=m CONFIG_NET_POLL_CONTROLLER=y CONFIG_NET_RX_BUSY_POLL=y # CONFIG_NET_SB1000 is not set -# CONFIG_NET_SCH_ATM is not set CONFIG_NET_SCH_CAKE=m -# CONFIG_NET_SCH_CBQ is not set CONFIG_NET_SCH_CBS=m # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_CODEL is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_NET_SCH_DRR is not set -# CONFIG_NET_SCH_DSMARK is not set CONFIG_NET_SCHED=y CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_ETS=m @@ -4014,6 +4091,7 @@ CONFIG_NET_SCH_TBF=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_TC_SKB_EXT=y # CONFIG_NET_TEAM is not set +CONFIG_NET_TEST=m # CONFIG_NET_TULIP is not set CONFIG_NET_UDP_TUNNEL=m # CONFIG_NET_VENDOR_3COM is not set @@ -4113,7 +4191,7 @@ CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_PROTO_DCCP=y +# CONFIG_NF_CT_PROTO_DCCP is not set CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y CONFIG_NF_DUP_NETDEV=m @@ -4310,8 +4388,9 @@ CONFIG_NVDIMM_PFN=y # CONFIG_NVDIMM_SECURITY_TEST is not set # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_NVIDIA_WMI_EC_BACKLIGHT=m -CONFIG_NVME_AUTH=y +CONFIG_NVME_AUTH=m CONFIG_NVME_FC=m +CONFIG_NVME_HOST_AUTH=y # CONFIG_NVME_HWMON is not set # CONFIG_NVMEM_IMX_OCOTP_ELE is not set # CONFIG_NVMEM_LAYOUT_ONIE_TLV is not set @@ -4333,7 +4412,9 @@ CONFIG_NVME_TARGET=m # CONFIG_NVME_TARGET_PASSTHRU is not set CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_TCP=m +CONFIG_NVME_TARGET_TCP_TLS=y CONFIG_NVME_TCP=m +CONFIG_NVME_TCP_TLS=y # CONFIG_NVME_VERBOSE_ERRORS is not set CONFIG_NVRAM=y CONFIG_NVSW_SN2201=m @@ -4448,6 +4529,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_DYNAMIC_OF_NODES is not set +CONFIG_PCIEAER_CXL=y CONFIG_PCIEAER_INJECT=m CONFIG_PCIEAER=y # CONFIG_PCIE_ALTERA is not set @@ -4496,6 +4578,7 @@ CONFIG_PCI_STUB=y # CONFIG_PCI_SW_SWITCHTEC is not set CONFIG_PCI=y # CONFIG_PCNET32 is not set +CONFIG_PCP_BATCH_SCALE_MAX=5 CONFIG_PCPU_DEV_REFCNT=y CONFIG_PCSPKR_PLATFORM=y CONFIG_PCS_XPCS=m @@ -4525,8 +4608,6 @@ CONFIG_PHY_BCM_SR_USB=m # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_PHY_CPCAP_USB is not set -# CONFIG_PHY_FSL_IMX8M_PCIE is not set -# CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_FSL_LYNX_28G is not set # CONFIG_PHY_HI3660_USB is not set # CONFIG_PHY_HI3670_PCIE is not set @@ -4538,8 +4619,6 @@ CONFIG_PHY_BCM_SR_USB=m CONFIG_PHYLIB=y CONFIG_PHYLINK=m # CONFIG_PHY_MAPPHONE_MDM6600 is not set -# CONFIG_PHY_MIXEL_LVDS_PHY is not set -# CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_OCELOT_SERDES is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set @@ -4571,6 +4650,7 @@ CONFIG_PINCTRL_BROXTON=m CONFIG_PINCTRL_CANNONLAKE=m CONFIG_PINCTRL_CEDARFORK=m # CONFIG_PINCTRL_CHERRYVIEW is not set +# CONFIG_PINCTRL_CS42L43 is not set # CONFIG_PINCTRL_CY8C95X0 is not set 
CONFIG_PINCTRL_DENVERTON=m CONFIG_PINCTRL_ELKHARTLAKE=m @@ -4647,7 +4727,6 @@ CONFIG_POWERCAP=y CONFIG_POWERNV_CPUFREQ=y CONFIG_POWERNV_OP_PANEL=m # CONFIG_POWERPC64_CPU is not set -# CONFIG_POWER_RESET_BRCMSTB is not set # CONFIG_POWER_RESET_GPIO_RESTART is not set # CONFIG_POWER_RESET_LTC2952 is not set # CONFIG_POWER_RESET_REGULATOR is not set @@ -4815,7 +4894,6 @@ CONFIG_QLA3XXX=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_QORIQ_CPUFREQ is not set -# CONFIG_QORIQ_THERMAL is not set CONFIG_QRTR=m CONFIG_QRTR_MHI=m # CONFIG_QRTR_SMD is not set @@ -4920,6 +4998,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=m # CONFIG_REGULATOR_LTC3589 is not set # CONFIG_REGULATOR_LTC3676 is not set # CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX77503 is not set # CONFIG_REGULATOR_MAX77857 is not set # CONFIG_REGULATOR_MAX8649 is not set # CONFIG_REGULATOR_MAX8660 is not set @@ -4995,6 +5074,7 @@ CONFIG_RMI4_SPI=m CONFIG_ROCKCHIP_PHY=m CONFIG_ROCKER=m CONFIG_RODATA_FULL_DEFAULT_ENABLED=y +# CONFIG_ROHM_BM1390 is not set # CONFIG_ROHM_BU27008 is not set # CONFIG_ROHM_BU27034 is not set # CONFIG_ROMFS_FS is not set @@ -5034,7 +5114,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_DRV_ABEOZ9 is not set # CONFIG_RTC_DRV_ABX80X is not set CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_BQ4802=m # CONFIG_RTC_DRV_CADENCE is not set CONFIG_RTC_DRV_CMOS=y CONFIG_RTC_DRV_DS1286=m @@ -5346,6 +5425,7 @@ CONFIG_SDIO_UART=m # CONFIG_SDX_GCC_55 is not set # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_SECCOMP=y +# CONFIG_SECONDARY_TRUSTED_KEYRING_SIGNED_BY_BUILTIN is not set CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECRETMEM=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y @@ -5502,6 +5582,7 @@ CONFIG_SENSORS_LM95245=m CONFIG_SENSORS_LTC2978=m # CONFIG_SENSORS_LTC2978_REGULATOR is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2991 is not set # CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC3815 is not set CONFIG_SENSORS_LTC4151=m @@ -5560,6 +5641,7 @@ CONFIG_SENSORS_PCF8591=m # CONFIG_SENSORS_PLI1209BC is not set # CONFIG_SENSORS_PM6764TR is not set CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_POWERZ is not set # CONFIG_SENSORS_POWR1220 is not set # CONFIG_SENSORS_PWM_FAN is not set # CONFIG_SENSORS_PXE1610 is not set @@ -5729,8 +5811,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP=m # CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_SLIP_SMART=y -# CONFIG_SLOB_DEPRECATED is not set -# CONFIG_SLOB is not set CONFIG_SLS=y # CONFIG_SLUB_CPU_PARTIAL is not set # CONFIG_SLUB_DEBUG_ON is not set @@ -5813,6 +5893,7 @@ CONFIG_SND_FIREWORKS=m # CONFIG_SND_FM801_TEA575X_BOOL is not set CONFIG_SND_GINA20=m CONFIG_SND_GINA24=m +CONFIG_SND_HDA_CIRRUS_SCODEC_KUNIT_TEST=m CONFIG_SND_HDA_CODEC_ANALOG=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132_DSP=y @@ -5954,8 +6035,10 @@ CONFIG_SND_SOC_AMD_YC_MACH=m # CONFIG_SND_SOC_ARNDALE is not set # CONFIG_SND_SOC_AUDIO_IIO_AUX is not set # CONFIG_SND_SOC_AW8738 is not set +# CONFIG_SND_SOC_AW87390 is not set # CONFIG_SND_SOC_AW88261 is not set # CONFIG_SND_SOC_AW88395 is not set +# CONFIG_SND_SOC_AW88399 is not set # CONFIG_SND_SOC_BD28623 is not set # CONFIG_SND_SOC_BT_SCO is not set # CONFIG_SND_SOC_CHV3_CODEC is not set @@ -6047,6 +6130,7 @@ CONFIG_SND_SOC_INTEL_AVS=m # CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set +# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5663 is not set # CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set # 
CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set @@ -6087,6 +6171,7 @@ CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE=m CONFIG_SND_SOC_INTEL_SOF_CML_RT1011_RT5682_MACH=m CONFIG_SND_SOC_INTEL_SOF_CS42L42_MACH=m +CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH=m CONFIG_SND_SOC_INTEL_SOF_DA7219_MAX98373_MACH=m CONFIG_SND_SOC_INTEL_SOF_ES8336_MACH=m CONFIG_SND_SOC_INTEL_SOF_NAU8825_MACH=m @@ -6162,12 +6247,6 @@ CONFIG_SND_SOC_PCM512x=m # CONFIG_SND_SOC_RK3399_GRU_SOUND is not set # CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m -# CONFIG_SND_SOC_ROCKCHIP_I2S is not set -# CONFIG_SND_SOC_ROCKCHIP is not set -# CONFIG_SND_SOC_ROCKCHIP_MAX98090 is not set -# CONFIG_SND_SOC_ROCKCHIP_PDM is not set -# CONFIG_SND_SOC_ROCKCHIP_RT5645 is not set -# CONFIG_SND_SOC_ROCKCHIP_SPDIF is not set # CONFIG_SND_SOC_RT1017_SDCA_SDW is not set CONFIG_SND_SOC_RT1308=m CONFIG_SND_SOC_RT1308_SDW=m @@ -6190,6 +6269,7 @@ CONFIG_SND_SOC_RT715_SDCA_SDW=m CONFIG_SND_SOC_RT715_SDW=m CONFIG_SND_SOC_RT722_SDCA_SDW=m # CONFIG_SND_SOC_RT9120 is not set +# CONFIG_SND_SOC_RTQ9128 is not set # CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set # CONFIG_SND_SOC_SAMSUNG is not set # CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set @@ -6386,7 +6466,6 @@ CONFIG_SND_X86=y CONFIG_SND_XEN_FRONTEND=m # CONFIG_SND_YMFPCI is not set # CONFIG_SNET_VDPA is not set -# CONFIG_SOC_BRCMSTB is not set # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # CONFIG_SOC_TI is not set CONFIG_SOFTLOCKUP_DETECTOR=y @@ -6604,6 +6683,7 @@ CONFIG_TCM_IBLOCK=m CONFIG_TCM_PSCSI=m # CONFIG_TCM_QLA2XXX is not set CONFIG_TCM_USER2=m +CONFIG_TCP_AO=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_TCP_CONG_BIC=m @@ -6657,6 +6737,7 @@ CONFIG_TEST_LIVEPATCH=m # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set # CONFIG_TEST_OBJAGG is not set +# CONFIG_TEST_OBJPOOL is not set # CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PARMAN is not set # CONFIG_TEST_POWER is not set @@ -6886,6 +6967,7 @@ CONFIG_TYPEC_MUX_FSA4480=m # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_NB7VPQ904M is not set CONFIG_TYPEC_MUX_PI3USB30532=m +# CONFIG_TYPEC_MUX_PTN36502 is not set # CONFIG_TYPEC_NVIDIA_ALTMODE is not set # CONFIG_TYPEC_QCOM_PMIC is not set # CONFIG_TYPEC_RT1711H is not set @@ -6962,6 +7044,7 @@ CONFIG_USB_CHIPIDEA_GENERIC=m CONFIG_USB_CHIPIDEA_IMX=m CONFIG_USB_CHIPIDEA=m CONFIG_USB_CHIPIDEA_MSM=m +CONFIG_USB_CHIPIDEA_NPCM=m # CONFIG_USB_CONN_GPIO is not set CONFIG_USB_CXACRU=m # CONFIG_USB_CYPRESS_CY7C63 is not set @@ -7060,6 +7143,7 @@ CONFIG_USB_LEDS_TRIGGER_USBPORT=m CONFIG_USB_LEGOTOWER=m # CONFIG_USB_LGM_PHY is not set # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_LJCA is not set CONFIG_USB_M5602=m # CONFIG_USB_MA901 is not set # CONFIG_USB_MAX3421_HCD is not set @@ -7102,6 +7186,7 @@ CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OXU210HP_HCD is not set +CONFIG_USB_PCI_AMD=y CONFIG_USB_PCI=y # CONFIG_USBPCWATCHDOG is not set CONFIG_USB_PEGASUS=m @@ -7251,6 +7336,9 @@ CONFIG_VETH=m # CONFIG_VF610_DAC is not set CONFIG_VFAT_FS=m # CONFIG_VFIO_AMBA is not set +CONFIG_VFIO_CONTAINER=y +CONFIG_VFIO_DEVICE_CDEV=y +CONFIG_VFIO_GROUP=y CONFIG_VFIO_IOMMU_TYPE1=m CONFIG_VFIO=m CONFIG_VFIO_MDEV=m @@ -7366,11 +7454,13 @@ CONFIG_VIDEO_IVTV=m # CONFIG_VIDEO_M5MOLS is not set # CONFIG_VIDEO_MAX9286 is not set # CONFIG_VIDEO_MEYE is not set +# CONFIG_VIDEO_MGB4 is not set # CONFIG_VIDEO_ML86V7667 is not set # CONFIG_VIDEO_MSP3400 is not set # CONFIG_VIDEO_MT9M001 is not 
set # CONFIG_VIDEO_MT9M032 is not set # CONFIG_VIDEO_MT9M111 is not set +# CONFIG_VIDEO_MT9M114 is not set # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set @@ -7792,22 +7882,13 @@ CONFIG_ZENIFY=y CONFIG_WINESYNC=y CONFIG_USER_NS_UNPRIVILEGED=y CONFIG_TCP_CONG_BBR2=m -CONFIG_BCACHEFS_FS=m -CONFIG_BCACHEFS_QUOTA=y -CONFIG_BCACHEFS_POSIX_ACL=y -# CONFIG_BCACHEFS_DEBUG_TRANSACTIONS is not set -# CONFIG_BCACHEFS_DEBUG is not set -# CONFIG_BCACHEFS_TESTS is not set -# CONFIG_BCACHEFS_LOCK_TIME_STATS is not set -# CONFIG_BCACHEFS_NO_LATENCY_ACCT is not set -# CONFIG_MEAN_AND_VARIANCE_UNIT_TEST is not set -# CONFIG_DEBUG_CLOSURES is not set CONFIG_HID_IPTS=m CONFIG_HID_ITHC=m CONFIG_SURFACE_BOOK1_DGPU_SWITCH=m CONFIG_IPC_CLASSES=y CONFIG_LEDS_TPS68470=m -CONFIG_DRM_AMD_COLOR_STEAMDECK=y +CONFIG_SENSORS_SURFACE_FAN=m +CONFIG_SENSORS_SURFACE_TEMP=m CONFIG_LEGION_LAPTOP=m CONFIG_ACPI_CALL=m CONFIG_IIO_HRTIMER_TRIGGER=m @@ -7849,6 +7930,7 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # CONFIG_USB_DUMMY_HCD is not set # CONFIG_USB_CONFIGFS is not set # CONFIG_PHY_SAMSUNG_USB2 is not set +CONFIG_DRM_AMD_COLOR_STEAMDECK=y CONFIG_SND_SOC_AMD_SOF_MACH=m CONFIG_SND_SOC_AMD_MACH_COMMON=m CONFIG_SND_SOC_SOF=m @@ -7862,3 +7944,10 @@ CONFIG_SND_SOC_SOF_AMD_ACP63=m # CONFIG_SND_AMD_ASOC_REMBRANDT is not set # CONFIG_SND_SOC_AMD_LEGACY_MACH is not set CONFIG_SND_SOC_TOPOLOGY=y +CONFIG_BMI323_I2C=m +CONFIG_DRM_APPLETBDRM=m +CONFIG_HID_APPLETB_BL=m +CONFIG_HID_APPLETB_KBD=m +CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m +CONFIG_APPLE_BCE=m +CONFIG_BMI323_SPI=m diff --git a/SOURCES/kernel.changelog b/SOURCES/kernel.changelog new file mode 100644 index 0000000..f4767f4 --- /dev/null +++ b/SOURCES/kernel.changelog @@ -0,0 +1,2477 @@ +* Wed Jan 31 2024 Justin M. Forbes [6.7.3-0] +- Config update for stable backport (Justin M. Forbes) +- Add some more bugs to BugsFixed (Justin M. Forbes) +- Linux v6.7.3 +Resolves: + +* Fri Jan 26 2024 Justin M. Forbes [6.7.2-0] +- redhat: spec: Fix update_scripts run for CentOS builds (Neal Gompa) +- BPF Tool versioning seems incompatible with stable Fedora (Justin M. Forbes) +- Linux v6.7.2 +Resolves: + +* Sat Jan 20 2024 Justin M. Forbes [6.7.1-0] +- Fix up requires for UKI (Justin M. Forbes) +- Fix up libperf install (Justin M. Forbes) +- Drop soname for libcpupower.so since we reverted the bump (Justin M. Forbes) +- Turn on CONFIG_TCP_AO for Fedora (Justin M. Forbes) +- temporarily remove LIBBPF_DYNAMIC=1 from perf build (Thorsten Leemhuis) +- add libperf packages and enable perf, libperf, tools and bpftool packages (Thorsten Leemhuis) +- Revert "cpupower: Bump soname version" (Justin M. Forbes) +- Turn on Renesas RZ for Fedora IOT rhbz2257913 (Justin M. Forbes) +- Add bugs to BugsFixed (Justin M. Forbes) +- wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() (Xingyuan Mo) +- drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set (Javier Martinez Canillas) +- Basic scaffolding to create a kernel-headers package (Justin M. Forbes) +- Initial config for fedora-6.7 branch (Justin M. Forbes) +- Reset RHEL_RELEASE for 6.8 series (Justin M.
Forbes) +- common: cleanup MX3_IPU (Peter Robinson) +- all: The Octeon MDIO driver is aarch64/mips (Peter Robinson) +- common: rtc: remove bq4802 config (Peter Robinson) +- common: de-dupe MARVELL_GTI_WDT (Peter Robinson) +- all: Remove CAN_BXCAN (Peter Robinson) +- common: cleanup SND_SOC_ROCKCHIP (Peter Robinson) +- common: move RHEL DP83867_PHY to common (Peter Robinson) +- common: Make ASYMMETRIC_KEY_TYPE enable explicit (Peter Robinson) +- common: Disable aarch64 ARCH_MA35 universally (Peter Robinson) +- common: arm64: enable Tegra234 pinctrl driver (Peter Robinson) +- rhel: arm64: Enable qoriq thermal driver (Peter Robinson) +- common: aarch64: Cleanup some i.MX8 config options (Peter Robinson) +- all: EEPROM_LEGACY has been removed (Peter Robinson) +- all: remove AppleTalk hardware configs (Peter Robinson) +- all: cleanup: remove references to SLOB (Peter Robinson) +- all: cleanup: Drop unnecessary BRCMSTB configs (Peter Robinson) +- all: net: remove retired network schedulers (Peter Robinson) +- all: cleanup removed CONFIG_IMA_TRUSTED_KEYRING (Peter Robinson) +- BuildRequires: lld for build with selftests for x86 (Jan Stancek) +- spec: add keyutils to selftest-internal subpackage requirements (Artem Savkov) [2166911] +- redhat/spec: exclude liburandom_read.so from requires (Artem Savkov) [2120968] +- rtla: sync summary text with upstream and update Requires (Jan Stancek) +- uki-virt: add systemd-sysext dracut module (Gerd Hoffmann) +- uki-virt: add virtiofs dracut module (Gerd Hoffmann) +- common: disable the FB device creation (Peter Robinson) +- s390x: There's no FB on Z-series (Peter Robinson) +- Linux v6.7.1 +Resolves: rhbz#2120968, rhbz#2166911 + +* Mon Jan 08 2024 Fedora Kernel Team [6.7.0-68] +- fedora: aarch64: enable SM_VIDEOCC_8350 (Peter Robinson) +- Linux v6.7.0 +Resolves: + +* Sun Jan 07 2024 Fedora Kernel Team [6.7.0-0.rc8.52b1853b080a.67] +- Linux v6.7.0-0.rc8.52b1853b080a +Resolves: + +* Sat Jan 06 2024 Fedora Kernel Team [6.7.0-0.rc8.95c8a35f1c01.66] +- fedora: arm64: enable ethernet on newer TI industrial (Peter Robinson) +- fedora: arm64: Disable VIDEO_IMX_MEDIA (Peter Robinson) +- fedora: use common config for Siemens Simatic IPC (Peter Robinson) +- fedora: arm: enable Rockchip SPI flash (Peter Robinson) +- fedora: arm64: enable DRM_TI_SN65DSI83 (Peter Robinson) +- Linux v6.7.0-0.rc8.95c8a35f1c01 +Resolves: + +* Fri Jan 05 2024 Fedora Kernel Team [6.7.0-0.rc8.1f874787ed9a.65] +- Linux v6.7.0-0.rc8.1f874787ed9a +Resolves: + +* Thu Jan 04 2024 Fedora Kernel Team [6.7.0-0.rc8.ac865f00af29.64] +- Linux v6.7.0-0.rc8.ac865f00af29 +Resolves: + +* Wed Jan 03 2024 Fedora Kernel Team [6.7.0-0.rc8.63] +- kernel.spec: remove kernel-smp reference from scripts (Jan Stancek) +Resolves: + +* Tue Jan 02 2024 Fedora Kernel Team [6.7.0-0.rc8.62] +- redhat: do not compress the full kernel changelog in the src.rpm (Herton R. Krzesinski) +Resolves: + +* Mon Jan 01 2024 Fedora Kernel Team [6.7.0-0.rc8.61] +- Linux v6.7.0-0.rc8 +Resolves: + +* Sun Dec 31 2023 Fedora Kernel Team [6.7.0-0.rc7.453f5db0619e.60] +- Linux v6.7.0-0.rc7.453f5db0619e +Resolves: + +* Sat Dec 30 2023 Fedora Kernel Team [6.7.0-0.rc7.f016f7547aee.59] +- Auto consolidate configs for the 6.7 cycle (Justin M.
Forbes) +- Linux v6.7.0-0.rc7.f016f7547aee +Resolves: + +* Fri Dec 29 2023 Fedora Kernel Team [6.7.0-0.rc7.8735c7c84d1b.58] +- Linux v6.7.0-0.rc7.8735c7c84d1b +Resolves: + +* Thu Dec 28 2023 Fedora Kernel Team [6.7.0-0.rc7.f5837722ffec.57] +- Linux v6.7.0-0.rc7.f5837722ffec +Resolves: + +* Tue Dec 26 2023 Fedora Kernel Team [6.7.0-0.rc7.fbafc3e621c3.56] +- Linux v6.7.0-0.rc7.fbafc3e621c3 +Resolves: + +* Mon Dec 25 2023 Fedora Kernel Team [6.7.0-0.rc7.55] +- Enable sound for a line of Huawei laptops (TomZanna) +Resolves: + +* Sun Dec 24 2023 Fedora Kernel Team [6.7.0-0.rc7.54] +- Linux v6.7.0-0.rc7 +Resolves: + +* Sat Dec 23 2023 Fedora Kernel Team [6.7.0-0.rc6.5254c0cbc92d.53] +- Linux v6.7.0-0.rc6.5254c0cbc92d +Resolves: + +* Fri Dec 22 2023 Fedora Kernel Team [6.7.0-0.rc6.24e0d2e527a3.52] +- fedora: a few cleanups and driver enablements (Peter Robinson) +- fedora: arm64: cleanup Allwinner Pinctrl drivers (Peter Robinson) +- fedora: aarch64: Enable some DW drivers (Peter Robinson) +- Linux v6.7.0-0.rc6.24e0d2e527a3 +Resolves: + +* Thu Dec 21 2023 Fedora Kernel Team [6.7.0-0.rc6.a4aebe936554.51] +- redhat: ship all the changelog from source git into kernel-doc (Herton R. Krzesinski) +- redhat: create an empty changelog file when changing its name (Herton R. Krzesinski) +- Linux v6.7.0-0.rc6.a4aebe936554 +Resolves: + +* Wed Dec 20 2023 Fedora Kernel Team [6.7.0-0.rc6.55cb5f43689d.50] +- redhat/self-test: Remove --all from git query (Prarit Bhargava) +- Linux v6.7.0-0.rc6.55cb5f43689d +Resolves: + +* Tue Dec 19 2023 Fedora Kernel Team [6.7.0-0.rc6.2cf4f94d8e86.49] +- Linux v6.7.0-0.rc6.2cf4f94d8e86 +Resolves: + +* Mon Dec 18 2023 Fedora Kernel Team [6.7.0-0.rc6.48] +- Disable accel drivers for Fedora x86 (Kate Hsuan) +- redhat: scripts: An automation script for disabling unused drivers for x86 (Kate Hsuan) +- Fix up Fedora LJCA configs and filters (Justin M. Forbes) +- Linux v6.7.0-0.rc6 +Resolves: + +* Sun Dec 17 2023 Fedora Kernel Team [6.7.0-0.rc5.3b8a9b2e6809.47] +- Linux v6.7.0-0.rc5.3b8a9b2e6809 +Resolves: + +* Sat Dec 16 2023 Fedora Kernel Team [6.7.0-0.rc5.c8e97fc6b4c0.46] +- Fedora configs for 6.7 (Justin M. Forbes) +- Linux v6.7.0-0.rc5.c8e97fc6b4c0 +Resolves: + +* Fri Dec 15 2023 Fedora Kernel Team [6.7.0-0.rc5.3f7168591ebf.45] +- Linux v6.7.0-0.rc5.3f7168591ebf +Resolves: + +* Thu Dec 14 2023 Fedora Kernel Team [6.7.0-0.rc5.5bd7ef53ffe5.44] +- Linux v6.7.0-0.rc5.5bd7ef53ffe5 +Resolves: + +* Wed Dec 13 2023 Fedora Kernel Team [6.7.0-0.rc5.88035e5694a8.43] +- Some Fedora config updates for MLX5 (Justin M. Forbes) +- Turn on DRM_ACCEL drivers for Fedora (Justin M. Forbes) +- Linux v6.7.0-0.rc5.88035e5694a8 +Resolves: + +* Tue Dec 12 2023 Fedora Kernel Team [6.7.0-0.rc5.26aff849438c.42] +- redhat: enable the kfence test (Nico Pache) +- Linux v6.7.0-0.rc5.26aff849438c +Resolves: + +* Mon Dec 11 2023 Fedora Kernel Team [6.7.0-0.rc5.41] +- redhat/configs: Enable UCLAMP_TASK for PipeWire and WirePlumber (Neal Gompa) +- Linux v6.7.0-0.rc5 +Resolves: + +* Sun Dec 10 2023 Fedora Kernel Team [6.7.0-0.rc4.c527f5606aa5.40] +- Linux v6.7.0-0.rc4.c527f5606aa5 +Resolves: + +* Sat Dec 09 2023 Fedora Kernel Team [6.7.0-0.rc4.f2e8a57ee903.39] +- Linux v6.7.0-0.rc4.f2e8a57ee903 +Resolves: + +* Fri Dec 08 2023 Fedora Kernel Team [6.7.0-0.rc4.5e3f5b81de80.38] +- Turn on CONFIG_SECURITY_DMESG_RESTRICT for Fedora (Justin M.
Forbes) +- Linux v6.7.0-0.rc4.5e3f5b81de80 +Resolves: + +* Wed Dec 06 2023 Fedora Kernel Team [6.7.0-0.rc4.bee0e7762ad2.37] +- Turn off shellcheck for the fedora-stable-release script (Justin M. Forbes) +Resolves: + +* Tue Dec 05 2023 Fedora Kernel Team [6.7.0-0.rc4.bee0e7762ad2.36] +- Add some initial Fedora stable branch script to redhat/scripts/fedora/ (Justin M. Forbes) +- Linux v6.7.0-0.rc4.bee0e7762ad2 +Resolves: + +* Mon Dec 04 2023 Fedora Kernel Team [6.7.0-0.rc4.35] +- Linux v6.7.0-0.rc4 +Resolves: + +* Sun Dec 03 2023 Fedora Kernel Team [6.7.0-0.rc3.968f35f4ab1c.34] +- Linux v6.7.0-0.rc3.968f35f4ab1c +Resolves: + +* Sat Dec 02 2023 Fedora Kernel Team [6.7.0-0.rc3.815fb87b7530.33] +- redhat: disable iptables-legacy compatibility layer (Florian Westphal) +- redhat: disable dccp conntrack support (Florian Westphal) +- configs: enable netfilter_netlink_hook in fedora too (Florian Westphal) +- Linux v6.7.0-0.rc3.815fb87b7530 +Resolves: + +* Fri Dec 01 2023 Fedora Kernel Team [6.7.0-0.rc3.994d5c58e50e.32] +- ext4: Mark mounting fs-verity filesystems as tech-preview (Alexander Larsson) +- erofs: Add tech preview markers at mount (Alexander Larsson) +- Enable fs-verity (Alexander Larsson) +- Enable erofs (Alexander Larsson) +- aarch64: enable uki (Gerd Hoffmann) +- redhat: enable CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH as a module for x86 (Patrick Talbert) +- Turn CONFIG_MFD_CS42L43_SDW on for RHEL (Justin M. Forbes) +- Linux v6.7.0-0.rc3.994d5c58e50e +Resolves: + +* Thu Nov 30 2023 Fedora Kernel Team [6.7.0-0.rc3.3b47bc037bd4.31] +- Linux v6.7.0-0.rc3.3b47bc037bd4 +Resolves: + +* Wed Nov 29 2023 Fedora Kernel Team [6.7.0-0.rc3.18d46e76d7c2.30] +- Enable cryptographic acceleration config flags for PowerPC (Mamatha Inamdar) +- Also make vmlinuz-virt.efi world readable (Zbigniew Jędrzejewski-Szmek) +- Drop custom mode for System.map file (Zbigniew Jędrzejewski-Szmek) +- Linux v6.7.0-0.rc3.18d46e76d7c2 +Resolves: + +* Tue Nov 28 2023 Fedora Kernel Team [6.7.0-0.rc3.df60cee26a2e.29] +- Add drm_exec_test to mod-internal.list for depmod to succeed (Mika Penttilä) +- RHEL 9.4 DRM backport (up to v6.6 kernel), sync Kconfigs (Mika Penttilä) +- Linux v6.7.0-0.rc3.df60cee26a2e +Resolves: + +* Mon Nov 27 2023 Fedora Kernel Team [6.7.0-0.rc3.28] +- Linux v6.7.0-0.rc3 +Resolves: + +* Sun Nov 26 2023 Fedora Kernel Team [6.7.0-0.rc2.090472ed9c92.27] +- Linux v6.7.0-0.rc2.090472ed9c92 +Resolves: + +* Sat Nov 25 2023 Fedora Kernel Team [6.7.0-0.rc2.0f5cc96c367f.26] +- Linux v6.7.0-0.rc2.0f5cc96c367f +Resolves: + +* Fri Nov 24 2023 Fedora Kernel Team [6.7.0-0.rc2.f1a09972a45a.25] +- Linux v6.7.0-0.rc2.f1a09972a45a +Resolves: + +* Thu Nov 23 2023 Fedora Kernel Team [6.7.0-0.rc2.9b6de136b5f0.24] +- Turn on USB_DWC3 for Fedora (rhbz 2250955) (Justin M.
Forbes) +- Linux v6.7.0-0.rc2.9b6de136b5f0 +Resolves: + +* Wed Nov 22 2023 Fedora Kernel Team [6.7.0-0.rc2.c2d5304e6c64.23] +- redhat/configs: Move IOMMUFD to common (Alex Williamson) +- redhat: Really remove cpupower files (Prarit Bhargava) +- redhat: remove update_scripts.sh (Prarit Bhargava) +- Linux v6.7.0-0.rc2.c2d5304e6c64 +Resolves: + +* Mon Nov 20 2023 Fedora Kernel Team [6.7.0-0.rc2.22] +- Fix s390 zfcpdump bpf build failures for cgroups (Don Zickus) +- Linux v6.7.0-0.rc2 +Resolves: + +* Sun Nov 19 2023 Fedora Kernel Team [6.7.0-0.rc1.037266a5f723.21] +- Linux v6.7.0-0.rc1.037266a5f723 +Resolves: + +* Sat Nov 18 2023 Fedora Kernel Team [6.7.0-0.rc1.791c8ab095f7.20] +- Linux v6.7.0-0.rc1.791c8ab095f7 +Resolves: + +* Fri Nov 17 2023 Fedora Kernel Team [6.7.0-0.rc1.7475e51b8796.19] +- Linux v6.7.0-0.rc1.7475e51b8796 +Resolves: + +* Wed Nov 15 2023 Fedora Kernel Team [6.7.0-0.rc1.c42d9eeef8e5.18] +- Linux v6.7.0-0.rc1.c42d9eeef8e5 +Resolves: + +* Tue Nov 14 2023 Fedora Kernel Team [6.7.0-0.rc1.9bacdd8996c7.17] +- Linux v6.7.0-0.rc1.9bacdd8996c7 +Resolves: + +* Mon Nov 13 2023 Fedora Kernel Team [6.7.0-0.rc1.16] +- Linux v6.7.0-0.rc1 +Resolves: + +* Sun Nov 12 2023 Fedora Kernel Team [6.7.0-0.rc0.1b907d050735.15] +- Linux v6.7.0-0.rc0.1b907d050735 +Resolves: + +* Sat Nov 11 2023 Fedora Kernel Team [6.7.0-0.rc0.3ca112b71f35.14] +- Flip CONFIG_NVME_AUTH to m in pending (Justin M. Forbes) +- Linux v6.7.0-0.rc0.3ca112b71f35 +Resolves: + +* Fri Nov 10 2023 Fedora Kernel Team [6.7.0-0.rc0.89cdf9d55601.13] +- Linux v6.7.0-0.rc0.89cdf9d55601 +Resolves: + +* Thu Nov 09 2023 Fedora Kernel Team [6.7.0-0.rc0.6bc986ab839c.12] +- Linux v6.7.0-0.rc0.6bc986ab839c +Resolves: + +* Wed Nov 08 2023 Fedora Kernel Team [6.7.0-0.rc0.305230142ae0.11] +- Turn CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 on for Fedora x86 (Jason Montleon) +- kernel/rh_messages.c: Mark functions as possibly unused (Prarit Bhargava) +- Add snd-hda-cirrus-scodec-test to mod-internal.list (Scott Weaver) +- Linux v6.7.0-0.rc0.305230142ae0 +Resolves: + +* Tue Nov 07 2023 Fedora Kernel Team [6.7.0-0.rc0.be3ca57cfb77.10] +- Turn off BPF_SYSCALL in pending for zfcpdump (Justin M. Forbes) +- Linux v6.7.0-0.rc0.be3ca57cfb77 +Resolves: + +* Mon Nov 06 2023 Fedora Kernel Team [6.7.0-0.rc0.d2f51b3516da.9] +- Linux v6.7.0-0.rc0.d2f51b3516da +Resolves: + +* Sun Nov 05 2023 Fedora Kernel Team [6.7.0-0.rc0.1c41041124bd.8] +- Linux v6.7.0-0.rc0.1c41041124bd +Resolves: + +* Sat Nov 04 2023 Fedora Kernel Team [6.7.0-0.rc0.90b0c2b2edd1.7] +- Add mean_and_variance_test to mod-internal.list (Justin M. Forbes) +- Add cfg80211-tests and mac80211-tests to mod-internal.list (Justin M. Forbes) +- Linux v6.7.0-0.rc0.90b0c2b2edd1 +Resolves: + +* Fri Nov 03 2023 Fedora Kernel Team [6.7.0-0.rc0.8f6f76a6a29f.6] +- Turn on CONFIG_MFD_CS42L43_SDW for RHEL in pending (Justin M. Forbes) +- Linux v6.7.0-0.rc0.8f6f76a6a29f +Resolves: + +* Fri Nov 03 2023 Fedora Kernel Team [6.7.0-0.rc0.21e80f3841c0.5] +- Turn on bcachefs for Fedora (Justin M. Forbes) +- redhat: configs: fedora: Enable QSEECOM and friends (Andrew Halaney) +Resolves: + +* Thu Nov 02 2023 Fedora Kernel Team [6.7.0-0.rc0.21e80f3841c0.4] +- Add clk-fractional-divider_test to mod-internal.list (Thorsten Leemhuis) +- Add gso_test to mod-internal.list (Thorsten Leemhuis) +- Add property-entry-test to mod-internal.list (Thorsten Leemhuis) +- Linux v6.7.0-0.rc0.21e80f3841c0 +Resolves: + +* Wed Nov 01 2023 Fedora Kernel Team [6.7.0-0.rc0.8bc9e6515183.3] +- Fedora 6.7 configs part 1 (Justin M.
Forbes) +- Trim changelog after version bump (Justin M. Forbes) +- Linux v6.7.0-0.rc0.8bc9e6515183 +Resolves: + +* Tue Oct 31 2023 Fedora Kernel Team [6.7.0-0.rc0.5a6a09e97199.2] +- Reset RHEL_RELEASE for rebase (Justin M. Forbes) +- [Scheduled job] Catch config mismatches early during upstream merge (Don Zickus) +- redhat/self-test: Update data for KABI xz change (Prarit Bhargava) +- redhat/scripts: Switch KABI tarballs to xz (Prarit Bhargava) +- redhat/kernel.spec.template: Switch KABI compression to xz (Prarit Bhargava) +- redhat: self-test: Use a more complete SRPM file suffix (Andrew Halaney) +- redhat: makefile: remove stray rpmbuild --without (Eric Chanudet) +- Consolidate configs into common for 6.6 (Justin M. Forbes) +- Updated Fedora configs (Justin M. Forbes) +- Turn on UFSHCD for Fedora x86 (Justin M. Forbes) +- redhat: configs: generic: x86: Disable CONFIG_VIDEO_OV01A10 for x86 platform (Hans de Goede) +- redhat: remove pending-rhel CONFIG_XFS_ASSERT_FATAL file (Patrick Talbert) +- New configs in fs/xfs (Fedora Kernel Team) +- crypto: rng - Override drivers/char/random in FIPS mode (Herbert Xu) +- random: Add hook to override device reads and getrandom(2) (Herbert Xu) +- redhat/configs: share CONFIG_ARM64_ERRATUM_2966298 between rhel and fedora (Mark Salter) +- configs: Remove S390 IOMMU config options that no longer exist (Jerry Snitselaar) +- redhat: docs: clarify where bugs and issues are created (Scott Weaver) +- redhat/scripts/rh-dist-git.sh does not take any arguments: fix error message (Denys Vlasenko) +- Add target_branch for gen_config_patches.sh (Don Zickus) +- redhat: disable kunit by default (Nico Pache) +- redhat/configs: enable the AMD_PMF driver for RHEL (David Arcari) +- Make CONFIG_ADDRESS_MASKING consistent between fedora and rhel (Chris von Recklinghausen) +- CI: add ark-latest baseline job to tag cki-gating for successful pipelines (Michael Hofmann) +- CI: provide child pipelines for CKI container image gating (Michael Hofmann) +- CI: allow to run as child pipeline (Michael Hofmann) +- CI: provide descriptive pipeline name for scheduled pipelines (Michael Hofmann) +- CI: use job templates for variant variables (Michael Hofmann) +- redhat/kernel.spec.template: simplify __modsign_install_post (Jan Stancek) +- Fedora filter updates after configs (Justin M. Forbes) +- Fedora configs for 6.6 (Justin M. 
Forbes) +- redhat/configs: Freescale Layerscape SoC family (Steve Best) +- Add clang MR/baseline pipelines (Michael Hofmann) +- CI: Remove unused kpet_tree_family (Nikolai Kondrashov) +- Add clang config framework (Don Zickus) +- Apply partial snippet configs to all configs (Don Zickus) +- Remove unpackaged kgcov config files (Don Zickus) +- redhat/configs: enable missing Kconfig options for Qualcomm RideSX4 (Brian Masney) +- enable CONFIG_ADDRESS_MASKING for x86_64 (Chris von Recklinghausen) +- common: aarch64: enable NXP Flex SPI (Peter Robinson) +- fedora: Switch TI_SCI_CLK and TI_SCI_PM_DOMAINS symbols to built-in (Javier Martinez Canillas) +- kernel.spec: adjust build option comment (Michael Hofmann) +- kernel.spec: allow to enable arm64_16k variant (Michael Hofmann) +- gitlab-ci: enable build-only pipelines for Rawhide/16k/aarch64 (Michael Hofmann) +- kernel.spec.template: Fix --without bpftool (Prarit Bhargava) +- redhat/configs: NXP BBNSM Power Key Driver (Steve Best) +- redhat/self-test: Update data for cross compile fields (Prarit Bhargava) +- redhat/Makefile.cross: Add message for disabled subpackages (Prarit Bhargava) +- redhat/Makefile.cross: Update cross targets with disabled subpackages (Prarit Bhargava) +- Remove XFS_ASSERT_FATAL from pending-fedora (Justin M. Forbes) +- Change default pending for XFS_ONLINE_SCRUB_STATS as it now selects XFS_DEBUG (Justin M. Forbes) +- gitlab-ci: use --with debug/base to select kernel variants (Michael Hofmann) +- kernel.spec: add rpmbuild --without base option (Michael Hofmann) +- redhat: spec: Fix typo for kernel_variant_preun for 16k-debug flavor (Neal Gompa) +- Turn off appletalk for fedora (Justin M. Forbes) +- New configs in drivers/media (Fedora Kernel Team) +- redhat/docs: Add a mention of bugzilla for bugs (Prarit Bhargava) +- Fix the fixup of Fedora release (Don Zickus) +- Fix Fedora release scheduled job (Don Zickus) +- Move squashfs to kernel-modules-core (Justin M. Forbes) +- redhat: Explicitly disable CONFIG_COPS (Vitaly Kuznetsov) +- redhat: Add dist-check-licenses target (Vitaly Kuznetsov) +- redhat: Introduce "Verify SPDX-License-Identifier tags" selftest (Vitaly Kuznetsov) +- redhat: Use kspdx-tool output for the License: field (Vitaly Kuznetsov) +- Rename pipeline repo branch and DW tree names (Michael Hofmann) +- Adjust comments that refer to ARK in a Rawhide context (Michael Hofmann) +- Rename variable names starting with ark- to rawhide- (Michael Hofmann) +- Rename trigger-ark to trigger-rawhide (Michael Hofmann) +- Fix up config mismatches for Fedora (Justin M. Forbes) +- redhat/configs: Texas Instruments Inc. K3 multicore SoC architecture (Steve Best) +- Flip CONFIG_VIDEO_V4L2_SUBDEV_API in pending RHEL due to mismatch (Justin M. Forbes) +- CONFIG_HW_RANDOM_HISI: move to common and set to m (Scott Weaver) +- Turn off CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE for Fedora s390x (Justin M. Forbes) +- Disable tests for ELN realtime pipelines (Michael Hofmann) +- New configs in mm/Kconfig (Fedora Kernel Team) +- Flip CONFIG_SND_SOC_CS35L56_SDW to m and clean up (Justin M. Forbes) +- Add drm_exec_test to mod-internal.list (Thorsten Leemhuis) +- Add new pending entry for CONFIG_SND_SOC_CS35L56_SDW to fix mismatch (Justin M. Forbes) +- Fix tarball creation logic (Don Zickus) +- redhat: bump libcpupower soname to match upstream (Patrick Talbert) +- Turn on MEMFD_CREATE in pending as it is selected by CONFIG_TMPFS (Justin M.
Forbes) +- redhat: drop unneeded build-time dependency gcc-plugin-devel (Coiby Xu) +- Reset RHEL release and trim changelog after rebase (Justin M. Forbes) +- all: x86: move wayward x86 specific config home (Peter Robinson) +- all: de-dupe non-standard config options (Peter Robinson) +- all: x86: clean up microcode loading options (Peter Robinson) +- common: remove unnecessary CONFIG_SND_MESON_AXG* (Peter Robinson) +- redhat: Fix UKI install with systemd >= 254 (Vitaly Kuznetsov) +- redhat: Use named parameters for kernel_variant_posttrans()/kernel_variant_preun() (Vitaly Kuznetsov) +- redhat/kernel.spec.template: update compression variables to support zstd (Brian Masney) +- Consolidate configs to common for 6.5 (Justin M. Forbes) +- Remove unused config entry for Fedora (Justin M. Forbes) +- redhat/self-test: Remove rpmlint test (Prarit Bhargava) +- Remove the armv7 config directory from Fedora again (Justin M. Forbes) +- Enable CONFIG_EXPERT for both RHEL and Fedora (Justin M. Forbes) +- redhat/configs: Enable CONFIG_DEVICE_PRIVATE on aarch64 (David Hildenbrand) [2231407] +- redhat/configs: disable CONFIG_ROCKCHIP_ERRATUM_3588001 for RHEL (Mark Salter) +- redhat: shellcheck fixes (Prarit Bhargava) +- redhat/configs: enable tegra114 SPI (Mark Salter) +- all: properly cleanup firewire once and for all (Peter Robinson) +- Fix up filters for Fedora (Justin M. Forbes) +- New configs in arch/x86 (Fedora Kernel Team) +- Add an armv7 directory back for the Fedora configs (Justin M. Forbes) +- Fedora 6.5 config updates (Justin M. Forbes) +- Turn off DMABUF_SYSFS_STATS (Justin M. Forbes) +- CI: rawhide_release: switch to using script to push (Don Zickus) +- redhat/self-test: Update self-test data (Prarit Bhargava) +- redhat/scripts/cross-compile: Update download_cross.sh (Prarit Bhargava) +- redhat/Makefile.cross: Remove ARCH selection code (Prarit Bhargava) +- redhat/Makefile.cross: Update script (Prarit Bhargava) +- Fix interruptible non MR jobs (Michael Hofmann) +- all: run evaluate_configs to de-dupe merged aarch64 (Peter Robinson) +- all: arm: merge the arm and arm/aarch64 (Peter Robinson) +- fedora: remove ARMv7 AKA armhfp configurations (Peter Robinson) +- fedora: remove ARMv7 AKA armhfp support (Peter Robinson) +- redhat/configs: enable CONFIG_VIRTIO_MEM on aarch64 (David Hildenbrand) [2044155] +- redhat/configs: enable CONFIG_MEMORY_HOTREMOVE aarch64 (David Hildenbrand) [2062054] +- redhat: Add arm64-16k kernel flavor scaffold for 16K page-size'd AArch64 (Neal Gompa) +- fedora: enable i3c on aarch64 (Peter Robinson) +- redhat/configs: Remove `CONFIG_HZ_1000 is not set` for aarch64 (Enric Balletbo i Serra) +- redhat/configs: turn on the framework for SPI NOR for ARM (Steve Best) +- configs: add new ChromeOS UART driver (Mark Langsdorf) +- configs: add new ChromeOS Human Presence Sensor (Mark Langsdorf) +- redhat/configs: Enable CONFIG_NVIDIA_WMI_EC_BACKLIGHT for both Fedora and RHEL (Kate Hsuan) +- redhat/configs: Texas Instruments INA3221 driver (Steve Best) +- arm: i.MX: Some minor NXP i.MX cleanups (Peter Robinson) +- Description: Set config for Tegra234 pinctrl driver (Joel Slebodnick) +- Update RPM Scriptlet for kernel-install Changes (Jonathan Steffan) +- [CI] add exit 0 to the end of CI scripts (Don Zickus) +- redhat: configs: Disable CONFIG_CRYPTO_STATS since performance issue for storage (Kate Hsuan) [2227793] +- Remove obsolete variable from gitlab-ci.yml (Ondrej Kinst) +- redhat/configs: Move GVT-g to Fedora only (Alex Williamson) +- [CI] Make sure we are on correct branch before
running script (Don Zickus) +- CI: ark-update-configs: sync push command and output (Don Zickus) +- CI: ark-update-configs: misc changes (Don Zickus) +- CI: sync ark-create-release push commands with output (Don Zickus) +- CI: ark-create-release: Add a robust check if nothing changed (Don Zickus) +- CI: Remove legacy tag check cruft (Don Zickus) +- CI: Introduce simple environment script (Don Zickus) +- redhat/configs: Disable FIREWIRE for RHEL (Prarit Bhargava) +- redhat/scripts/rh-dist-git.sh: print list of uploaded files (Denys Vlasenko) +- redhat/scripts/expand_srpm.sh: add missing function, robustify (Denys Vlasenko) +- redhat: Enable HSR and PRP (Felix Maurer) +- redhat/scripts/rh-dist-git.sh: fix outdated message and comment (Denys Vlasenko) +- redhat/configs: Disable CONFIG_I8K (Prarit Bhargava) +- Make sure posttrans script doesn't fail if restorecon is not installed (Daan De Meyer) +- Update filters for new config items (Justin M. Forbes) +- More Fedora 6.5 configs (Justin M. Forbes) +- redhat/configs: disable pre-UVC cameras for RHEL on aarch64 (Dean Nelson) +- redhat/configs: enable CONFIG_MEDIA_SUPPORT for RHEL on aarch64 (Dean Nelson) +- move ownership of /lib/modules// to kernel-core (Thorsten Leemhuis) +- Let kernel-modules-core own the files depmod generates. (Thorsten Leemhuis) +- redhat: configs: Enable CONFIG_TYPEC_STUSB160X for rhel on aarch64 (Desnes Nunes) +- Add filters for ptp_dfl_tod on Fedora (Justin M. Forbes) +- Fedora 6.5 configs part 1 (Justin M. Forbes) +- fedora: enable CONFIG_ZYNQMP_IPI_MBOX as a builtin in pending-fedora (Patrick Talbert) +- fedora: arm: some minor updates (Peter Robinson) +- fedora: bluetooth: enable AOSP extensions (Peter Robinson) +- fedora: wifi: tweak ZYDAS WiFI config options (Peter Robinson) +- scsi: sd: Add "probe_type" module parameter to allow synchronous probing (Ewan D. Milne) [2140017] +- redhat/configs: allow IMA to use MOK keys (Coiby Xu) +- Simplify documentation jobs (Michael Hofmann) +- Auto-cancel pipelines only on MRs (Michael Hofmann) +- CI: Call script directly (Don Zickus) +- CI: Remove stale TAG and Makefile cruft (Don Zickus) +- CI: Move os-build tracking to common area (Don Zickus) +- redhat: use the eln builder for daily jobs (Patrick Talbert) +- redhat: set CONFIG_XILINX_WINDOW_WATCHDOG as disabled in pending (Patrick Talbert) +- Add baseline ARK/ELN pipelines (Michael Hofmann) +- Simplify job rules (Michael Hofmann) +- Build ELN srpm for bot changes (Michael Hofmann) +- Run RH selftests for ELN (Michael Hofmann) +- Simplify job templates (Michael Hofmann) +- Extract rules to allow orthogonal configuration (Michael Hofmann) +- Require ELN pipelines if started automatically (Michael Hofmann) +- Add ARK debug pipeline (Michael Hofmann) +- Extract common parts of child pipeline job (Michael Hofmann) +- Move ARK pipeline variables into job template (Michael Hofmann) +- Simplify ARK pipeline rules (Michael Hofmann) +- Change pathfix.py to %%py3_shebang_fix (Justin M. Forbes) +- Turn on NET_VENDOR_QUALCOMM for Fedora to enable rmnet (Justin M. 
Forbes) +- redhat: add intel-m10-bmc-hwmon to filter-modules singlemods list (Patrick Talbert) +- fedora: enable pending-fedora CONFIG_CPUFREQ_DT_PLATDEV as a module (Patrick Talbert) +- redhat: fix the 'eln BUILD_TARGET' self-test (Patrick Talbert) +- redhat: update the self-test-data (Patrick Talbert) +- redhat: remove trailing space in dist-dump-variables output (Patrick Talbert) +- Allow ELN pipelines failures (Michael Hofmann) +- Enable cs-like CI (Michael Hofmann) +- Allow to auto-cancel redundant pipelines (Michael Hofmann) +- Remove obsolete unused trigger variable (Michael Hofmann) +- Fix linter warnings in .gitlab-ci.yml (Michael Hofmann) +- config: wifi: debug options for ath11k, brcm80211 and iwlwifi (Íñigo Huguet) +- redhat: allow dbgonly cross builds (Jan Stancek) +- redhat/configs: Clean up x86-64 call depth tracking configs (Waiman Long) +- redhat: move SND configs from pending-rhel to rhel (Patrick Talbert) +- Fix up armv7 configs for Fedora (Justin M. Forbes) +- redhat: Set pending-rhel x86 values for various SND configs (Patrick Talbert) +- redhat: update self-test data (Patrick Talbert) +- redhat: ignore SPECBPFTOOLVERSION/bpftoolversion in self-test create-data.sh (Patrick Talbert) +- fedora/rhel: Move I2C_DESIGNWARE_PLATFORM, I2C_SLAVE, & GPIOLIB from pending (Patrick Talbert) +- redhat/filter-modules.sh.rhel: add needed deps for intel_rapl_tpmi (Jan Stancek) +- fedora: Enable CONFIG_SPI_SLAVE (Patrick Talbert) +- fedora/rhel: enable I2C_DESIGNWARE_PLATFORM, I2C_SLAVE, and GPIOLIB (Patrick Talbert) +- fedora: Enable CONFIG_SPI_SLAVE in fedora-pending (Patrick Talbert) +- redhat: remove extra + (plus) from meta package Requires definitions (Patrick Talbert) +- Add intel-m10-bmc-hwmon to singlemods (Thorsten Leemhuis) +- Add hid-uclogic-test to mod-internal.list (Thorsten Leemhuis) +- Add checksum_kunit.ko to mod-internal.list (Thorsten Leemhuis) +- Add strcat_kunit to mod-internal.list (Thorsten Leemhuis) +- Add input_test to mod-internal.list (Thorsten Leemhuis) +- Revert "Remove EXPERT from ARCH_FORCE_MAX_ORDER for aarch64" (Justin M. Forbes) +- Reset the release number and dedup the changelog after rebase (Justin M. Forbes) +- Fix up rebase issue with CONFIG_ARCH_FORCE_MAX_ORDER (Justin M. Forbes) +- redhat/kernel.spec.template: Disable 'extracting debug info' messages (Prarit Bhargava) +- kernel/rh_messages.c: Another gcc12 warning on redundant NULL test (Florian Weimer) [2216678] +- redhat: fix signing for realtime and arm64_64k non-debug variants (Jan Stancek) +- redhat: treat with_up consistently (Jan Stancek) +- redhat: make with_realtime opt-in (Jan Stancek) +- redhat/configs: Disable qcom armv7 drippings in the aarch64 tree (Jeremy Linton) +- kernel.spec: drop obsolete ldconfig (Jan Stancek) +- Consolidate config items to common for 6.4 cycle (Justin M. Forbes) +- Turn on CONFIG_RMNET for Fedora (Justin M.
Forbes) +- redhat/configs: enable CONFIG_MANA_INFINIBAND=m for ARK (Vitaly Kuznetsov) +- redhat/config: common: Enable CONFIG_GPIO_SIM for software development (Kate Hsuan) +- redhat: fix problem with RT kvm modules listed twice in rpm generation (Clark Williams) +- redhat: turn off 64k kernel builds with rtonly (Clark Williams) +- redhat: turn off zfcpdump for rtonly (Clark Williams) +- redhat: don't allow with_rtonly to turn on unsupported arches (Clark Williams) +- redhat: update self-test data for addition of RT and 64k-page variants (Clark Williams) +- redhat: fix realtime and efiuki build conflict (Jan Stancek) +- arm64-64k: Add new kernel variant to RHEL9/CS9 for 64K page-size'd ARM64 (Donald Dutile) [2153073] +- redhat: TEMPORARY set configs to deal with PREEMPT_RT not available (Clark Williams) +- redhat: TEMPORARY default realtime to off (Clark Williams) +- redhat: moved ARM errata configs to arm dir (Clark Williams) +- redhat: RT packaging changes (Clark Williams) +- redhat: miscellaneous commits needed due to CONFIG_EXPERT (Clark Williams) +- redhat: realtime config entries (Clark Williams) +- common: remove deleted USB PCCARD drivers (Peter Robinson) +- fedora: further cleanup of pccard/cardbus subsystem (Peter Robinson) +- common: properly disable PCCARD subsystem (Peter Robinson) +- redhat/configs: arm: enable SERIAL_TEGRA UART for RHEL (Mark Salter) +- redhat/configs: enable CONFIG_X86_AMD_PSTATE_UT (David Arcari) +- redhat/configs: Enable CONFIG_TCG_VTPM_PROXY for RHEL (Štěpán Horáček) +- redhat: do not package *.mod.c generated files (Denys Vlasenko) +- ALSA configuration changes for ARK/RHEL 9.3 (Jaroslav Kysela) +- spec: remove resolve_btfids from kernel-devel (Viktor Malik) +- Fix typo in filter-modules (Justin M. Forbes) +- redhat/configs: Enable CONFIG_INIT_STACK_ALL_ZERO for RHEL (Josh Poimboeuf) +- Remove CONFIG_ARCH_FORCE_MAX_ORDER for aarch64 (Justin M. Forbes) +- Fix up config and filter for PTP_DFL_TOD (Justin M. Forbes) +- redhat/configs: IMX8ULP pinctrl driver (Steve Best) +- redhat/configs: increase CONFIG_FRAME_WARN for Fedora on aarch64 (Brian Masney) +- redhat/configs: add two missing Kconfig options for the Thinkpad x13s (Brian Masney) +- Fedora configs for 6.4 (Justin M. Forbes) +- Change aarch64 CONFIG_ARCH_FORCE_MAX_ORDER to 10 for 4K pages (Justin M. Forbes) +- kernel.spec: remove "RPM_VMLINUX_H=$DevelDir/vmlinux.h" code chunk in %%install (Denys Vlasenko) +- redhat/configs: aarch64: Turn on Display for OnePlus 6 (Eric Curtin) +- redhat/configs: NXP i.MX93 pinctrl, clk, analog to digital converters (Steve Best) +- redhat/configs: Enable CONFIG_SC_GPUCC_8280XP for fedora (Andrew Halaney) +- redhat/configs: Enable CONFIG_QCOM_IPCC for fedora (Andrew Halaney) +- Add rv subpackage for kernel-tools (John Kacur) [2188441] +- redhat/configs: NXP i.MX9 family (Steve Best) +- redhat/genlog.py: add support to list/process zstream Jira tickets (Herton R. Krzesinski) +- redhat: fix duplicate jira issues in the resolves line (Herton R. Krzesinski) +- redhat: add support for Jira issues in changelog (Herton R. Krzesinski) +- redhat/configs: turn on IMX8ULP CCM Clock Driver (Steve Best) +- redhat: update filter-modules fsdrvs list to reference smb instead of cifs (Patrick Talbert) +- Turn off some debug options found to impact performance (Justin M. 
Forbes) +- wifi: rtw89: enable RTL8852BE card in RHEL (Íñigo Huguet) +- redhat/configs: enable TEGRA186_GPC_DMA for RHEL (Mark Salter) +- Move imx8m configs from fedora to common (Mark Salter) +- redhat/configs: turn on lpuart serial port support Driver (Steve Best) [2208834] +- Turn off DEBUG_VM for non-debug Fedora kernels (Justin M. Forbes) +- Enable CONFIG_BT on aarch64 (Charles Mirabile) +- redhat/configs: turn on CONFIG_MARVELL_CN10K_TAD_PMU (Michal Schmidt) [2042240] +- redhat/configs: Fix enabling MANA Infiniband (Kamal Heib) +- Fix file listing for symvers in uki (Justin M. Forbes) +- Fix up some Fedora config items (Justin M. Forbes) +- enable efifb for Nvidia (Justin M. Forbes) +- kernel.spec: package unstripped test_progs-no_alu32 (Felix Maurer) +- Turn on NFT_CONNLIMIT for Fedora (Justin M. Forbes) +- Include the information about builtin symbols into kernel-uki-virt package too (Vitaly Kuznetsov) +- redhat/configs: Fix incorrect configs location and content (Vladis Dronov) +- redhat/configs: turn on CONFIG_MARVELL_CN10K_DDR_PMU (Michal Schmidt) [2042241] +- redhat: configs: generic: x86: Disable CONFIG_VIDEO_OV2740 for x86 platform (Kate Hsuan) +- Enable IO_URING for RHEL (Justin M. Forbes) +- Turn on IO_URING for RHEL in pending (Justin M. Forbes) +- redhat: Remove editconfig (Prarit Bhargava) +- redhat: configs: fix CONFIG_WERROR replace in build_configs (Jan Stancek) +- redhat/configs: enable Maxim MAX77620 PMIC for RHEL (Mark Salter) +- kernel.spec: skip kernel meta package when building without up (Jan Stancek) +- redhat/configs: enable RDMA_RXE for RHEL (Kamal Heib) [2022578] +- redhat/configs: update RPCSEC_GSS_KRB5 configs (Scott Mayhew) +- redhat/Makefile: Support building linux-next (Thorsten Leemhuis) +- redhat/Makefile: support building stable-rc versions (Thorsten Leemhuis) +- redhat/Makefile: Add target to print DISTRELEASETAG (Thorsten Leemhuis) +- Remove EXPERT from ARCH_FORCE_MAX_ORDER for aarch64 (Justin M. Forbes) +- Revert "Merge branch 'unstripped-no_alu32' into 'os-build'" (Patrick Talbert) +- configs: Enable CONFIG_PAGE_POOL_STATS for common/generic (Patrick Talbert) +- redhat/configs: enable CONFIG_DELL_WMI_PRIVACY for both RHEL and Fedora (David Arcari) +- kernel.spec: package unstripped test_progs-no_alu32 (Felix Maurer) +- bpf/selftests: fix bpf selftests install (Jerome Marchand) +- kernel.spec: add bonding selftest (Hangbin Liu) +- Change FORCE_MAX_ORDER for ppc64 to be 8 (Justin M. Forbes) +- kernel.spec.template: Add global compression variables (Prarit Bhargava) +- kernel.spec.template: Use xz for KABI (Prarit Bhargava) +- kernel.spec.template: Remove gzip related aarch64 code (Prarit Bhargava) +- Add apple_bl to filter-modules (Justin M. Forbes) +- Add handshake-test to mod-internal.list (Justin M. Forbes) +- Add regmap-kunit to mod-internal.list (Justin M. Forbes) +- configs: set CONFIG_PAGE_POOL_STATS (Patrick Talbert) +- Add apple_bl to fedora module_filter (Justin M. Forbes) +- Fix up some config mismatches in new Fedora config items (Justin M. Forbes) +- redhat/configs: disable CONFIG_USB_NET_SR9700 for aarch64 (Jose Ignacio Tornos Martinez) +- Reset changelog for 6.4 series (Justin M. Forbes) +- Reset RHEL_RELEASE for the 6.4 cycle (Justin M. Forbes) +- Fix up the RHEL configs for xtables and ipset (Justin M.
Forbes) +- ark: enable wifi on aarch64 (Íñigo Huguet) +- fedora: wifi: hermes: disable 802.11b driver (Peter Robinson) +- fedora: wifi: libertas: use the LIBERTAS_THINFIRM driver (Peter Robinson) +- fedora: wifi: disable Zydas vendor (Peter Robinson) +- redhat: fix python ValueError in error path of merge.py (Clark Williams) +- fedora: arm: minor updates (Peter Robinson) +- kernel.spec: Fix UKI naming to comply with BLS (Philipp Rudo) +- redhat/kernel.spec.template: Suppress 'extracting debug info' noise in build log (Prarit Bhargava) +- Fedora 6.3 configs part 2 (Justin M. Forbes) +- redhat/configs: Enable CONFIG_X86_KERNEL_IBT for Fedora and ARK (Josh Poimboeuf) +- kernel.spec: gcov: make gcov subpackages per variant (Jan Stancek) +- kernel.spec: Gemini: add Epoch to perf and rtla subpackages (Jan Stancek) +- kernel.spec: Gemini: fix header provides for upgrade path (Jan Stancek) +- redhat: introduce Gemini versioning (Jan Stancek) +- redhat: separate RPM version from uname version (Jan Stancek) +- redhat: introduce GEMINI and RHEL_REBASE_NUM variable (Jan Stancek) +- ipmi: ssif_bmc: Add SSIF BMC driver (Tony Camuso) +- common: minor de-dupe of parallel port configs (Peter Robinson) +- Fedora 6.3 configs part 1 (Justin M. Forbes) +- redhat: configs: Enable CONFIG_MEMTEST to enable memory test (Kate Hsuan) +- Update Fedora arm filters after config updates (Nicolas Chauvet) +- redhat/kernel.spec.template: Fix kernel-tools-libs-devel dependency (Prarit Bhargava) +- redhat: fix the check for the n option (Patrick Talbert) +- common: de-dupe some options that are the same (Peter Robinson) +- generic: remove deleted options (Peter Robinson) +- redhat/configs: enable CONFIG_INTEL_TCC_COOLING for RHEL (David Arcari) +- Update Fedora ppc filters after config updates (Justin M. Forbes) +- Update Fedora aarch64 filters after config updates (Justin M. Forbes) +- fedora: arm: Updates for 6.3 (Peter Robinson) +- redhat: kunit: cleanup NITRO config and enable rescale test (Nico Pache) +- kernel.spec: use %%{package_name} to fix kernel-devel-matched Requires (Jan Stancek) +- kernel.spec: use %%{package_name} also for abi-stablelist subpackages (Jan Stancek) +- kernel.spec: use %%{package_name} also for tools subpackages (Jan Stancek) +- generic: common: Parport and paride/ata cleanups (Peter Robinson) +- CONFIG_SND_SOC_CS42L83 is no longer common (Justin M. Forbes) +- configs: arm: bring some configs in line with rhel configs in c9s (Mark Salter) +- arm64/configs: Put some arm64 configs in the right place (Mark Salter) +- cleanup removed R8188EU config (Peter Robinson) +- Make RHJOBS container friendly (Don Zickus) +- Remove scmversion from kernel.spec.template (Don Zickus) +- redhat/configs: Enable CONFIG_SND_SOC_CS42L83 (Neal Gompa) +- Use RHJOBS for create-tarball (Don Zickus) +- Enable CONFIG_NET_SCH_FQ_PIE for Fedora (Justin M. Forbes) +- Make Fedora debug configs more useful for debug (Justin M. 
Forbes) +- redhat/configs: enable Octeon TX2 network drivers for RHEL (Michal Schmidt) [2040643] +- redhat/kernel.spec.template: fix installonlypkg for meta package (Jan Stancek) +- redhat: version two of Makefile.rhelver tweaks (Clark Williams) +- redhat/configs: Disable CONFIG_GCC_PLUGINS (Prarit Bhargava) +- redhat/kernel.spec.template: Fix typo for process_configs.sh call (Neal Gompa) +- redhat/configs: CONFIG_CRYPTO_SM3_AVX_X86_64 is x86 only (Vladis Dronov) +- redhat/configs: Enable CONFIG_PINCTRL_METEORLAKE in RHEL (Prarit Bhargava) +- fedora: enable new image sensors (Peter Robinson) +- redhat/self-test: Update self-test data (Prarit Bhargava) +- redhat/kernel.spec.template: Fix hardcoded "kernel" (Prarit Bhargava) +- redhat/configs/generate_all_configs.sh: Fix config naming (Prarit Bhargava) +- redhat/kernel.spec.template: Pass SPECPACKAGE_NAME to generate_all_configs.sh (Prarit Bhargava) +- kernel.spec.template: Use SPECPACKAGE_NAME (Prarit Bhargava) +- redhat/Makefile: Copy spec file (Prarit Bhargava) +- redhat: Change PACKAGE_NAME to SPECPACKAGE_NAME (Prarit Bhargava) +- redhat/configs: Support the virtio_mmio.device parameter in Fedora (David Michael) +- Revert "Merge branch 'systemd-boot-unsigned' into 'os-build'" (Patrick Talbert) +- redhat/Makefile: fix default values for dist-brew's DISTRO and DIST (Íñigo Huguet) +- Remove cc lines from automatic configs (Don Zickus) +- Add rtla-hwnoise files (Justin M. Forbes) +- redhat/kernel.spec.template: Mark it as a non-executable file (Neal Gompa) +- fedora: arm: Enable DRM_PANEL_HIMAX_HX8394 (Javier Martinez Canillas) +- redhat/configs: CONFIG_HP_ILO location fix (Vladis Dronov) +- redhat: Fix build for kselftests mm (Nico Pache) +- fix tools build after vm to mm rename (Justin M. Forbes) +- redhat/spec: Update bpftool versioning scheme (Viktor Malik) +- redhat/configs: CONFIG_CRYPTO_SM4_AESNI_AVX*_X86_64 is x86 only (Prarit Bhargava) +- redhat: adapt to upstream Makefile change (Clark Williams) +- redhat: modify efiuki specfile changes to use variants convention (Clark Williams) +- Turn off DEBUG_INFO_COMPRESSED_ZLIB for Fedora (Justin M. Forbes) +- redhat/kernel.spec.template: Fix RHEL systemd-boot-unsigned dependency (Prarit Bhargava) +- Add hashtable_test to mod-internal.list (Justin M. Forbes) +- Add more kunit tests to mod-internal.list for 6.3 (Justin M. Forbes) +- Flip CONFIG_I2C_ALGOBIT to m (Justin M. Forbes) +- Flip I2C_ALGOBIT to m to avoid mismatch (Justin M. Forbes) +- kernel.spec: move modules.builtin to kernel-core (Jan Stancek) +- Turn on IDLE_INJECT for x86 (Justin M. Forbes) +- Flip CONFIG_IDLE_INJECT in pending (Justin M. Forbes) +- Trim Changelog for 6.3 series (Justin M. Forbes) +- Reset RHEL_RELEASE to 0 for the 6.3 cycle (Justin M. Forbes) +- redhat/configs: Enable CONFIG_V4L_TEST_DRIVERS related drivers (Enric Balletbo i Serra) +- redhat/configs: Enable UCSI_CCG support (David Marlin) +- Fix underline mark-up after text change (Justin M. Forbes) +- Turn on CONFIG_XFS_RT for Fedora (Justin M. Forbes) +- Consolidate common configs for 6.2 (Justin M. Forbes) +- aarch64: enable zboot (Gerd Hoffmann) +- redhat: remove duplicate pending-rhel config items (Patrick Talbert) +- Disable frame pointers (Justin M. Forbes) +- redhat/configs: update scripts and docs for ark -> rhel rename (Clark Williams) +- redhat/configs: rename ark configs dir to rhel (Clark Williams) +- Turn off CONFIG_DEBUG_INFO_COMPRESSED_ZLIB for ppc64le (Justin M. 
Forbes) +- kernel.spec: package unstripped kselftests/bpf/test_progs (Jan Stancek) +- kernel.spec: allow to package some binaries as unstripped (Jan Stancek) +- redhat/configs: Make merge.py portable for older python (Desnes Nunes) +- Fedora configs for 6.2 (Justin M. Forbes) +- redhat: Repair ELN build broken by the recent UKI changes (Vitaly Kuznetsov) +- redhat/configs: enable CONFIG_INET_DIAG_DESTROY (Andrea Claudi) +- Enable TDX Guest driver (Vitaly Kuznetsov) +- redhat/configs: Enable CONFIG_PCIE_PTM generically (Corinna Vinschen) +- redhat: Add sub-RPM with an EFI unified kernel image for virtual machines (Vitaly Kuznetsov) +- redhat/Makefile: Remove GIT deprecated message (Prarit Bhargava) +- Revert "redhat: configs: Disable xtables and ipset" (Phil Sutter) +- redhat/configs: Enable CONFIG_SENSORS_LM90 for RHEL (Mark Salter) +- Fix up SQUASHFS decompression configs (Justin M. Forbes) +- redhat/configs: enable CONFIG_OCTEON_EP as a module in ARK (Michal Schmidt) [2041990] +- redhat: ignore rpminspect runpath report on urandom_read selftest binaries (Herton R. Krzesinski) +- kernel.spec: add llvm-devel build requirement (Scott Weaver) +- Update self-test data to not expect debugbuildsenabled 0 (Justin M. Forbes) +- Turn off forced debug builds (Justin M. Forbes) +- Turn on debug builds for aarch64 Fedora (Justin M. Forbes) +- redhat/configs: modify merge.py to match old overrides input (Clark Williams) +- redhat: fixup pylint complaints (Clark Williams) +- redhat: remove merge.pl and references to it (Clark Williams) +- redhat: update merge.py to handle merge.pl corner cases (Clark Williams) +- Revert "redhat: fix elf got hardening for vm tools" (Don Zickus) +- Update rebase notes for Fedora (Justin M. Forbes) +- Update CONFIG_LOCKDEP_CHAINS_BITS to 19 (cmurf) +- redhat/configs: Turn on CONFIG_SPI_TEGRA210_QUAD for RHEL (Mark Salter) +- ark: aarch64: drop CONFIG_SMC911X (Peter Robinson) +- all: cleanup and de-dupe CDROM_PKTCDVD options. (Peter Robinson) +- all: remove CRYPTO_GF128MUL (Peter Robinson) +- all: cleanup UEFI options (Peter Robinson) +- common: arm64: Enable Ampere Altra SMpro Hardware Monitoring (Peter Robinson) +- fedora: enable STACKPROTECTOR_STRONG (Peter Robinson) +- fedora: enable STACKPROTECTOR on arm platforms (Peter Robinson) +- redhat/self-test: Update data with ENABLE_WERROR (Prarit Bhargava) +- redhat/Makefile.variables: Add ENABLE_WERROR (Prarit Bhargava) +- makefile: Add -Werror support for RHEL (Prarit Bhargava) +- redhat/Makefile.variables: Remove mention of Makefile.rhpkg (Prarit Bhargava) +- redhat/Makefile.variables: Alphabetize variables (Prarit Bhargava) +- gitlab-ci: use CI templates from production branch (Michael Hofmann) +- redhat/kernel.spec.template: Fix internal "File listed twice" errors (Prarit Bhargava) +- redhat: Remove stale .tmp_versions code and comments (Prarit Bhargava) +- redhat/kernel.spec.template: Fix vmlinux_decompressor on !s390x (Prarit Bhargava) +- redhat/kernel.spec.template: Remove unnecessary output from pathfix.py (Prarit Bhargava) +- Modularize CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU (Mark Salter) +- redhat/kernel.spec.template: Parallelize compression (Prarit Bhargava) +- config: Enable Security Path (Ricardo Robaina) +- redhat/self-test/data: Regenerate self-test data for make change (Prarit Bhargava) +- Update module filters for nvmem_u-boot-env (Justin M.
Forbes) +- fedora: Updates for 6.2 merge (Peter Robinson) +- fedora: Updates for 6.1 merge (Peter Robinson) +- modules-core: use %%posttrans (Gerd Hoffmann) +- split sub-rpm kernel-modules-core from kernel-core (Gerd Hoffmann) +- Turn off CONFIG_MTK_T7XX for S390x (Justin M. Forbes) +- CI: add variable for variant handling (Veronika Kabatova) +- Fix up configs with SND_SOC_NAU8315 mismatch (Justin M. Forbes) +- CI: Do a full build for non-bot runs (Veronika Kabatova) +- Fix up configs with SND_SOC_NAU8315 mismatch (Justin M. Forbes) +- kernel/rh_messages.c: gcc12 warning on redundant NULL test (Eric Chanudet) [2142658] +- redhat/configs: Enable CRYPTO_CURVE25519 in ark (Prarit Bhargava) +- general: arm: cleanup ASPEED options (Peter Robinson) +- redhat/configs: ALSA - cleanups for the AMD Pink Sardine DMIC driver (Jaroslav Kysela) +- redhat/docs: Add FAQ entry for booting between Fedora & ELN/RHEL kernels (Prarit Bhargava) +- spec: add missing BuildRequires: python3-docutils for tools (Ondrej Mosnacek) +- config: enable RCU_TRACE for debug kernels (Wander Lairson Costa) +- Add siphash_kunit and strscpy_kunit to mod-internal.list (Justin M. Forbes) +- Add drm_kunit_helpers to mod-internal.list (Justin M. Forbes) +- Fix up configs for Fedora so we don't have a mismatch (Justin M. Forbes) +- Turn on CONFIG_SQUASHFS_DECOMP_SINGLE in pending (Justin M. Forbes) +- Trim changelog for 6.2 cycle (Justin M. Forbes) +- Reset RHEL_RELEASE for the 6.2 window. (Justin M. Forbes) +- redhat/kernel.spec.template: Fix cpupower file error (Prarit Bhargava) +- redhat/configs: aarch64: clean up some erratum configs (Mark Salter) +- More Fedora configs for 6.1 as deps were switched on (Justin M. Forbes) +- redhat/configs: make SOC_TEGRA_CBB a module (Mark Salter) +- redhat/configs: aarch64: reorganize tegra configs to common dir (Mark Salter) +- Enforces buildroot if cross_arm (Nicolas Chauvet) +- Handle automated case when config generation works correctly (Don Zickus) +- Turn off CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64 (Justin M. Forbes) +- Turn off CONFIG_EFI_ZBOOT as it makes CKI choke (Justin M. Forbes) +- Fedora config updates for 6.1 (Justin M. Forbes) +- redhat: Remove cpupower files (Prarit Bhargava) +- redhat/configs: update CXL-related options to match what RHEL will use (John W. Linville) +- Clean up the config for the Tegra186 timer (Al Stone) +- redhat/configs: move CONFIG_TEGRA186_GPC_DMA config (Mark Salter) +- Check for kernel config git-push failures (Don Zickus) +- redhat: genlog.sh failures should interrupt the recipe (Patrick Talbert) +- Turn CONFIG_GNSS back on for Fedora (Justin M. Forbes) +- redhat/configs: enable CONFIG_GNSS for RHEL (Michal Schmidt) +- Turn off NVMEM_U_BOOT_ENV for fedora (Justin M. Forbes) +- Consolidate matching fedora and ark entries to common (Justin M. Forbes) +- Empty out redhat/configs/common (Justin M. Forbes) +- Adjust path to compressed vmlinux kernel image for s390x (Justin M. Forbes) [2149273] +- Fedora config updates for 6.1 (Justin M. Forbes) +- redhat: genlog.sh should expect genlog.py in the current directory (Patrick Talbert) +- redhat/configs: consolidate CONFIG_TEST_LIVEPATCH=m (Joe Lawrence) +- redhat/configs: enable CONFIG_TEST_LIVEPATCH=m for s390x (Julia Denham) +- Revert "Merge branch 'ark-make-help' into 'os-build'" (Scott Weaver) +- Remove recommendation to use 'common' for config changes.
(Don Zickus) +- Update config to add i3c support for AArch64 (Mark Charlebois) +- redhat: Move cross-compile scripts into their own directory (Prarit Bhargava) +- redhat: Move yaml files into their own directory (Prarit Bhargava) +- redhat: Move update_scripts.sh into redhat/scripts (Prarit Bhargava) +- redhat: Move kernel-tools scripts into their own directory (Prarit Bhargava) +- redhat: Move gen-* scripts into their own directory (Prarit Bhargava) +- redhat: Move mod-* scripts into their own directory (Prarit Bhargava) +- redhat/Makefile: Fix RHJOBS grep warning (Prarit Bhargava) +- redhat: Force remove tmp file (Prarit Bhargava) +- redhat/configs: ALSA - cleanups for the CentOS 9.2 update (Jaroslav Kysela) +- CI: Use CKI container images from quay.io (Veronika Kabatova) +- redhat: clean up the partial-kgcov-snip.config file (Patrick Talbert) +- redhat: avoid picking up stray editor backups when processing configs (Clark Williams) +- CI: Remove old configs (Veronika Kabatova) +- redhat: override `make help` to include dist-help (Jonathan Toppins) +- redhat: make RHTEST stricter (Jonathan Toppins) +- redhat: Enable support for SN2201 system (Ivan Vecera) +- redhat/docs/index.rst: Add FLAVOR information to generate configs for local builds (Enric Balletbo i Serra) +- redhat: fix selftest git command so it picks the right commit (Patrick Talbert) +- redhat/configs: enable HP_WATCHDOG for aarch64 (Mark Salter) +- redhat: disable Kfence Kunit Test (Nico Pache) +- configs: enable CONFIG_LRU_GEN_ENABLED everywhere (Patrick Talbert) +- redhat: Enable WWAN feature and support for Intel, Qualcomm and Mediatek devices (Jose Ignacio Tornos Martinez) +- Turn on dln2 support (RHBZ 2110372) (Justin M. Forbes) +- Enable configs for imx8m PHYs (Al Stone) +- configs/fedora: Build some SC7180 clock controllers as modules (Javier Martinez Canillas) +- redhat/configs: Disable fbdev drivers and use simpledrm everywhere (Javier Martinez Canillas) [1986223] +- redhat: fix the branch we pull from the documentation tree (Herton R. Krzesinski) +- redhat/configs: change so watchdog is module versus builtin (Steve Best) +- redhat/configs: move CONFIG_ACPI_VIDEO to common/generic (Mark Langsdorf) +- enable imx8xm I2C configs properly (Al Stone) +- configs/fedora: Enable a few more drivers needed by the HP X2 Chromebook (Javier Martinez Canillas) +- enable the rtc-rv8803 driver on RHEL and Fedora (David Arcari) +- redhat/Makefile: Remove BUILD_SCRATCH_TARGET (Prarit Bhargava) +- configs: move CONFIG_INTEL_TDX_GUEST to common directory (Wander Lairson Costa) +- redhat/Makefile: Use new BUILD_TARGET for RHEL dist[g]-brew target (Prarit Bhargava) +- redhat: method.py: change the output loop to use 'values' method (Patrick Talbert) +- redhat: use 'update' method in merge.py (Patrick Talbert) +- redhat: Use a context manager in merge.py for opening the config file for reading (Patrick Talbert) +- redhat: automatically strip newlines in merge.py (Clark Williams) +- redhat: python replacement for merge.pl (Clark Williams) +- redhat/docs: Update with DISTLOCALVERSION (Prarit Bhargava) +- redhat/Makefile: Rename LOCALVERSION to DISTLOCALVERSION (Akihiko Odaki) +- Adjust FIPS module name in RHEL (Vladis Dronov) +- spec: prevent git apply from searching for the .git directory (Ondrej Mosnacek) +- redhat: Remove parallel_xz.sh (Prarit Bhargava) +- Turn on Multi-Gen LRU for Fedora (Justin M. Forbes) +- Add kasan_test to mod-internal.list (Justin M. 
Forbes) +- redhat/Makefile.variables: Fix typo with RHDISTGIT_TMP (Prarit Bhargava) +- spec: fix path to `installing_core` stamp file for subpackages (Jonathan Lebon) +- Remove unused ci scripts (Don Zickus) +- Rename rename FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER in configs (Justin M. Forbes) +- redhat: Add new fortify_kunit & is_signed_type_kunit to mod-internal.list (Patrick Talbert) +- Rename rename FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER in pending (Justin M. Forbes) +- Add acpi video to the filter_modules.sh for rhel (Justin M. Forbes) +- Change acpi_bus_get_acpi_device to acpi_get_acpi_dev (Justin M. Forbes) +- Turn on ACPI_VIDEO for arm (Justin M. Forbes) +- Turn on CONFIG_PRIME_NUMBERS as a module (Justin M. Forbes) +- Add new drm kunit tests to mod-internal.list (Justin M. Forbes) +- redhat: fix elf got hardening for vm tools (Frantisek Hrbata) +- kernel.spec.template: remove some temporary files early (Ondrej Mosnacek) +- kernel.spec.template: avoid keeping two copies of vmlinux (Ondrej Mosnacek) +- Add fortify_kunit to mod-internal.list (Justin M. Forbes) +- Add module filters for Fedora as acpi video has new deps (Justin M. Forbes) +- One more mismatch (Justin M. Forbes) +- Fix up pending for mismatches (Justin M. Forbes) +- Trim changelog with the reset (Justin M. Forbes) +- Reset the RHEL_RELEASE in Makefile.rhelver (Justin M. Forbes) +- Forgot too remove this from pending, it is set properly in ark (Justin M. Forbes) +- redhat/Makefile: Add DIST to git tags for RHEL (Prarit Bhargava) +- redhat/configs: Move CONFIG_ARM_SMMU_QCOM_DEBUG to common (Jerry Snitselaar) +- Common config cleanup for 6.0 (Justin M. Forbes) +- Allow selftests to fail without killing the build (Justin M. Forbes) +- redhat: Remove redhat/Makefile.rhpkg (Prarit Bhargava) +- redhat/Makefile: Move RHDISTGIT_CACHE and RHDISTGIT_TMP (Prarit Bhargava) +- redhat/Makefile.rhpkg: Remove RHDISTGIT_USER (Prarit Bhargava) +- redhat/Makefile: Move RHPKG_BIN to redhat/Makefile (Prarit Bhargava) +- common: clean up Android option with removal of CONFIG_ANDROID (Peter Robinson) +- redhat/configs: Remove x86_64 from priority files (Prarit Bhargava) +- redhat/configs/pending-ark: Remove x86_64 directory (Prarit Bhargava) +- redhat/configs/pending-fedora: Remove x86_64 directory (Prarit Bhargava) +- redhat/configs/fedora: Remove x86_64 directory (Prarit Bhargava) +- redhat/configs/common: Remove x86_64 directory (Prarit Bhargava) +- redhat/configs/ark: Remove x86_64 directory (Prarit Bhargava) +- redhat/configs/custom-overrides: Remove x86_64 directory (Prarit Bhargava) +- configs: use common CONFIG_ARM64_SME for ark and fedora (Mark Salter) +- redhat/configs: Add a warning message to priority.common (Prarit Bhargava) +- redhat/configs: Enable INIT_STACK_ALL_ZERO for Fedora (Miko Larsson) +- redhat: Set CONFIG_MAXLINEAR_GPHY to =m (Petr Oros) +- redhat/configs enable CONFIG_INTEL_IFS (David Arcari) +- redhat: Remove filter-i686.sh.rhel (Prarit Bhargava) +- redhat/Makefile: Set PATCHLIST_URL to none for RHEL/cs9 (Prarit Bhargava) +- redhat: remove GL_DISTGIT_USER, RHDISTGIT and unify dist-git cloning (Prarit Bhargava) +- redhat/Makefile.variables: Add ADD_COMMITID_TO_VERSION (Prarit Bhargava) +- kernel.spec: disable vmlinux.h generation for s390 zfcpdump config (Prarit Bhargava) +- perf: Require libbpf 0.6.0 or newer (Prarit Bhargava) +- kabi: add stablelist helpers (Prarit Bhargava) +- Makefile: add kabi targets (Prarit Bhargava) +- kabi: add support for symbol namespaces into check-kabi (Prarit Bhargava) +- kabi: 
ignore new stablelist metadata in show-kabi (Prarit Bhargava) +- redhat/Makefile: add dist-assert-tree-clean target (Prarit Bhargava) +- redhat/kernel.spec.template: Specify vmlinux.h path when building samples/bpf (Prarit Bhargava) [2041365] +- spec: Fix separate tools build (Prarit Bhargava) [2054579] +- redhat/scripts: Update merge-subtrees.sh with new subtree location (Prarit Bhargava) +- redhat/kernel.spec.template: enable dependencies generation (Prarit Bhargava) +- redhat: build and include memfd to kernel-selftests-internal (Prarit Bhargava) [2027506] +- redhat/kernel.spec.template: Link perf with --export-dynamic (Prarit Bhargava) +- redhat: kernel.spec: selftests: abort on build failure (Prarit Bhargava) +- redhat: configs: move CONFIG_SERIAL_MULTI_INSTANTIATE=m settings to common/x86 (Jaroslav Kysela) +- configs: enable CONFIG_HP_ILO for aarch64 (Mark Salter) +- all: cleanup dell config options (Peter Robinson) +- redhat: Include more kunit tests (Nico Pache) +- common: some minor cleanups/de-dupe (Peter Robinson) +- common: enable INTEGRITY_MACHINE_KEYRING on all configuraitons (Peter Robinson) +- Fedora 6.0 configs update (Justin M. Forbes) +- redhat/self-test: Ignore .rhpkg.mk files (Prarit Bhargava) +- redhat/configs: Enable CONFIG_PRINTK_INDEX on Fedora (Prarit Bhargava) +- redhat/configs: Cleanup CONFIG_X86_KERNEL_IBT (Prarit Bhargava) +- Fix up SND_CTL debug options (Justin M. Forbes) +- redhat: create /boot symvers link if it doesn't exist (Jan Stancek) +- redhat: remove duplicate kunit tests in mod-internal.list (Nico Pache) +- configs/fedora: Make Fedora work with HNS3 network adapter (Zamir SUN) +- redhat/configs/fedora/generic: Enable CONFIG_BLK_DEV_UBLK on Fedora (Richard W.M. Jones) [2122595] +- fedora: disable IWLMEI (Peter Robinson) +- redhat/configs: enable UINPUT on aarch64 (Benjamin Tissoires) +- Fedora 6.0 configs part 1 (Justin M. Forbes) +- redhat/Makefile: Always set UPSTREAM (Prarit Bhargava) +- redhat/configs: aarch64: Turn on Apple Silicon configs for Fedora (Eric Curtin) +- Add cpumask_kunit to mod-internal.list (Justin M. Forbes) +- config - consolidate disabled MARCH options on s390x (Dan Horák) +- move the baseline arch to z13 for s390x in F-37+ (Dan Horák) +- redhat/scripts/rh-dist-git.sh: Fix outdated cvs reference (Prarit Bhargava) +- redhat/scripts/expand_srpm.sh: Use Makefile variables (Prarit Bhargava) +- redhat/scripts/clone_tree.sh: Use Makefile variables (Prarit Bhargava) +- Fedora: arm changes for 6.0, part 1, with some ACPI (Peter Robinson) +- redhat/self-test: Fix shellcheck errors (Prarit Bhargava) +- redhat/docs: Add dist-brew BUILD_FLAGS information (Prarit Bhargava) +- redhat: change the changelog item for upstream merges (Herton R. Krzesinski) +- redhat: fix dist-release build number test (Herton R. Krzesinski) +- redhat: fix release number bump when dist-release-changed runs (Herton R. Krzesinski) +- redhat: use new genlog.sh script to detect changes for dist-release (Herton R. Krzesinski) +- redhat: move changelog addition to the spec file back into genspec.sh (Herton R. Krzesinski) +- redhat: always add a rebase entry when ark merges from upstream (Herton R. Krzesinski) +- redhat: drop merge ark patches hack (Herton R. Krzesinski) +- redhat: don't hardcode temporary changelog file (Herton R. Krzesinski) +- redhat: split changelog generation from genspec.sh (Herton R. 
Krzesinski) +- redhat: configs: Disable FIE on arm (Jeremy Linton) [2012226] +- redhat/Makefile: Clean linux tarballs (Prarit Bhargava) +- redhat/configs: Cleanup CONFIG_ACPI_AGDI (Prarit Bhargava) +- spec: add cpupower daemon reload on install/upgrade (Jarod Wilson) +- redhat: properly handle binary files in patches (Ondrej Mosnacek) +- Add python3-setuptools buildreq for perf (Justin M. Forbes) +- Add cros_kunit to mod-internal.list (Justin M. Forbes) +- Add new tests to mod-internal.list (Justin M. Forbes) +- Turn off some Kunit tests in pending (Justin M. Forbes) +- Clean up a mismatch in Fedora configs (Justin M. Forbes) +- redhat/configs: Sync up Retbleed configs with centos-stream (Waiman Long) +- Change CRYPTO_BLAKE2S_X86 from m to y (Justin M. Forbes) +- Leave CONFIG_ACPI_VIDEO on for x86 only (Justin M. Forbes) +- Fix BLAKE2S_ARM and BLAKE2S_X86 configs in pending (Justin M. Forbes) +- Fix pending for ACPI_VIDEO (Justin M. Forbes) +- Reset release (Justin M. Forbes) +- redhat/configs: Fix rm warning on config warnings (Eric Chanudet) +- redhat/Makefile: Deprecate PREBUILD_GIT_ONLY variable (Prarit Bhargava) +- redhat/Makefile: Deprecate SINGLE_TARBALL variable (Prarit Bhargava) +- redhat/Makefile: Deprecate GIT variable (Prarit Bhargava) +- Update CONFIG_LOCKDEP_CHAINS_BITS to 18 (cmurf) +- Add new FIPS module name and version configs (Vladis Dronov) +- redhat/configs/fedora: Make PowerPC's nx-gzip buildin (Jakub Čajka) +- omit unused Provides (Dan Horák) +- self-test: Add test for DIST=".eln" (Prarit Bhargava) +- redhat: Enable CONFIG_LZ4_COMPRESS on Fedora (Prarit Bhargava) +- fedora: armv7: enable MMC_STM32_SDMMC (Peter Robinson) +- .gitlab-ci.yaml: Add test for dist-get-buildreqs target (Prarit Bhargava) +- redhat/docs: Add information on build dependencies (Prarit Bhargava) +- redhat/Makefile: Add better pass message for dist-get-buildreqs (Prarit Bhargava) +- redhat/Makefile: Provide a better message for system-sb-certs (Prarit Bhargava) +- redhat/Makefile: Change dist-buildreq-check to a non-blocking target (Prarit Bhargava) +- create-data: Parallelize spec file data (Prarit Bhargava) +- create-data.sh: Store SOURCES Makefile variable (Prarit Bhargava) +- redhat/Makefile: Split up setup-source target (Prarit Bhargava) +- create-data.sh: Redefine varfilename (Prarit Bhargava) +- create-data.sh: Parallelize variable file creation (Prarit Bhargava) +- redhat/configs: Enable CONFIG_LZ4_COMPRESS (Prarit Bhargava) +- redhat/docs: Update brew information (Prarit Bhargava) +- redhat/Makefile: Fix eln BUILD_TARGET (Prarit Bhargava) +- redhat/Makefile: Set BUILD_TARGET for dist-brew (Prarit Bhargava) +- kernel.spec.template: update (s390x) expoline.o path (Joe Lawrence) +- fedora: enable BCM_NET_PHYPTP (Peter Robinson) +- Fedora 5.19 configs update part 2 (Justin M. Forbes) +- redhat/Makefile: Change fedora BUILD_TARGET (Prarit Bhargava) +- New configs in security/keys (Fedora Kernel Team) +- Fedora: arm: enable a pair of drivers (Peter Robinson) +- redhat: make kernel-zfcpdump-core to not provide kernel-core/kernel (Herton R. Krzesinski) +- redhat/configs: Enable QAT devices for arches other than x86 (Vladis Dronov) +- Fedora 5.19 configs pt 1 (Justin M. Forbes) +- redhat: Exclude cpufreq.h from kernel-headers (Patrick Talbert) +- Add rtla subpackage for kernel-tools (Justin M. 
Forbes) +- fedora: arm: enable a couple of QCom drivers (Peter Robinson) +- redhat/Makefile: Deprecate BUILD_SCRATCH_TARGET (Prarit Bhargava) +- redhat: enable CONFIG_DEVTMPFS_SAFE (Mark Langsdorf) +- redhat/Makefile: Remove deprecated variables and targets (Prarit Bhargava) +- Split partner modules into a sub-package (Alice Mitchell) +- Enable kAFS and it's dependancies in RHEL (Alice Mitchell) +- Enable Marvell OcteonTX2 crypto device in ARK (Vladis Dronov) +- redhat/Makefile: Remove --scratch from BUILD_TARGET (Prarit Bhargava) +- redhat/Makefile: Fix dist-brew and distg-brew targets (Prarit Bhargava) +- fedora: arm64: Initial support for TI Keystone 3 (ARCH_K3) (Peter Robinson) +- fedora: arm: enable Hardware Timestamping Engine support (Peter Robinson) +- fedora: wireless: disable SiLabs and PureLiFi (Peter Robinson) +- fedora: updates for 5.19 (Peter Robinson) +- fedora: minor updates for Fedora configs (Peter Robinson) +- configs/fedora: Enable the pinctrl SC7180 driver built-in (Enric Balletbo i Serra) +- redhat/configs: enable CONFIG_DEBUG_NET for debug kernel (Hangbin Liu) +- redhat/Makefile: Add SPECKABIVERSION variable (Prarit Bhargava) +- redhat/self-test: Provide better failure output (Prarit Bhargava) +- redhat/self-test: Reformat tests to kernel standard (Prarit Bhargava) +- redhat/self-test: Add purpose and header to each test (Prarit Bhargava) +- Drop outdated CRYPTO_ECDH configs (Vladis Dronov) +- Brush up crypto SHA512 and USER configs (Vladis Dronov) +- Brush up crypto ECDH and ECDSA configs (Vladis Dronov) +- redhat/self-test: Update data set (Prarit Bhargava) +- create-data.sh: Reduce specfile data output (Prarit Bhargava) +- redhat/configs: restore/fix core INTEL_LPSS configs to be builtin again (Hans de Goede) +- Enable CKI on os-build MRs only (Don Zickus) +- self-test: Fixup Makefile contents test (Prarit Bhargava) +- redhat/self-test: self-test data update (Prarit Bhargava) +- redhat/self-test: Fix up create-data.sh to not report local variables (Prarit Bhargava) +- redhat/configs/fedora: Enable a set of modules used on some x86 tablets (Hans de Goede) +- redhat/configs: Make INTEL_SOC_PMIC_CHTDC_TI builtin (Hans de Goede) +- redhat/configs/fedora: enable missing modules modules for Intel IPU3 camera support (Hans de Goede) +- Common: minor cleanups (Peter Robinson) +- fedora: some minor Fedora cleanups (Peter Robinson) +- fedora: drop X86_PLATFORM_DRIVERS_DELL dupe (Peter Robinson) +- redhat: change tools_make macro to avoid full override of variables in Makefile (Herton R. Krzesinski) +- Fix typo in Makefile for Fedora Stable Versioning (Justin M. Forbes) +- Remove duplicates from ark/generic/s390x/zfcpdump/ (Vladis Dronov) +- Move common/debug/s390x/zfcpdump/ configs to ark/debug/s390x/zfcpdump/ (Vladis Dronov) +- Move common/generic/s390x/zfcpdump/ configs to ark/generic/s390x/zfcpdump/ (Vladis Dronov) +- Drop RCU_EXP_CPU_STALL_TIMEOUT to 0, we are not really android (Justin M. Forbes) +- redhat/configs/README: Update the README (Prarit Bhargava) +- redhat/docs: fix hyperlink typo (Patrick Talbert) +- all: net: remove old NIC/ATM drivers that use virt_to_bus() (Peter Robinson) +- Explicitly turn off CONFIG_KASAN_INLINE for ppc (Justin M. Forbes) +- redhat/docs: Add a description of kernel naming (Prarit Bhargava) +- Change CRYPTO_CHACHA_S390 from m to y (Justin M. 
Forbes) +- enable CONFIG_NET_ACT_CTINFO in ark (Davide Caratti) +- redhat/configs: enable CONFIG_SP5100_TCO (David Arcari) +- redhat/configs: Set CONFIG_VIRTIO_IOMMU on x86_64 (Eric Auger) [2089765] +- Turn off KASAN_INLINE for RHEL ppc in pending (Justin M. Forbes) +- redhat/kernel.spec.template: update selftest data via "make dist-self-test-data" (Denys Vlasenko) +- redhat/kernel.spec.template: remove stray *.hardlink-temporary files, if any (Denys Vlasenko) +- Fix up ZSMALLOC config for s390 (Justin M. Forbes) +- Turn on KASAN_OUTLINE for ppc debug (Justin M. Forbes) +- Turn on KASAN_OUTLINE for PPC debug to avoid mismatch (Justin M. Forbes) +- Fix up crypto config mistmatches (Justin M. Forbes) +- Fix up config mismatches (Justin M. Forbes) +- generic/fedora: cleanup and disable Lightning Moutain SoC (Peter Robinson) +- redhat: Set SND_SOC_SOF_HDA_PROBES to =m (Patrick Talbert) +- Fix versioning on stable Fedora (Justin M. Forbes) +- Enable PAGE_POOL_STATS for arm only (Justin M. Forbes) +- Revert "Merge branch 'fix-ci-20220523' into 'os-build'" (Patrick Talbert) +- Fix changelog one more time post rebase (Justin M. Forbes) +- Flip CONFIG_RADIO_ADAPTERS to module for Fedora (Justin M. Forbes) +- Reset Release for 5.19 (Justin M. Forbes) +- redhat/Makefile: Drop quotation marks around string definitions (Prarit Bhargava) +- Fedora: arm: Updates for QCom devices (Peter Robinson) +- Fedora arm and generic updates for 5.17 (Peter Robinson) +- enable COMMON_CLK_SI5341 for Xilinx ZYNQ-MP (Peter Robinson) +- Turn on CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING for Fedora (Justin M. Forbes) +- redhat/self-test/data: Update data set (Prarit Bhargava) +- Revert variable switch for lasttag (Justin M. Forbes) +- redhat: Add self-tests to .gitlab-ci.yml (Prarit Bhargava) +- redhat/self-test: Update data (Prarit Bhargava) +- redhat/self-test: Unset Makefile variables (Prarit Bhargava) +- redhat/self-test: Omit SHELL variable from test data (Prarit Bhargava) +- Add CONFIG_EFI_DXE_MEM_ATTRIBUTES (Justin M. Forbes) +- Update filter-modules for mlx5-vfio-pci (Justin M. Forbes) +- Fedora configs for 5.18 (Justin M. 
Forbes) +- self-test/data/create-data.sh: Avoid SINGLE_TARBALL warning (Prarit Bhargava) +- redhat/Makefile: Rename PREBUILD to UPSTREAMBUILD (Prarit Bhargava) +- redhat/Makefile: Rename BUILDID to LOCALVERSION (Prarit Bhargava) +- redhat/Makefile: Fix dist-brew & distg-brew targets (Prarit Bhargava) +- redhat/Makefile: Reorganize MARKER code (Prarit Bhargava) +- redhat/scripts/new_release.sh: Use Makefile variables (Prarit Bhargava) +- redhat/Makefile: Rename __YSTREAM and __ZSTREAM (Prarit Bhargava) +- redhat/genspec.sh: Add comment about SPECBUILDID variable (Prarit Bhargava) +- redhat/kernel.spec.template: Move genspec variables into one section (Prarit Bhargava) +- redhat/kernel.spec.template: Remove kversion (Prarit Bhargava) +- redhat/Makefile: Add SPECTARFILE_RELEASE comment (Prarit Bhargava) +- redhat/Makefile: Rename RPMVERSION to BASEVERSION (Prarit Bhargava) +- redhat/Makefile: Target whitespace cleanup (Prarit Bhargava) +- redhat/Makefile: Move SPECRELEASE to genspec.sh (Prarit Bhargava) +- redhat/Makefile: Add kernel-NVR comment (Prarit Bhargava) +- redhat/Makefile: Use SPECFILE variable (Prarit Bhargava) +- redhat/Makefile: Remove KEXTRAVERSION (Prarit Bhargava) +- redhat: Enable VM kselftests (Nico Pache) [1978539] +- redhat: enable CONFIG_TEST_VMALLOC for vm selftests (Nico Pache) +- redhat: Enable HMM test to be used by the kselftest test suite (Nico Pache) +- redhat/Makefile.variables: Change git hash length to default (Prarit Bhargava) +- redhat/Makefile: Drop quotation marks around string definitions (Prarit Bhargava) +- Turn on INTEGRITY_MACHINE_KEYRING for Fedora (Justin M. Forbes) +- redhat/configs: fix CONFIG_INTEL_ISHTP_ECLITE (David Arcari) +- redhat/configs: Fix rm warning on error (Prarit Bhargava) +- Fix nightly merge CI (Don Zickus) +- redhat/kernel.spec.template: fix standalone tools build (Jan Stancek) +- Add system-sb-certs for RHEL-9 (Don Zickus) +- Fix dist-buildcheck-reqs (Don Zickus) +- move DAMON configs to correct directory (Chris von Recklinghausen) +- redhat: indicate HEAD state in tarball/rpm name (Jarod Wilson) +- Fedora 5.18 config set part 1 (Justin M. Forbes) +- fedora: arm: Enable new Rockchip 356x series drivers (Peter Robinson) +- fedora: arm: enable DRM_I2C_NXP_TDA998X on aarch64 (Peter Robinson) +- redhat/self-test: Add test to verify Makefile declarations. 
(Prarit Bhargava) +- redhat/Makefile: Add RHTEST (Prarit Bhargava) +- redhat: shellcheck cleanup (Prarit Bhargava) +- redhat/self-test/data: Cleanup data (Prarit Bhargava) +- redhat/self-test: Add test to verify SPEC variables (Prarit Bhargava) +- redhat/Makefile: Add 'duplicate' SPEC entries for user set variables (Prarit Bhargava) +- redhat/Makefile: Rename TARFILE_RELEASE to SPECTARFILE_RELEASE (Prarit Bhargava) +- redhat/genspec: Rename PATCHLIST_CHANGELOG to SPECPATCHLIST_CHANGELOG (Prarit Bhargava) +- redhat/genspec: Rename DEBUG_BUILDS_ENABLED to SPECDEBUG_BUILDS_ENABLED (Prarit Bhargava) +- redhat/Makefile: Rename PKGRELEASE to SPECBUILD (Prarit Bhargava) +- redhat/genspec: Rename BUILDID_DEFINE to SPECBUILDID (Prarit Bhargava) +- redhat/Makefile: Rename CHANGELOG to SPECCHANGELOG (Prarit Bhargava) +- redhat/Makefile: Rename RPMKEXTRAVERSION to SPECKEXTRAVERSION (Prarit Bhargava) +- redhat/Makefile: Rename RPMKSUBLEVEL to SPECKSUBLEVEL (Prarit Bhargava) +- redhat/Makefile: Rename RPMKPATCHLEVEL to SPECKPATCHLEVEL (Prarit Bhargava) +- redhat/Makefile: Rename RPMKVERSION to SPECKVERSION (Prarit Bhargava) +- redhat/Makefile: Rename KVERSION to SPECVERSION (Prarit Bhargava) +- redhat/Makefile: Deprecate some simple targets (Prarit Bhargava) +- redhat/Makefile: Use KVERSION (Prarit Bhargava) +- redhat/configs: Set GUP_TEST in debug kernel (Joel Savitz) +- enable DAMON configs (Chris von Recklinghausen) [2004233] +- redhat: add zstream switch for zstream release numbering (Herton R. Krzesinski) +- redhat: change kabi tarballs to use the package release (Herton R. Krzesinski) +- redhat: generate distgit changelog in genspec.sh as well (Herton R. Krzesinski) +- redhat: make genspec prefer metadata from git notes (Herton R. Krzesinski) +- redhat: use tags from git notes for zstream to generate changelog (Herton R. Krzesinski) +- ARK: Remove code marking devices unmaintained (Peter Georg) +- rh_message: Fix function name (Peter Georg) [2019377] +- Turn on CONFIG_RANDOM_TRUST_BOOTLOADER (Justin M. Forbes) +- redhat/configs: aarch64: enable CPU_FREQ_GOV_SCHEDUTIL (Mark Salter) +- Move CONFIG_HW_RANDOM_CN10K to a proper place (Vladis Dronov) +- redhat/self-test: Clean up data set (Prarit Bhargava) +- redhat/Makefile.rhpkg: Remove quotes for RHDISTGIT (Prarit Bhargava) +- redhat/scripts/create-tarball.sh: Use Makefile variables (Prarit Bhargava) +- redhat/Makefile: Deprecate SINGLE_TARBALL (Prarit Bhargava) +- redhat/Makefile: Move SINGLE_TARBALL to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Use RPMVERSION (Prarit Bhargava) +- redhat/scripts/rh-dist-git.sh: Use Makefile variables (Prarit Bhargava) +- redhat/configs/build_configs.sh: Use Makefile variables (Prarit Bhargava) +- redhat/configs/process_configs.sh: Use Makefile variables (Prarit Bhargava) +- redhat/kernel.spec.template: Use RPM_BUILD_NCPUS (Prarit Bhargava) +- redhat/configs/generate_all_configs.sh: Use Makefile variables (Prarit Bhargava) +- redhat/configs: enable nf_tables SYNPROXY extension on ark (Davide Caratti) +- fedora: Disable fbdev drivers missed before (Javier Martinez Canillas) +- Redhat: enable Kfence on production servers (Nico Pache) +- redhat: ignore known empty patches on the patches rpminspect test (Herton R. 
Krzesinski) +- kernel-ark: arch_hw Update CONFIG_MOUSE_VSXXXAA=m (Tony Camuso) [2062909] +- spec: keep .BTF section in modules for s390 (Yauheni Kaliuta) [2071969] +- kernel.spec.template: Ship arch/s390/lib/expoline.o in kernel-devel (Ondrej Mosnacek) +- redhat: disable tv/radio media device infrastructure (Jarod Wilson) +- redhat/configs: clean up INTEL_LPSS configuration (David Arcari) +- Have to rename the actual contents too (Justin M. Forbes) +- The CONFIG_SATA_MOBILE_LPM_POLICY rebane was reverted (Justin M. Forbes) +- redhat: Enable KASAN on all ELN debug kernels (Nico Pache) +- redhat: configs: Enable INTEL_IOMMU_DEBUGFS for debug builds (Jerry Snitselaar) +- generic: can: disable CAN_SOFTING everywhere (Peter Robinson) +- redhat/configs: Enable CONFIG_DM_ERA=m for all (Yanko Kaneti) +- redhat/configs: enable CONFIG_SAMPLE_VFIO_MDEV_MTTY (Patrick Talbert) +- Build intel_sdsi with %%{tools_make} (Justin M. Forbes) +- configs: remove redundant Fedora config for INTEL_IDXD_COMPAT (Jerry Snitselaar) +- redhat/configs: enable CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT (Joel Savitz) [2026319] +- configs: enable CONFIG_RMI4_F3A (Benjamin Tissoires) +- redhat: configs: Disable TPM 1.2 specific drivers (Jerry Snitselaar) +- redhat/configs: Enable cr50 I2C TPM interface (Akihiko Odaki) +- spec: make HMAC file encode relative path (Jonathan Lebon) +- redhat/kernel.spec.template: Add intel_sdsi utility (Prarit Bhargava) +- Spec fixes for intel-speed-select (Justin M. Forbes) +- Add Partner Supported taint flag to kAFS (Alice Mitchell) [2038999] +- Add Partner Supported taint flag (Alice Mitchell) [2038999] +- Enabled INTEGRITY_MACHINE_KEYRING for all configs. (Peter Robinson) +- redhat/configs: Enable CONFIG_RCU_SCALE_TEST & CONFIG_RCU_REF_SCALE_TEST (Waiman Long) +- Add clk_test and clk-gate_test to mod-internal.list (Justin M. Forbes) +- redhat/self-tests: Ignore UPSTREAM (Prarit Bhargava) +- redhat/self-tests: Ignore RHGITURL (Prarit Bhargava) +- redhat/Makefile.variables: Extend git hash length to 15 (Prarit Bhargava) +- redhat/self-test: Remove changelog from spec files (Prarit Bhargava) +- redhat/genspec.sh: Rearrange genspec.sh (Prarit Bhargava) +- redhat/self-test: Add spec file data (Prarit Bhargava) +- redhat/self-test: Add better dist-dump-variables test (Prarit Bhargava) +- redhat/self-test: Add variable test data (Prarit Bhargava) +- redhat/config: Remove obsolete CONFIG_MFD_INTEL_PMT (David Arcari) +- redhat/configs: enable CONFIG_INTEL_ISHTP_ECLITE (David Arcari) +- Avoid creating files in $RPM_SOURCE_DIR (Nicolas Chauvet) +- Flip CRC64 from off to y (Justin M. Forbes) +- New configs in lib/Kconfig (Fedora Kernel Team) +- disable redundant assignment of CONFIG_BQL on ARK (Davide Caratti) +- redhat/configs: remove unnecessary GPIO options for aarch64 (Brian Masney) +- redhat/configs: remove viperboard related Kconfig options (Brian Masney) +- redhat/configs/process_configs.sh: Avoid race with find (Prarit Bhargava) +- redhat/configs/process_configs.sh: Remove CONTINUEONERROR (Prarit Bhargava) +- Remove i686 configs and filters (Justin M. Forbes) +- redhat/configs: Set CONFIG_X86_AMD_PSTATE built-in on Fedora (Prarit Bhargava) +- Fix up mismatch with CRC64 (Justin M. Forbes) +- Fedora config updates to fix process_configs (Justin M. 
Forbes) +- redhat: Fix release tagging (Prarit Bhargava) +- redhat/self-test: Fix version tag test (Prarit Bhargava) +- redhat/self-test: Fix BUILD verification test (Prarit Bhargava) +- redhat/self-test: Cleanup SRPM related self-tests (Prarit Bhargava) +- redhat/self-test: Fix shellcheck test (Prarit Bhargava) +- redhat/configs: Disable watchdog components (Prarit Bhargava) +- redhat/README.Makefile: Add a Makefile README file (Prarit Bhargava) +- redhat/Makefile: Remove duplicated code (Prarit Bhargava) +- Add BuildRequires libnl3-devel for intel-speed-select (Justin M. Forbes) +- Add new kunit tests for 5.18 to mod-internal.list (Justin M. Forbes) +- Fix RHDISTGIT for Fedora (Justin M. Forbes) +- redhat/configs/process_configs.sh: Fix race with tools generation (Prarit Bhargava) +- New configs in drivers/dax (Fedora Kernel Team) +- Fix up CONFIG_SND_AMD_ACP_CONFIG files (Patrick Talbert) +- Remove CONFIG_SND_SOC_SOF_DEBUG_PROBES files (Patrick Talbert) +- SATA_MOBILE_LPM_POLICY is now SATA_LPM_POLICY (Justin M. Forbes) +- Define SNAPSHOT correctly when VERSION_ON_UPSTREAM is 0 (Justin M. Forbes) +- redhat/Makefile: Fix dist-git (Prarit Bhargava) +- Clean up the changelog (Justin M. Forbes) +- Change the pending-ark CONFIG_DAX to y due to mismatch (Justin M. Forbes) +- Reset Makefile.rhelver for the 5.18 cycle (Justin M. Forbes) +- Enable net reference count trackers in all debug kernels (Jiri Benc) +- redhat/Makefile: Reorganize variables (Prarit Bhargava) +- redhat/Makefile: Add some descriptions (Prarit Bhargava) +- redhat/Makefile: Move SNAPSHOT check (Prarit Bhargava) +- redhat/Makefile: Deprecate BREW_FLAGS, KOJI_FLAGS, and TEST_FLAGS (Prarit Bhargava) +- redhat/genspec.sh: Rework RPMVERSION variable (Prarit Bhargava) +- redhat/Makefile: Remove dead comment (Prarit Bhargava) +- redhat/Makefile: Cleanup KABI* variables. (Prarit Bhargava) +- redhat/Makefile.variables: Default RHGITCOMMIT to HEAD (Prarit Bhargava) +- redhat/scripts/create-tarball.sh: Use Makefile TARBALL variable (Prarit Bhargava) +- redhat/Makefile: Remove extra DIST_BRANCH (Prarit Bhargava) +- redhat/Makefile: Remove STAMP_VERSION (Prarit Bhargava) +- redhat/Makefile: Move NO_CONFIGCHECKS to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Move RHJOBS to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Move RHGIT* variables to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Move PREBUILD_GIT_ONLY to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Move BUILD to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Move BUILD_FLAGS to Makefile.variables. (Prarit Bhargava) +- redhat/Makefile: Move BUILD_PROFILE to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Move BUILD_TARGET and BUILD_SCRATCH_TARGET to Makefile.variables (Prarit Bhargava) +- redhat/Makefile: Remove RHPRODUCT variable (Prarit Bhargava) +- redhat/Makefile: Cleanup DISTRO variable (Prarit Bhargava) +- redhat/Makefile: Move HEAD to Makefile.variables. (Prarit Bhargava) +- redhat: Combine Makefile and Makefile.common (Prarit Bhargava) +- redhat/koji/Makefile: Decouple koji Makefile from Makefile.common (Prarit Bhargava) +- Set CONFIG_SND_SOC_SOF_MT8195 for Fedora and turn on VDPA_SIM_BLOCK (Justin M. Forbes) +- Add asus_wmi_sensors modules to filters for Fedora (Justin M. Forbes) +- redhat: spec: trigger dracut when modules are installed separately (Jan Stancek) +- Last of the Fedora 5.17 configs initial pass (Justin M. 
Forbes) +- redhat/Makefile: Silence dist-clean-configs output (Prarit Bhargava) +- Fedora 5.17 config updates (Justin M. Forbes) +- Setting CONFIG_I2C_SMBUS to "m" for ark (Gopal Tiwari) +- Print arch with process_configs errors (Justin M. Forbes) +- Pass RHJOBS to process_configs for dist-configs-check as well (Justin M. Forbes) +- redhat/configs/process_configs.sh: Fix issue with old error files (Prarit Bhargava) +- redhat/configs/build_configs.sh: Parallelize execution (Prarit Bhargava) +- redhat/configs/build_configs.sh: Provide better messages (Prarit Bhargava) +- redhat/configs/build_configs.sh: Create unique output files (Prarit Bhargava) +- redhat/configs/build_configs.sh: Add local variables (Prarit Bhargava) +- redhat/configs/process_configs.sh: Parallelize execution (Prarit Bhargava) +- redhat/configs/process_configs.sh: Provide better messages (Prarit Bhargava) +- redhat/configs/process_configs.sh: Create unique output files (Prarit Bhargava) +- redhat/configs/process_configs.sh: Add processing config function (Prarit Bhargava) +- redhat: Unify genspec.sh and kernel.spec variable names (Prarit Bhargava) +- redhat/genspec.sh: Remove options and use Makefile variables (Prarit Bhargava) +- Add rebase note for 5.17 on Fedora stable (Justin M. Forbes) +- More Fedora config updates for 5.17 (Justin M. Forbes) +- redhat/configs: Disable CONFIG_MACINTOSH_DRIVERS in RHEL. (Prarit Bhargava) +- redhat: Fix "make dist-release-finish" to use the correct NVR variables (Neal Gompa) [2053836] +- Build CROS_EC Modules (Jason Montleon) +- redhat: configs: change aarch64 default dma domain to lazy (Jerry Snitselaar) +- redhat: configs: disable ATM protocols (Davide Caratti) +- configs/fedora: Enable the interconnect SC7180 driver built-in (Enric Balletbo i Serra) +- configs: clean up CONFIG_PAGE_TABLE_ISOLATION files (Ondrej Mosnacek) +- redhat: configs: enable CONFIG_INTEL_PCH_THERMAL for RHEL x86 (David Arcari) +- redhat/Makefile: Fix dist-dump-variables target (Prarit Bhargava) +- redhat/configs: Enable DEV_DAX and DEV_DAX_PMEM modules on aarch64 for fedora (D Scott Phillips) +- redhat/configs: Enable CONFIG_TRANSPARENT_HUGEPAGE on aarch64 for fedora (D Scott Phillips) +- configs/process_configs.sh: Remove orig files (Prarit Bhargava) +- redhat: configs: Disable CONFIG_MPLS for s390x/zfcpdump (Guillaume Nault) +- Fedora 5.17 configs round 1 (Justin M. Forbes) +- redhat: configs: disable the surface platform (David Arcari) +- redhat: configs: Disable team driver (Hangbin Liu) [1945477] +- configs: enable LOGITECH_FF for RHEL/CentOS too (Benjamin Tissoires) +- redhat/configs: Disable CONFIG_SENSORS_NCT6683 in RHEL for arm/aarch64 (Dean Nelson) [2041186] +- redhat: fix make {distg-brew,distg-koji} (Andrea Claudi) +- [fedora] Turn on CONFIG_VIDEO_OV5693 for sensor support (Dave Olsthoorn) +- Cleanup 'disabled' config options for RHEL (Prarit Bhargava) +- redhat: move CONFIG_ARM64_MTE to aarch64 config directory (Herton R. Krzesinski) +- Change CONFIG_TEST_BPF to a module (Justin M. Forbes) +- Change CONFIG_TEST_BPF to module in pending MR coming for proper review (Justin M. 
Forbes) +- redhat/configs: Enable CONFIG_TEST_BPF (Viktor Malik) +- Enable KUNIT tests for testing (Nico Pache) +- Makefile: Check PKGRELEASE size on dist-brew targets (Prarit Bhargava) +- kernel.spec: Add glibc-static build requirement (Prarit Bhargava) +- Enable iSER on s390x (Stefan Schulze Frielinghaus) +- redhat/configs: Enable CONFIG_ACER_WIRELESS (Peter Georg) [2025985] +- kabi: Add kABI macros for enum type (Čestmír Kalina) [2024595] +- kabi: expand and clarify documentation of aux structs (Čestmír Kalina) [2024595] +- kabi: introduce RH_KABI_USE_AUX_PTR (Čestmír Kalina) [2024595] +- kabi: rename RH_KABI_SIZE_AND_EXTEND to AUX (Čestmír Kalina) [2024595] +- kabi: more consistent _RH_KABI_SIZE_AND_EXTEND (Čestmír Kalina) [2024595] +- kabi: use fixed field name for extended part (Čestmír Kalina) [2024595] +- kabi: fix dereference in RH_KABI_CHECK_EXT (Čestmír Kalina) [2024595] +- kabi: fix RH_KABI_SET_SIZE macro (Čestmír Kalina) [2024595] +- kabi: expand and clarify documentation (Čestmír Kalina) [2024595] +- kabi: make RH_KABI_USE replace any number of reserved fields (Čestmír Kalina) [2024595] +- kabi: rename RH_KABI_USE2 to RH_KABI_USE_SPLIT (Čestmír Kalina) [2024595] +- kabi: change RH_KABI_REPLACE2 to RH_KABI_REPLACE_SPLIT (Čestmír Kalina) [2024595] +- kabi: change RH_KABI_REPLACE_UNSAFE to RH_KABI_BROKEN_REPLACE (Čestmír Kalina) [2024595] +- kabi: introduce RH_KABI_ADD_MODIFIER (Čestmír Kalina) [2024595] +- kabi: Include kconfig.h (Čestmír Kalina) [2024595] +- kabi: macros for intentional kABI breakage (Čestmír Kalina) [2024595] +- kabi: fix the note about terminating semicolon (Čestmír Kalina) [2024595] +- kabi: introduce RH_KABI_HIDE_INCLUDE and RH_KABI_FAKE_INCLUDE (Čestmír Kalina) [2024595] +- spec: don't overwrite auto.conf with .config (Ondrej Mosnacek) +- New configs in drivers/crypto (Fedora Kernel Team) +- Add test_hash to the mod-internal.list (Justin M. Forbes) +- configs: disable CONFIG_CRAMFS (Abhi Das) [2041184] +- spec: speed up "cp -r" when it overwrites existing files. (Denys Vlasenko) +- redhat: use centos x509.genkey file if building under centos (Herton R. Krzesinski) +- Revert "[redhat] Generate a crashkernel.default for each kernel build" (Coiby Xu) +- spec: make linux-firmware weak(er) dependency (Jan Stancek) +- rtw89: enable new driver rtw89 and device RTK8852AE (Íñigo Huguet) +- Config consolidation into common (Justin M. Forbes) +- Add packaged but empty /lib/modules//systemtap/ (Justin M. Forbes) +- filter-modules.sh.rhel: Add ntc_thermistor to singlemods (Prarit Bhargava) +- Move CONFIG_SND_SOC_TLV320AIC31XX as it is now selected by CONFIG_SND_SOC_FSL_ASOC_CARD (Justin M. Forbes) +- Add dev_addr_lists_test to mod-internal.list (Justin M. Forbes) +- configs/fedora: Enable CONFIG_NFC_PN532_UART for use PN532 NFC module (Ziqian SUN (Zamir)) +- redhat: ignore ksamples and kselftests on the badfuncs rpminspect test (Herton R. Krzesinski) +- redhat: disable upstream check for rpminspect (Herton R. Krzesinski) +- redhat: switch the vsyscall config to CONFIG_LEGACY_VSYSCALL_XONLY=y (Herton R. 
Krzesinski) [1876977] +- redhat: configs: increase CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE (Rafael Aquini) +- move CONFIG_STRICT_SIGALTSTACK_SIZE to the appropriate directory (David Arcari) +- redhat/configs: Enable CONFIG_DM_MULTIPATH_IOA for fedora (Benjamin Marzinski) +- redhat/configs: Enable CONFIG_DM_MULTIPATH_HST (Benjamin Marzinski) [2000835] +- redhat: Pull in openssl-devel as a build dependency correctly (Neal Gompa) [2034670] +- redhat/configs: Migrate ZRAM_DEF_* configs to common/ (Neal Gompa) +- redhat/configs: Enable CONFIG_CRYPTO_ZSTD (Neal Gompa) [2032758] +- Turn CONFIG_DEVMEM back off for aarch64 (Justin M. Forbes) +- Clean up excess text in Fedora config files (Justin M. Forbes) +- Fedora config updates for 5.16 (Justin M. Forbes) +- redhat/configs: enable CONFIG_INPUT_KEYBOARD for AARCH64 (Vitaly Kuznetsov) +- Fedora configs for 5.16 pt 1 (Justin M. Forbes) +- redhat/configs: NFS: disable UDP, insecure enctypes (Benjamin Coddington) [1952863] +- Update rebase-notes with dracut 5.17 information (Justin M. Forbes) +- redhat/configs: Enable CONFIG_CRYPTO_BLAKE2B (Neal Gompa) [2031547] +- Enable CONFIG_BPF_SYSCALL for zfcpdump (Jiri Olsa) +- Enable CONFIG_CIFS_SMB_DIRECT for ARK (Ronnie Sahlberg) +- mt76: enable new device MT7921E in CentOs/RHEL (Íñigo Huguet) [2004821] +- Disable CONFIG_DEBUG_PREEMPT on normal builds (Phil Auld) +- redhat/configs: Enable CONFIG_PCI_P2PDMA for ark (Myron Stowe) +- pci.h: Fix static include (Prarit Bhargava) +- Enable CONFIG_VFIO_NOIOMMU for Fedora (Justin M. Forbes) +- redhat/configs: enable CONFIG_NTB_NETDEV for ark (John W. Linville) +- drivers/pci/pci-driver.c: Fix if/ifdef typo (Prarit Bhargava) +- common: arm64: ensure all the required arm64 errata are enabled (Peter Robinson) +- kernel/rh_taint.c: Update to new messaging (Prarit Bhargava) [2019377] +- redhat/configs: enable CONFIG_AMD_PTDMA for ark (John W. Linville) +- redhat/configs: enable CONFIG_RD_ZSTD for rhel (Tao Liu) [2020132] +- fedora: build TEE as a module for all arches (Peter Robinson) +- common: build TRUSTED_KEYS in everywhere (Peter Robinson) +- redhat: make Patchlist.changelog generation conditional (Herton R. Krzesinski) +- redhat/configs: Add two new CONFIGs (Prarit Bhargava) +- redhat/configs: Remove dead CONFIG files (Prarit Bhargava) +- redhat/configs/evaluate_configs: Add find dead configs option (Prarit Bhargava) +- Add more rebase notes for Fedora 5.16 (Justin M. Forbes) +- Fedora: Feature: Retire wireless Extensions (Peter Robinson) +- fedora: arm: some SoC enablement pieces (Peter Robinson) +- fedora: arm: enable PCIE_ROCKCHIP_DW for rk35xx series (Peter Robinson) +- fedora: enable RTW89 802.11 WiFi driver (Peter Robinson) +- fedora: arm: Enable DRM_PANEL_EDP (Peter Robinson) +- fedora: sound: enable new sound drivers (Peter Robinson) +- redhat/configs: unset KEXEC_SIG for s390x zfcpdump (Coiby Xu) +- spec: Keep .BTF section in modules (Jiri Olsa) +- Fix up PREEMPT configs (Justin M. Forbes) +- New configs in drivers/media (Fedora Kernel Team) +- New configs in drivers/net/ethernet/litex (Fedora Kernel Team) +- spec: add bpf_testmod.ko to kselftests/bpf (Viktor Malik) +- New configs in drivers/net/wwan (Fedora Kernel Team) +- New configs in drivers/i2c (Fedora Kernel Team) +- redhat/docs/index.rst: Add local build information. (Prarit Bhargava) +- Fix up preempt configs (Justin M. 
Forbes) +- Turn on CONFIG_HID_NINTENDO for controller support (Dave Olsthoorn) +- Fedora: Enable MediaTek bluetooth pieces (Peter Robinson) +- Add rebase notes to check for PCI patches (Justin M. Forbes) +- redhat: configs: move CONFIG_ACCESSIBILITY from fedora to common (John W. Linville) +- Filter updates for hid-playstation on Fedora (Justin M. Forbes) +- Enable CONFIG_VIRT_DRIVERS for ARK (Vitaly Kuznetsov) +- redhat/configs: Enable Nitro Enclaves on aarch64 (Vitaly Kuznetsov) +- Enable e1000 in rhel9 as unsupported (Ken Cox) [2002344] +- Turn on COMMON_CLK_AXG_AUDIO for Fedora rhbz 2020481 (Justin M. Forbes) +- Fix up fedora config options from mismatch (Justin M. Forbes) +- Add nct6775 to filter-modules.sh.rhel (Justin M. Forbes) +- Enable PREEMPT_DYNAMIC for all but s390x (Justin M. Forbes) +- Add memcpy_kunit to mod-internal.list (Justin M. Forbes) +- New configs in fs/ksmbd (Fedora Kernel Team) +- Add nct6775 to Fedora filter-modules.sh (Justin M. Forbes) +- New configs in fs/ntfs3 (Fedora Kernel Team) +- Make CONFIG_IOMMU_DEFAULT_DMA_STRICT default for all but x86 (Justin M. Forbes) +- redhat/configs: enable KEXEC_IMAGE_VERIFY_SIG for RHEL (Coiby Xu) +- redhat/configs: enable KEXEC_SIG for aarch64 RHEL (Coiby Xu) [1994858] +- Fix up fedora and pending configs for PREEMPT to end mismatch (Justin M. Forbes) +- Enable binder for fedora (Justin M. Forbes) +- Reset RHEL_RELEASE for 5.16 (Justin M. Forbes) +- redhat: configs: Update configs for vmware (Kamal Heib) +- Fedora configs for 5.15 (Justin M. Forbes) +- redhat/kernel.spec.template: don't hardcode gcov arches (Jan Stancek) +- redhat/configs: create a separate config for gcov options (Jan Stancek) +- Update documentation with FAQ and update frequency (Don Zickus) +- Document force pull option for mirroring (Don Zickus) +- Ignore the rhel9 kabi files (Don Zickus) +- Remove legacy elrdy cruft (Don Zickus) +- redhat/configs/evaluate_configs: walk cfgvariants line by line (Jan Stancek) +- redhat/configs/evaluate_configs: insert EMPTY tags at correct place (Jan Stancek) +- redhat: make dist-srpm-gcov add to BUILDOPTS (Jan Stancek) +- Build CONFIG_SPI_PXA2XX as a module on x86 (Justin M. Forbes) +- redhat/configs: enable CONFIG_BCMGENET as module (Joel Savitz) +- Fedora config updates (Justin M. Forbes) +- Enable CONFIG_FAIL_SUNRPC for debug builds (Justin M. Forbes) +- fedora: Disable fbdev drivers and use simpledrm instead (Javier Martinez Canillas) +- spec: Don't fail spec build if ksamples fails (Jiri Olsa) +- Enable CONFIG_QCOM_SCM for arm (Justin M. Forbes) +- redhat: Disable clang's integrated assembler on ppc64le and s390x (Tom Stellard) +- redhat/configs: enable CONFIG_IMA_WRITE_POLICY (Bruno Meneguele) +- Fix dist-srpm-gcov (Don Zickus) +- redhat: configs: add CONFIG_NTB and related items (John W. Linville) +- Add kfence_test to mod-internal.list (Justin M. Forbes) +- Enable KUNIT tests for redhat kernel-modules-internal (Nico Pache) +- redhat: add *-matched meta packages to rpminspect emptyrpm config (Herton R. Krzesinski) +- Use common config for NODES_SHIFT (Mark Salter) +- redhat: fix typo and make the output more silent for dist-git sync (Herton R. Krzesinski) +- Fedora NTFS config updates (Justin M. Forbes) +- Fedora 5.15 configs part 1 (Justin M. Forbes) +- Fix ordering in genspec args (Justin M. 
Forbes) +- redhat/configs: Enable Hyper-V guests on ARM64 (Vitaly Kuznetsov) [2007430] +- redhat: configs: Enable CONFIG_THINKPAD_LMI (Hans de Goede) +- redhat/docs: update Koji link to avoid redirect (Joel Savitz) +- redhat: add support for different profiles with dist*-brew (Herton R. Krzesinski) +- redhat: configs: Disable xtables and ipset (Phil Sutter) [1945179] +- redhat: Add mark_driver_deprecated() (Phil Sutter) [1945179] +- Change s390x CONFIG_NODES_SHIFT from 4 to 1 (Justin M. Forbes) +- Build CRYPTO_SHA3_*_S390 inline for s390 zfcpdump (Justin M. Forbes) +- redhat: move the DIST variable setting to Makefile.variables (Herton R. Krzesinski) +- redhat/kernel.spec.template: Cleanup source numbering (Prarit Bhargava) +- redhat/kernel.spec.template: Reorganize RHEL and Fedora specific files (Prarit Bhargava) +- redhat/kernel.spec.template: Add include_fedora and include_rhel variables (Prarit Bhargava) +- redhat/Makefile: Make kernel-local global (Prarit Bhargava) +- redhat/Makefile: Use flavors file (Prarit Bhargava) +- Turn on CONFIG_CPU_FREQ_GOV_SCHEDUTIL for x86 (Justin M. Forbes) +- redhat/configs: Remove CONFIG_INFINIBAND_I40IW (Kamal Heib) +- cleanup CONFIG_X86_PLATFORM_DRIVERS_INTEL (David Arcari) +- redhat: rename usage of .rhel8git.mk to .rhpkg.mk (Herton R. Krzesinski) +- Manually add pending items that need to be set due to mismatch (Justin M. Forbes) +- Clean up pending common (Justin M. Forbes) +- redhat/configs: Enable CONFIG_BLK_CGROUP_IOLATENCY & CONFIG_BLK_CGROUP_FC_APPID (Waiman Long) [2006813] +- redhat: remove kernel.changelog-8.99 file (Herton R. Krzesinski) +- redhat/configs: enable CONFIG_SQUASHFS_ZSTD which is already enabled in Fedora 34 (Tao Liu) [1998953] +- redhat: bump RHEL_MAJOR and add the changelog file for it (Herton R. Krzesinski) +- redhat: add documentation about the os-build rebase process (Herton R. Krzesinski) +- redhat/configs: enable SYSTEM_BLACKLIST_KEYRING which is already enabled in rhel8 and Fedora 34 (Coiby Xu) +- Build kernel-doc for Fedora (Justin M. Forbes) +- x86_64: Enable Elkhart Lake Quadrature Encoder Peripheral support (Prarit Bhargava) +- Update CONFIG_WERROR to disabled as it can cause issue with out of tree modules. (Justin M. Forbes) +- Fixup IOMMU configs in pending so that configs are sane again (Justin M. Forbes) +- Some initial Fedora config items for 5.15 (Justin M. Forbes) +- arm64: use common CONFIG_MAX_ZONEORDER for arm kernel (Mark Salter) +- Create Makefile.variables for a single point of configuration change (Justin M. Forbes) +- rpmspec: drop traceevent files instead of just excluding them from files list (Herton R. Krzesinski) [1967640] +- redhat/config: Enablement of CONFIG_PAPR_SCM for PowerPC (Gustavo Walbon) [1962936] +- Attempt to fix Intel PMT code (David Arcari) +- CI: Enable realtime branch testing (Veronika Kabatova) +- CI: Enable realtime checks for c9s and RHEL9 (Veronika Kabatova) +- ark: wireless: enable all rtw88 pcie wirless variants (Peter Robinson) +- wireless: rtw88: move debug options to common/debug (Peter Robinson) +- fedora: minor PTP clock driver cleanups (Peter Robinson) +- common: x86: enable VMware PTP support on ark (Peter Robinson) +- [scsi] megaraid_sas: re-add certain pci-ids (Tomas Henzl) +- Disable liquidio driver on ark/rhel (Herton R. Krzesinski) [1993393] +- More Fedora config updates (Justin M. Forbes) +- Fedora config updates for 5.14 (Justin M. 
Forbes) +- CI: Rename ARK CI pipeline type (Veronika Kabatova) +- CI: Finish up c9s config (Veronika Kabatova) +- CI: Update ppc64le config (Veronika Kabatova) +- CI: use more templates (Veronika Kabatova) +- Filter updates for aarch64 (Justin M. Forbes) +- increase CONFIG_NODES_SHIFT for aarch64 (Chris von Recklinghausen) [1890304] +- redhat: configs: Enable CONFIG_WIRELESS_HOTKEY (Hans de Goede) +- redhat/configs: Update CONFIG_NVRAM (Desnes A. Nunes do Rosario) [1988254] +- common: serial: build in SERIAL_8250_LPSS for x86 (Peter Robinson) +- powerpc: enable CONFIG_FUNCTION_PROFILER (Diego Domingos) [1831065] +- redhat/configs: Disable Soft-RoCE driver (Kamal Heib) +- redhat/configs/evaluate_configs: Update help output (Prarit Bhargava) +- redhat/configs: Double MAX_LOCKDEP_CHAINS (Justin M. Forbes) +- fedora: configs: Fix WM5102 Kconfig (Hans de Goede) +- powerpc: enable CONFIG_POWER9_CPU (Diego Domingos) [1876436] +- redhat/configs: Fix CONFIG_VIRTIO_IOMMU to 'y' on aarch64 (Eric Auger) [1972795] +- filter-modules.sh: add more sound modules to filter (Jaroslav Kysela) +- redhat/configs: sound configuration cleanups and updates (Jaroslav Kysela) +- common: Update for CXL (Compute Express Link) configs (Peter Robinson) +- redhat: configs: disable CRYPTO_SM modules (Herton R. Krzesinski) [1990040] +- Remove fedora version of the LOCKDEP_BITS, we should use common (Justin M. Forbes) +- Re-enable sermouse for x86 (rhbz 1974002) (Justin M. Forbes) +- Fedora 5.14 configs round 1 (Justin M. Forbes) +- redhat: add gating configuration for centos stream/rhel9 (Herton R. Krzesinski) +- x86: configs: Enable CONFIG_TEST_FPU for debug kernels (Vitaly Kuznetsov) [1988384] +- redhat/configs: Move CHACHA and POLY1305 to core kernel to allow BIG_KEYS=y (root) [1983298] +- kernel.spec: fix build of samples/bpf (Jiri Benc) +- Enable OSNOISE_TRACER and TIMERLAT_TRACER (Jerome Marchand) [1979379] +- rpmspec: switch iio and gpio tools to use tools_make (Herton R. Krzesinski) [1956988] +- configs/process_configs.sh: Handle config items with no help text (Patrick Talbert) +- fedora: sound config updates for 5.14 (Peter Robinson) +- fedora: Only enable FSI drivers on POWER platform (Peter Robinson) +- The CONFIG_RAW_DRIVER has been removed from upstream (Peter Robinson) +- fedora: updates for 5.14 with a few disables for common from pending (Peter Robinson) +- fedora: migrate from MFD_TPS68470 -> INTEL_SKL_INT3472 (Peter Robinson) +- fedora: Remove STAGING_GASKET_FRAMEWORK (Peter Robinson) +- Fedora: move DRM_VMWGFX configs from ark -> common (Peter Robinson) +- fedora: arm: disabled unused FB drivers (Peter Robinson) +- fedora: don't enable FB_VIRTUAL (Peter Robinson) +- redhat/configs: Double MAX_LOCKDEP_ENTRIES (Waiman Long) [1940075] +- rpmspec: fix verbose output on kernel-devel installation (Herton R. Krzesinski) [1981406] +- Build Fedora x86s kernels with bytcr-wm5102 (Marius Hoch) +- Deleted redhat/configs/fedora/generic/x86/CONFIG_FB_HYPERV (Patrick Lang) +- rpmspec: correct the ghost initramfs attributes (Herton R. Krzesinski) [1977056] +- rpmspec: amend removal of depmod created files to include modules.builtin.alias.bin (Herton R. 
Krzesinski) [1977056] +- configs: remove duplicate CONFIG_DRM_HYPERV file (Patrick Talbert) +- CI: use common code for merge and release (Don Zickus) +- rpmspec: add release string to kernel doc directory name (Jan Stancek) +- redhat/configs: Add CONFIG_INTEL_PMT_CRASHLOG (Michael Petlan) [1880486] +- redhat/configs: Add CONFIG_INTEL_PMT_TELEMETRY (Michael Petlan) [1880486] +- redhat/configs: Add CONFIG_MFD_INTEL_PMT (Michael Petlan) [1880486] +- redhat/configs: enable CONFIG_BLK_DEV_ZONED (Ming Lei) [1638087] +- Add --with clang_lto option to build the kernel with Link Time Optimizations (Tom Stellard) +- common: disable DVB_AV7110 and associated pieces (Peter Robinson) +- Fix fedora-only config updates (Don Zickus) +- Fedor config update for new option (Justin M. Forbes) +- redhat/configs: Enable stmmac NIC for x86_64 (Mark Salter) +- all: hyperv: use the DRM driver rather than FB (Peter Robinson) +- all: hyperv: unify the Microsoft HyperV configs (Peter Robinson) +- all: VMWare: clean up VMWare configs (Peter Robinson) +- Update CONFIG_ARM_FFA_TRANSPORT (Patrick Talbert) +- CI: Handle all mirrors (Veronika Kabatova) +- Turn on CONFIG_STACKTRACE for s390x zfpcdump kernels (Justin M. Forbes) +- arm64: switch ark kernel to 4K pagesize (Mark Salter) +- Disable AMIGA_PARTITION and KARMA_PARTITION (Prarit Bhargava) [1802694] +- all: unify and cleanup i2c TPM2 modules (Peter Robinson) +- redhat/configs: Set CONFIG_VIRTIO_IOMMU on aarch64 (Eric Auger) [1972795] +- redhat/configs: Disable CONFIG_RT_GROUP_SCHED in rhel config (Phil Auld) +- redhat/configs: enable KEXEC_SIG which is already enabled in RHEL8 for s390x and x86_64 (Coiby Xu) [1976835] +- rpmspec: do not BuildRequires bpftool on noarch (Herton R. Krzesinski) +- redhat/configs: disable {IMA,EVM}_LOAD_X509 (Bruno Meneguele) [1977529] +- redhat: add secureboot CA certificate to trusted kernel keyring (Bruno Meneguele) +- redhat/configs: enable IMA_ARCH_POLICY for aarch64 and s390x (Bruno Meneguele) +- redhat/configs: Enable CONFIG_MLXBF_GIGE on aarch64 (Alaa Hleihel) [1858599] +- common: enable STRICT_MODULE_RWX everywhere (Peter Robinson) +- COMMON_CLK_STM32MP157_SCMI is bool and selects COMMON_CLK_SCMI (Justin M. Forbes) +- kernel.spec: Add kernel{,-debug}-devel-matched meta packages (Timothée Ravier) +- Turn off with_selftests for Fedora (Justin M. Forbes) +- Don't build bpftool on Fedora (Justin M. Forbes) +- Fix location of syscall scripts for kernel-devel (Justin M. Forbes) +- fedora: arm: Enable some i.MX8 options (Peter Robinson) +- Enable Landlock for Fedora (Justin M. Forbes) +- Filter update for Fedora aarch64 (Justin M. Forbes) +- rpmspec: only build debug meta packages where we build debug ones (Herton R. Krzesinski) +- rpmspec: do not BuildRequires bpftool on nobuildarches (Herton R. 
Krzesinski) +- redhat/configs: Consolidate CONFIG_HMC_DRV in the common s390x folder (Thomas Huth) [1976270] +- redhat/configs: Consolidate CONFIG_EXPOLINE_OFF in the common folder (Thomas Huth) [1976270] +- redhat/configs: Move CONFIG_HW_RANDOM_S390 into the s390x/ subfolder (Thomas Huth) [1976270] +- redhat/configs: Disable CONFIG_HOTPLUG_PCI_SHPC in the Fedora settings (Thomas Huth) [1976270] +- redhat/configs: Remove the non-existent CONFIG_NO_BOOTMEM switch (Thomas Huth) [1976270] +- redhat/configs: Compile the virtio-console as a module on s390x (Thomas Huth) [1976270] +- redhat/configs: Enable CONFIG_S390_CCW_IOMMU and CONFIG_VFIO_CCW for ARK, too (Thomas Huth) [1976270] +- Revert "Merge branch 'ec_fips' into 'os-build'" (Vladis Dronov) [1947240] +- Fix typos in fedora filters (Justin M. Forbes) +- More filtering for Fedora (Justin M. Forbes) +- Fix Fedora module filtering for spi-altera-dfl (Justin M. Forbes) +- Fedora 5.13 config updates (Justin M. Forbes) +- fedora: cleanup TCG_TIS_I2C_CR50 (Peter Robinson) +- fedora: drop duplicate configs (Peter Robinson) +- More Fedora config updates for 5.13 (Justin M. Forbes) +- redhat/configs: Enable needed drivers for BlueField SoC on aarch64 (Alaa Hleihel) [1858592 1858594 1858596] +- redhat: Rename mod-blacklist.sh to mod-denylist.sh (Prarit Bhargava) +- redhat/configs: enable CONFIG_NET_ACT_MPLS (Marcelo Ricardo Leitner) +- configs: Enable CONFIG_DEBUG_KERNEL for zfcpdump (Jiri Olsa) +- kernel.spec: Add support to use vmlinux.h (Don Zickus) +- spec: Add vmlinux.h to kernel-devel package (Jiri Olsa) +- Turn off DRM_XEN_FRONTEND for Fedora as we had DRM_XEN off already (Justin M. Forbes) +- Fedora 5.13 config updates pt 3 (Justin M. Forbes) +- all: enable ath11k wireless modules (Peter Robinson) +- all: Enable WWAN and associated MHI bus pieces (Peter Robinson) +- spec: Enable sefltests rpm build (Jiri Olsa) +- spec: Allow bpf selftest/samples to fail (Jiri Olsa) +- kvm: Add kvm_stat.service file and kvm_stat logrotate config to the tools (Jiri Benc) +- kernel.spec: Add missing source files to kernel-selftests-internal (Jiri Benc) +- kernel.spec: selftests: add net/forwarding to TARGETS list (Jiri Benc) +- kernel.spec: selftests: add build requirement on libmnl-devel (Jiri Benc) +- kernel.spec: add action.o to kernel-selftests-internal (Jiri Benc) +- kernel.spec: avoid building bpftool repeatedly (Jiri Benc) +- kernel.spec: selftests require python3 (Jiri Benc) +- kernel.spec: skip selftests that failed to build (Jiri Benc) +- kernel.spec: fix installation of bpf selftests (Jiri Benc) +- redhat: fix samples and selftests make options (Jiri Benc) +- kernel.spec: enable mptcp selftests for kernel-selftests-internal (Jiri Benc) +- kernel.spec: Do not export shared objects from libexecdir to RPM Provides (Jiri Benc) +- kernel.spec: add missing dependency for the which package (Jiri Benc) +- kernel.spec: add netfilter selftests to kernel-selftests-internal (Jiri Benc) +- kernel.spec: move slabinfo and page_owner_sort debuginfo to tools-debuginfo (Jiri Benc) +- kernel.spec: package and ship VM tools (Jiri Benc) +- configs: enable CONFIG_PAGE_OWNER (Jiri Benc) +- kernel.spec: add coreutils (Jiri Benc) +- kernel.spec: add netdevsim driver selftests to kernel-selftests-internal (Jiri Benc) +- redhat/Makefile: Clean out the --without flags from the baseonly rule (Jiri Benc) +- kernel.spec: Stop building unnecessary rpms for baseonly builds (Jiri Benc) +- kernel.spec: disable more kabi switches for gcov build (Jiri Benc) +- kernel.spec: Rename 
+- kernel.spec: Fix error messages during build of zfcpdump kernel (Jiri Benc)
+- kernel.spec: perf: remove bpf examples (Jiri Benc)
+- kernel.spec: selftests should not depend on modules-internal (Jiri Benc)
+- kernel.spec: build samples (Jiri Benc)
+- kernel.spec: tools: sync missing options with RHEL 8 (Jiri Benc)
+- redhat/configs: nftables: Enable extra flowtable symbols (Phil Sutter)
+- redhat/configs: Sync netfilter options with RHEL8 (Phil Sutter)
+- Fedora 5.13 config updates pt 2 (Justin M. Forbes)
+- Move CONFIG_ARCH_INTEL_SOCFPGA up a level for Fedora (Justin M. Forbes)
+- fedora: enable the Rockchip rk3399 pcie drivers (Peter Robinson)
+- Fedora 5.13 config updates pt 1 (Justin M. Forbes)
+- Fix version requirement from opencsd-devel buildreq (Justin M. Forbes)
+- configs/ark/s390: set CONFIG_MARCH_Z14 and CONFIG_TUNE_Z15 (Philipp Rudo) [1876435]
+- configs/common/s390: Clean up CONFIG_{MARCH,TUNE}_Z* (Philipp Rudo)
+- configs/process_configs.sh: make use of dummy-tools (Philipp Rudo)
+- configs/common: disable CONFIG_INIT_STACK_ALL_{PATTERN,ZERO} (Philipp Rudo)
+- configs/common/aarch64: disable CONFIG_RELR (Philipp Rudo)
+- redhat/config: enable STMICRO nic for RHEL (Mark Salter)
+- redhat/configs: Enable ARCH_TEGRA on RHEL (Mark Salter)
+- redhat/configs: enable IMA_KEXEC for supported arches (Bruno Meneguele)
+- redhat/configs: enable INTEGRITY_SIGNATURE to all arches (Bruno Meneguele)
+- configs: enable CONFIG_LEDS_BRIGHTNESS_HW_CHANGED (Benjamin Tissoires)
+- RHEL: disable io_uring support (Jeff Moyer) [1964537]
+- all: Changing CONFIG_UV_SYSFS to build uv_sysfs.ko as a loadable module. (Frank Ramsay)
+- Enable NITRO_ENCLAVES on RHEL (Vitaly Kuznetsov)
+- Update the Quick Start documentation (David Ward)
+- redhat/configs: Set PVPANIC_MMIO for x86 and PVPANIC_PCI for aarch64 (Eric Auger) [1961178]
+- bpf: Fix unprivileged_bpf_disabled setup (Jiri Olsa)
+- Enable CONFIG_BPF_UNPRIV_DEFAULT_OFF (Jiri Olsa)
+- configs/common/s390: disable CONFIG_QETH_{OSN,OSX} (Philipp Rudo) [1903201]
+- nvme: nvme_mpath_init remove multipath check (Mike Snitzer)
+- Make CRYPTO_EC also builtin (Simo Sorce) [1947240]
+- Do not hard-code a default value for DIST (David Ward)
+- Override %%{debugbuildsenabled} if the --with-release option is used (David Ward)
+- Improve comments in SPEC file, and move some option tests and macros (David Ward)
+- configs: enable CONFIG_EXFAT_FS (Pavel Reichl) [1943423]
+- Revert s390x/zfcpdump part of a9d179c40281 and ecbfddd98621 (Vladis Dronov)
+- Embed crypto algos, modes and templates needed in the FIPS mode (Vladis Dronov) [1947240]
+- configs: Add and enable CONFIG_HYPERV_TESTING for debug kernels (Mohammed Gamal)
+- configs: enable CONFIG_CMA on x86_64 in ARK (David Hildenbrand) [1945002]
+- rpmspec: build debug-* meta-packages if debug builds are disabled (Herton R. Krzesinski)
+- UIO: disable unused config options (Aristeu Rozanski) [1957819]
+- ARK-config: Make amd_pinctrl module builtin (Hans de Goede)
+- rpmspec: revert/drop content hash for kernel-headers (Herton R. Krzesinski)
+- rpmspec: fix check that calls InitBuildVars (Herton R. Krzesinski)
+- fedora: enable zonefs (Damien Le Moal)
+- redhat: load specific ARCH keys to INTEGRITY_PLATFORM_KEYRING (Bruno Meneguele)
+- redhat: enable INTEGRITY_TRUSTED_KEYRING across all variants (Bruno Meneguele)
+- redhat: enable SYSTEM_BLACKLIST_KEYRING across all variants (Bruno Meneguele)
+- redhat: enable INTEGRITY_ASYMMETRIC_KEYS across all variants (Bruno Meneguele)
+- Remove unused boot loader specification files (David Ward)
+- redhat/configs: Enable mlx5 IPsec and TLS offloads (Alaa Hleihel) [1869674 1957636]
+- common: disable Apple Silicon generally (Peter Robinson)
+- cleanup Intel's FPGA configs (Peter Robinson)
+- common: move PTP KVM support from ark to common (Peter Robinson)
+- Enable CONFIG_DRM_AMDGPU_USERPTR for everyone (Justin M. Forbes)
+- redhat: add initial rpminspect configuration (Herton R. Krzesinski)
+- fedora: arm updates for 5.13 (Peter Robinson)
+- fedora: Enable WWAN and associated MHI bits (Peter Robinson)
+- Update CONFIG_MODPROBE_PATH to /usr/sbin (Justin Forbes)
+- Fedora set modprobe path (Justin M. Forbes)
+- Keep sctp and l2tp modules in modules-extra (Don Zickus)
+- Fix ppc64le cross build packaging (Don Zickus)
+- Fedora: Make amd_pinctrl module builtin (Hans de Goede)
+- Keep CONFIG_KASAN_HW_TAGS off for aarch64 debug configs (Justin M. Forbes)
+- New configs in drivers/bus (Fedora Kernel Team)
+- RHEL: Don't build KVM PR module on ppc64 (David Gibson) [1930649]
+- Flip CONFIG_USB_ROLE_SWITCH from m to y (Justin M. Forbes)
+- Set valid options for CONFIG_FW_LOADER_USER_HELPER (Justin M. Forbes)
+- Clean up CONFIG_FB_MODE_HELPERS (Justin M. Forbes)
+- Turn off CONFIG_VFIO for the s390x zfcpdump kernel (Justin M. Forbes)
+- Delete unused CONFIG_SND_SOC_MAX98390 pending-common (Justin M. Forbes)
+- Update pending-common configs, preparing to set correctly (Justin M. Forbes)
+- Update fedora filters for surface (Justin M. Forbes)
+- Build CONFIG_CRYPTO_ECDSA inline for s390x zfcpdump (Justin M. Forbes)
+- Replace "flavour" where "variant" is meant instead (David Ward)
+- Drop the %%{variant} macro and fix --with-vanilla (David Ward)
+- Fix syntax of %%kernel_variant_files (David Ward)
+- Change description of --without-vdso-install to fix typo (David Ward)
+- Config updates to work around mismatches (Justin M. Forbes)
+- CONFIG_SND_SOC_FSL_ASOC_CARD selects CONFIG_MFD_WM8994 now (Justin M. Forbes)
+- wireguard: disable in FIPS mode (Hangbin Liu) [1940794]
+- Enable mtdram for fedora (rhbz 1955916) (Justin M. Forbes)
+- Remove reference to bpf-helpers man page (Justin M. Forbes)
+- Fedora: enable more modules for surface devices (Dave Olsthoorn)
+- Fix Fedora config mismatch for CONFIG_FSL_ENETC_IERB (Justin M. Forbes)
+- hardlink is in /usr/bin/ now (Justin M. Forbes)
+- Ensure CONFIG_KVM_BOOK3S_64_PR stays on in Fedora, even if it is turned off in RHEL (Justin M. Forbes)
+- Set date in package release from repository commit, not system clock (David Ward)
+- Use a better upstream tarball filename for snapshots (David Ward)
+- Don't create empty pending-common files on pending-fedora commits (Don Zickus)
+- nvme: decouple basic ANA log page re-read support from native multipathing (Mike Snitzer)
+- nvme: allow local retry and proper failover for REQ_FAILFAST_TRANSPORT (Mike Snitzer)
+- nvme: Return BLK_STS_TARGET if the DNR bit is set (Mike Snitzer)
+- Add redhat/configs/pending-common/generic/s390x/zfcpdump/CONFIG_NETFS_SUPPORT (Justin M. Forbes)
+- Create ark-latest branch last for CI scripts (Don Zickus)
+- Replace /usr/libexec/platform-python with /usr/bin/python3 (David Ward)
+- Turn off ADI_AXI_ADC and AD9467 which now require CONFIG_OF (Justin M. Forbes)
+- Export ark infrastructure files (Don Zickus)
+- docs: Update docs to reflect newer workflow. (Don Zickus)
+- Use upstream/master for merge-base with fallback to master (Don Zickus)
+- Fedora: Turn off the SND_INTEL_BYT_PREFER_SOF option (Hans de Goede)
+- filter-modules.sh.fedora: clean up "netprots" (Paul Bolle)
+- filter-modules.sh.fedora: clean up "scsidrvs" (Paul Bolle)
+- filter-*.sh.fedora: clean up "ethdrvs" (Paul Bolle)
+- filter-*.sh.fedora: clean up "driverdirs" (Paul Bolle)
+- filter-*.sh.fedora: remove incorrect entries (Paul Bolle)
+- filter-*.sh.fedora: clean up "singlemods" (Paul Bolle)
+- filter-modules.sh.fedora: drop unused list "iiodrvs" (Paul Bolle)
+- Update mod-internal to fix depmod issue (Nico Pache)
+- Turn on CONFIG_VDPA_SIM_NET (rhbz 1942343) (Justin M. Forbes)
+- New configs in drivers/power (Fedora Kernel Team)
+- Turn on CONFIG_NOUVEAU_DEBUG_PUSH for debug configs (Justin M. Forbes)
+- Turn off KFENCE sampling by default for Fedora (Justin M. Forbes)
+- Fedora config updates round 2 (Justin M. Forbes)
+- New configs in drivers/soc (Jeremy Cline)
+- filter-modules.sh: Fix copy/paste error 'input' (Paul Bolle)
+- Update module filtering for 5.12 kernels (Justin M. Forbes)
+- Fix genlog.py to ensure that comments retain "%%" characters. (Mark Mielke)
+- New configs in drivers/leds (Fedora Kernel Team)
+- Limit CONFIG_USB_CDNS_SUPPORT to x86_64 and arm in Fedora (David Ward)
+- Fedora: Enable CHARGER_GPIO on aarch64 too (Peter Robinson)
+- Fedora config updates (Justin M. Forbes)
+- configs: enable CONFIG_WIREGUARD in ARK (Hangbin Liu) [1613522]
+- Remove duplicate configs acroos fedora, ark and common (Don Zickus)
+- Combine duplicate configs across ark and fedora into common (Don Zickus)
+- common/ark: cleanup and unify the parport configs (Peter Robinson)
+- iommu/vt-d: enable INTEL_IDXD_SVM for both fedora and rhel (Jerry Snitselaar)
+- REDHAT: coresight: etm4x: Disable coresight on HPE Apollo 70 (Jeremy Linton)
+- configs/common/generic: disable CONFIG_SLAB_MERGE_DEFAULT (Rafael Aquini)
+- Remove _legacy_common_support (Justin M. Forbes)
+- redhat/mod-blacklist.sh: Fix floppy blacklisting (Hans de Goede)
+- New configs in fs/pstore (CKI@GitLab)
+- New configs in arch/powerpc (Fedora Kernel Team)
+- configs: enable BPF LSM on Fedora and ARK (Ondrej Mosnacek)
+- configs: clean up LSM configs (Ondrej Mosnacek)
+- New configs in drivers/platform (CKI@GitLab)
+- New configs in drivers/firmware (CKI@GitLab)
+- New configs in drivers/mailbox (Fedora Kernel Team)
+- New configs in drivers/net/phy (Justin M. Forbes)
+- Update CONFIG_DM_MULTIPATH_IOA (Augusto Caringi)
+- New configs in mm/Kconfig (CKI@GitLab)
+- New configs in arch/powerpc (Jeremy Cline)
+- New configs in arch/powerpc (Jeremy Cline)
+- New configs in drivers/input (Fedora Kernel Team)
+- New configs in net/bluetooth (Justin M. Forbes)
+- New configs in drivers/clk (Fedora Kernel Team)
+- New configs in init/Kconfig (Jeremy Cline)
+- redhat: allow running fedora-configs and rh-configs targets outside of redhat/ (Herton R. Krzesinski)
+- all: unify the disable of goldfish (android emulation platform) (Peter Robinson)
+- common: minor cleanup/de-dupe of dma/dmabuf debug configs (Peter Robinson)
+- common/ark: these drivers/arches were removed in 5.12 (Peter Robinson)
+- Correct kernel-devel make prepare build for 5.12. (Paulo E. Castro)
+- redhat: add initial support for centos stream dist-git sync on Makefiles (Herton R. Krzesinski)
+- redhat/configs: Enable CONFIG_SCHED_STACK_END_CHECK for Fedora and ARK (Josh Poimboeuf) [1856174]
+- CONFIG_VFIO now selects IOMMU_API instead of depending on it, causing several config mismatches for the zfcpdump kernel (Justin M. Forbes)
+- Turn off weak-modules for Fedora (Justin M. Forbes)
+- redhat: enable CONFIG_FW_LOADER_COMPRESS for ARK (Herton R. Krzesinski) [1939095]
+- Fedora: filters: update to move dfl-emif to modules (Peter Robinson)
+- drop duplicate DEVFREQ_GOV_SIMPLE_ONDEMAND config (Peter Robinson)
+- efi: The EFI_VARS is legacy and now x86 only (Peter Robinson)
+- common: enable RTC_SYSTOHC to supplement update_persistent_clock64 (Peter Robinson)
+- generic: arm: enable SCMI for all options (Peter Robinson)
+- fedora: the PCH_CAN driver is x86-32 only (Peter Robinson)
+- common: disable legacy CAN device support (Peter Robinson)
+- common: Enable Microchip MCP251x/MCP251xFD CAN controllers (Peter Robinson)
+- common: Bosch MCAN support for Intel Elkhart Lake (Peter Robinson)
+- common: enable CAN_PEAK_PCIEFD PCI-E driver (Peter Robinson)
+- common: disable CAN_PEAK_PCIEC PCAN-ExpressCard (Peter Robinson)
+- common: enable common CAN layer 2 protocols (Peter Robinson)
+- ark: disable CAN_LEDS option (Peter Robinson)
+- Fedora: Turn on SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC option (Hans de Goede)
+- Fedora: enable modules for surface devices (Dave Olsthoorn)
+- Turn on SND_SOC_INTEL_SOUNDWIRE_SOF_MACH for Fedora again (Justin M. Forbes)
+- common: fix WM8804 codec dependencies (Peter Robinson)
+- Build SERIO_SERPORT as a module (Peter Robinson)
+- input: touchscreen: move ELO and Wacom serial touchscreens to x86 (Peter Robinson)
+- Sync serio touchscreens for non x86 architectures to the same as ARK (Peter Robinson)
+- Only enable SERIO_LIBPS2 on x86 (Peter Robinson)
+- Only enable PC keyboard controller and associated keyboard on x86 (Peter Robinson)
+- Generic: Mouse: Tweak generic serial mouse options (Peter Robinson)
+- Only enable PS2 Mouse options on x86 (Peter Robinson)
+- Disable bluetooth highspeed by default (Peter Robinson)
+- Fedora: A few more general updates for 5.12 window (Peter Robinson)
+- Fedora: Updates for 5.12 merge window (Peter Robinson)
+- Fedora: remove dead options that were removed upstream (Peter Robinson)
+- redhat: remove CONFIG_DRM_PANEL_XINGBANGDA_XBD599 (Herton R. Krzesinski)
+- New configs in arch/powerpc (Fedora Kernel Team)
+- Turn on CONFIG_PPC_QUEUED_SPINLOCKS as it is default upstream now (Justin M. Forbes)
+- Update pending-common configs to address new upstream config deps (Justin M. Forbes)
+- rpmspec: ship gpio-watch.debug in the proper debuginfo package (Herton R. Krzesinski)
+- Removed description text as a comment confuses the config generation (Justin M. Forbes)
+- New configs in drivers/dma-buf (Jeremy Cline)
+- Fedora: ARMv7: build for 16 CPUs. (Peter Robinson)
+- Fedora: only enable DEBUG_HIGHMEM on debug kernels (Peter Robinson)
+- process_configs.sh: fix find/xargs data flow (Ondrej Mosnacek)
+- Fedora config update (Justin M. Forbes)
+- fedora: minor arm sound config updates (Peter Robinson)
+- Fix trailing white space in redhat/configs/fedora/generic/CONFIG_SND_INTEL_BYT_PREFER_SOF (Justin M. Forbes)
+- Add a redhat/rebase-notes.txt file (Hans de Goede)
+- Turn on SND_INTEL_BYT_PREFER_SOF for Fedora (Hans de Goede)
+- CI: Drop MR ID from the name variable (Veronika Kabatova)
+- redhat: add DUP and kpatch certificates to system trusted keys for RHEL build (Herton R. Krzesinski)
+- The comments in CONFIG_USB_RTL8153_ECM actually turn off CONFIG_USB_RTL8152 (Justin M. Forbes)
+- Update CKI pipeline project (Veronika Kabatova)
+- Turn off additional KASAN options for Fedora (Justin M. Forbes)
+- Rename the master branch to rawhide for Fedora (Justin M. Forbes)
+- Makefile targets for packit integration (Ben Crocker)
+- Turn off KASAN for rawhide debug builds (Justin M. Forbes)
+- New configs in arch/arm64 (Justin Forbes)
+- Remove deprecated Intel MIC config options (Peter Robinson)
+- redhat: replace inline awk script with genlog.py call (Herton R. Krzesinski)
+- redhat: add genlog.py script (Herton R. Krzesinski)
+- kernel.spec.template - fix use_vdso usage (Ben Crocker)
+- redhat: remove remaining references of CONFIG_RH_DISABLE_DEPRECATED (Herton R. Krzesinski)
+- Turn off vdso_install for ppc (Justin M. Forbes)
+- Remove bpf-helpers.7 from bpftool package (Jiri Olsa)
+- New configs in lib/Kconfig.debug (Fedora Kernel Team)
+- Turn off CONFIG_VIRTIO_CONSOLE for s390x zfcpdump (Justin M. Forbes)
+- New configs in drivers/clk (Justin M. Forbes)
+- Keep VIRTIO_CONSOLE on s390x available. (Jakub Čajka)
+- New configs in lib/Kconfig.debug (Jeremy Cline)
+- Fedora 5.11 config updates part 4 (Justin M. Forbes)
+- Fedora 5.11 config updates part 3 (Justin M. Forbes)
+- Fedora 5.11 config updates part 2 (Justin M. Forbes)
+- Update internal (test) module list from RHEL-8 (Joe Lawrence) [1915073]
+- Fix USB_XHCI_PCI regression (Justin M. Forbes)
+- fedora: fixes for ARMv7 build issue by disabling HIGHPTE (Peter Robinson)
+- all: s390x: Increase CONFIG_PCI_NR_FUNCTIONS to 512 (#1888735) (Dan Horák)
+- Fedora 5.11 configs pt 1 (Justin M. Forbes)
+- redhat: avoid conflict with mod-blacklist.sh and released_kernel defined (Herton R. Krzesinski)
+- redhat: handle certificate files conditionally as done for src.rpm (Herton R. Krzesinski)
+- specfile: add %%{?_smp_mflags} to "make headers_install" in tools/testing/selftests (Denys Vlasenko)
+- specfile: add %%{?_smp_mflags} to "make samples/bpf/" (Denys Vlasenko)
+- Run MR testing in CKI pipeline (Veronika Kabatova)
+- Reword comment (Nicolas Chauvet)
+- Add with_cross_arm conditional (Nicolas Chauvet)
+- Redefines __strip if with_cross (Nicolas Chauvet)
+- fedora: only enable ACPI_CONFIGFS, ACPI_CUSTOM_METHOD in debug kernels (Peter Robinson)
+- fedora: User the same EFI_CUSTOM_SSDT_OVERLAYS as ARK (Peter Robinson)
+- all: all arches/kernels enable the same DMI options (Peter Robinson)
+- all: move SENSORS_ACPI_POWER to common/generic (Peter Robinson)
+- fedora: PCIE_HISI_ERR is already in common (Peter Robinson)
+- all: all ACPI platforms enable ATA_ACPI so move it to common (Peter Robinson)
+- all: x86: move shared x86 acpi config options to generic (Peter Robinson)
+- All: x86: Move ACPI_VIDEO to common/x86 (Peter Robinson)
+- All: x86: Enable ACPI_DPTF (Intel DPTF) (Peter Robinson)
+- All: enable ACPI_BGRT for all ACPI platforms. (Peter Robinson)
+- All: Only build ACPI_EC_DEBUGFS for debug kernels (Peter Robinson)
+- All: Disable Intel Classmate PC ACPI_CMPC option (Peter Robinson)
+- cleanup: ACPI_PROCFS_POWER was removed upstream (Peter Robinson)
+- All: ACPI: De-dupe the ACPI options that are the same across ark/fedora on x86/arm (Peter Robinson)
+- Enable the vkms module in Fedora (Jeremy Cline)
+- Fedora: arm updates for 5.11 and general cross Fedora cleanups (Peter Robinson)
+- Add gcc-c++ to BuildRequires (Justin M. Forbes)
+- Update CONFIG_KASAN_HW_TAGS (Justin M. Forbes)
+- fedora: arm: move generic power off/reset to all arm (Peter Robinson)
+- fedora: ARMv7: build in DEVFREQ_GOV_SIMPLE_ONDEMAND until I work out why it's changed (Peter Robinson)
+- fedora: cleanup joystick_adc (Peter Robinson)
+- fedora: update some display options (Peter Robinson)
+- fedora: arm: enable TI PRU options (Peter Robinson)
+- fedora: arm: minor exynos plaform updates (Peter Robinson)
+- arm: SoC: disable Toshiba Visconti SoC (Peter Robinson)
+- common: disable ARCH_BCM4908 (NFC) (Peter Robinson)
+- fedora: minor arm config updates (Peter Robinson)
+- fedora: enable Tegra 234 SoC (Peter Robinson)
+- fedora: arm: enable new Hikey 3xx options (Peter Robinson)
+- Fedora: USB updates (Peter Robinson)
+- fedora: enable the GNSS receiver subsystem (Peter Robinson)
+- Remove POWER_AVS as no longer upstream (Peter Robinson)
+- Cleanup RESET_RASPBERRYPI (Peter Robinson)
+- Cleanup GPIO_CDEV_V1 options. (Peter Robinson)
+- fedora: arm crypto updates (Peter Robinson)
+- CONFIG_KASAN_HW_TAGS for aarch64 (Justin M. Forbes)
+- Fedora: cleanup PCMCIA configs, move to x86 (Peter Robinson)
+- New configs in drivers/rtc (Fedora Kernel Team)
+- redhat/configs: Enable CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL (Josh Poimboeuf) [1856176]
+- redhat/configs: Enable CONFIG_GCC_PLUGIN_STRUCTLEAK (Josh Poimboeuf) [1856176]
+- redhat/configs: Enable CONFIG_GCC_PLUGINS on ARK (Josh Poimboeuf) [1856176]
+- redhat/configs: Enable CONFIG_KASAN on Fedora (Josh Poimboeuf) [1856176]
+- New configs in init/Kconfig (Fedora Kernel Team)
+- build_configs.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- genspec.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- mod-blacklist.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- Enable Speakup accessibility driver (Justin M. Forbes)
+- New configs in init/Kconfig (Fedora Kernel Team)
+- Fix fedora config mismatch due to dep changes (Justin M. Forbes)
+- New configs in drivers/crypto (Jeremy Cline)
+- Remove duplicate ENERGY_MODEL configs (Peter Robinson)
+- This is selected by PCIE_QCOM so must match (Justin M. Forbes)
+- drop unused BACKLIGHT_GENERIC (Peter Robinson)
+- Remove cp instruction already handled in instruction below. (Paulo E. Castro)
+- Add all the dependencies gleaned from running `make prepare` on a bloated devel kernel. (Paulo E. Castro)
+- Add tools to path mangling script. (Paulo E. Castro)
+- Remove duplicate cp statement which is also not specific to x86. (Paulo E. Castro)
+- Correct orc_types failure whilst running `make prepare` https://bugzilla.redhat.com/show_bug.cgi?id=1882854 (Paulo E. Castro)
+- redhat: ark: enable CONFIG_IKHEADERS (Jiri Olsa)
+- Add missing '$' sign to (GIT) in redhat/Makefile (Augusto Caringi)
+- Remove filterdiff and use native git instead (Don Zickus)
+- New configs in net/sched (Justin M. Forbes)
+- New configs in drivers/mfd (CKI@GitLab)
+- New configs in drivers/mfd (Fedora Kernel Team)
+- New configs in drivers/firmware (Fedora Kernel Team)
+- Temporarily backout parallel xz script (Justin M. Forbes)
+- redhat: explicitly disable CONFIG_IMA_APPRAISE_SIGNED_INIT (Bruno Meneguele)
+- redhat: enable CONFIG_EVM_LOAD_X509 on ARK (Bruno Meneguele)
+- redhat: enable CONFIG_EVM_ATTR_FSUUID on ARK (Bruno Meneguele)
+- redhat: enable CONFIG_EVM in all arches and flavors (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_LOAD_X509 on ARK (Bruno Meneguele)
+- redhat: set CONFIG_IMA_DEFAULT_HASH to SHA256 (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_READ_POLICY on ARK (Bruno Meneguele)
+- redhat: set default IMA template for all ARK arches (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_DEFAULT_HASH_SHA256 for all flavors (Bruno Meneguele)
+- redhat: disable CONFIG_IMA_DEFAULT_HASH_SHA1 (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_ARCH_POLICY for ppc and x86 (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_APPRAISE_MODSIG (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_APPRAISE_BOOTPARAM (Bruno Meneguele)
+- redhat: enable CONFIG_IMA_APPRAISE (Bruno Meneguele)
+- redhat: enable CONFIG_INTEGRITY for aarch64 (Bruno Meneguele)
+- kernel: Update some missing KASAN/KCSAN options (Jeremy Linton)
+- kernel: Enable coresight on aarch64 (Jeremy Linton)
+- Update CONFIG_INET6_ESPINTCP (Justin Forbes)
+- New configs in net/ipv6 (Justin M. Forbes)
+- fedora: move CONFIG_RTC_NVMEM options from ark to common (Peter Robinson)
+- configs: Enable CONFIG_DEBUG_INFO_BTF (Don Zickus)
+- fedora: some minor arm audio config tweaks (Peter Robinson)
+- Ship xpad with default modules on Fedora and RHEL (Bastien Nocera)
+- Fedora: Only enable legacy serial/game port joysticks on x86 (Peter Robinson)
+- Fedora: Enable the options required for the Librem 5 Phone (Peter Robinson)
+- Fedora config update (Justin M. Forbes)
+- Fedora config change because CONFIG_FSL_DPAA2_ETH now selects CONFIG_FSL_XGMAC_MDIO (Justin M. Forbes)
+- redhat: generic enable CONFIG_INET_MPTCP_DIAG (Davide Caratti)
+- Fedora config update (Justin M. Forbes)
+- Enable NANDSIM for Fedora (Justin M. Forbes)
+- Re-enable CONFIG_ACPI_TABLE_UPGRADE for Fedora since upstream disables this if secureboot is active (Justin M. Forbes)
+- Ath11k related config updates (Justin M. Forbes)
+- Fedora config updates for ath11k (Justin M. Forbes)
+- Turn on ATH11K for Fedora (Justin M. Forbes)
+- redhat: enable CONFIG_INTEL_IOMMU_SVM (Jerry Snitselaar)
+- More Fedora config fixes (Justin M. Forbes)
+- Fedora 5.10 config updates (Justin M. Forbes)
+- Fedora 5.10 configs round 1 (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Allow kernel-tools to build without selftests (Don Zickus)
+- Allow building of kernel-tools standalone (Don Zickus)
+- redhat: ark: disable CONFIG_NET_ACT_CTINFO (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_TEQL (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_SFB (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_QFQ (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_PLUG (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_PIE (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_HHF (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_DSMARK (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_DRR (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_CODEL (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_CHOKE (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_CBQ (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_SCH_ATM (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_EMATCH and sub-targets (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_TCINDEX (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_RSVP6 (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_RSVP (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_ROUTE4 (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_CLS_BASIC (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_ACT_SKBMOD (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_ACT_SIMP (Davide Caratti)
+- redhat: ark: disable CONFIG_NET_ACT_NAT (Davide Caratti)
+- arm64/defconfig: Enable CONFIG_KEXEC_FILE (Bhupesh Sharma) [1821565]
+- redhat/configs: Cleanup CONFIG_CRYPTO_SHA512 (Prarit Bhargava)
+- New configs in drivers/mfd (Fedora Kernel Team)
+- Fix LTO issues with kernel-tools (Don Zickus)
+- Point pathfix to the new location for gen_compile_commands.py (Justin M. Forbes)
+- configs: Disable CONFIG_SECURITY_SELINUX_DISABLE (Ondrej Mosnacek)
+- [Automatic] Handle config dependency changes (Don Zickus)
+- configs/iommu: Add config comment to empty CONFIG_SUN50I_IOMMU file (Jerry Snitselaar)
+- New configs in kernel/trace (Fedora Kernel Team)
+- Fix Fedora config locations (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- configs: enable CONFIG_CRYPTO_CTS=y so cts(cbc(aes)) is available in FIPS mode (Vladis Dronov) [1855161]
+- Partial revert: Add master merge check (Don Zickus)
+- Update Maintainers doc to reflect workflow changes (Don Zickus)
+- WIP: redhat/docs: Update documentation for single branch workflow (Prarit Bhargava)
+- Add CONFIG_ARM64_MTE which is not picked up by the config scripts for some reason (Justin M. Forbes)
+- Disable Speakup synth DECEXT (Justin M. Forbes)
+- Enable Speakup for Fedora since it is out of staging (Justin M. Forbes)
+- Modify patchlist changelog output (Don Zickus)
+- process_configs.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- generate_all_configs.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- redhat/self-test: Initial commit (Ben Crocker)
+- arch/x86: Remove vendor specific CPU ID checks (Prarit Bhargava)
+- redhat: Replace hardware.redhat.com link in Unsupported message (Prarit Bhargava) [1810301]
+- x86: Fix compile issues with rh_check_supported() (Don Zickus)
+- KEYS: Make use of platform keyring for module signature verify (Robert Holmes)
+- Input: rmi4 - remove the need for artificial IRQ in case of HID (Benjamin Tissoires)
+- ARM: tegra: usb no reset (Peter Robinson)
+- arm: make CONFIG_HIGHPTE optional without CONFIG_EXPERT (Jon Masters)
+- redhat: rh_kabi: deduplication friendly structs (Jiri Benc)
+- redhat: rh_kabi add a comment with warning about RH_KABI_EXCLUDE usage (Jiri Benc)
+- redhat: rh_kabi: introduce RH_KABI_EXTEND_WITH_SIZE (Jiri Benc)
+- redhat: rh_kabi: Indirect EXTEND macros so nesting of other macros will resolve. (Don Dutile)
+- redhat: rh_kabi: Fix RH_KABI_SET_SIZE to use dereference operator (Tony Camuso)
+- redhat: rh_kabi: Add macros to size and extend structs (Prarit Bhargava)
+- Removing Obsolete hba pci-ids from rhel8 (Dick Kennedy) [1572321]
+- mptsas: pci-id table changes (Laura Abbott)
+- mptsas: Taint kernel if mptsas is loaded (Laura Abbott)
+- mptspi: pci-id table changes (Laura Abbott)
+- qla2xxx: Remove PCI IDs of deprecated adapter (Jeremy Cline)
+- be2iscsi: remove unsupported device IDs (Chris Leech) [1574502 1598366]
+- mptspi: Taint kernel if mptspi is loaded (Laura Abbott)
+- hpsa: remove old cciss-based smartarray pci ids (Joseph Szczypek) [1471185]
+- qla4xxx: Remove deprecated PCI IDs from RHEL 8 (Chad Dupuis) [1518874]
+- aacraid: Remove depreciated device and vendor PCI id's (Raghava Aditya Renukunta) [1495307]
+- megaraid_sas: remove deprecated pci-ids (Tomas Henzl) [1509329]
+- mpt*: remove certain deprecated pci-ids (Jeremy Cline)
+- kernel: add SUPPORT_REMOVED kernel taint (Tomas Henzl) [1602033]
+- Rename RH_DISABLE_DEPRECATED to RHEL_DIFFERENCES (Don Zickus)
+- s390: Lock down the kernel when the IPL secure flag is set (Jeremy Cline)
+- efi: Lock down the kernel if booted in secure boot mode (David Howells)
+- efi: Add an EFI_SECURE_BOOT flag to indicate secure boot mode (David Howells)
+- security: lockdown: expose a hook to lock the kernel down (Jeremy Cline)
+- Make get_cert_list() use efi_status_to_str() to print error messages. (Peter Jones)
+- Add efi_status_to_str() and rework efi_status_to_err(). (Peter Jones)
+- Add support for deprecating processors (Laura Abbott) [1565717 1595918 1609604 1610493]
+- arm: aarch64: Drop the EXPERT setting from ARM64_FORCE_52BIT (Jeremy Cline)
+- iommu/arm-smmu: workaround DMA mode issues (Laura Abbott)
+- rh_kabi: introduce RH_KABI_EXCLUDE (Jakub Racek) [1652256]
+- ipmi: do not configure ipmi for HPE m400 (Laura Abbott) [1670017]
+- kABI: Add generic kABI macros to use for kABI workarounds (Myron Stowe) [1546831]
+- add pci_hw_vendor_status() (Maurizio Lombardi) [1590829]
+- ahci: thunderx2: Fix for errata that affects stop engine (Robert Richter) [1563590]
+- Vulcan: AHCI PCI bar fix for Broadcom Vulcan early silicon (Robert Richter) [1563590]
+- bpf: set unprivileged_bpf_disabled to 1 by default, add a boot parameter (Eugene Syromiatnikov) [1561171]
+- add Red Hat-specific taint flags (Eugene Syromiatnikov) [1559877]
+- tags.sh: Ignore redhat/rpm (Jeremy Cline)
+- put RHEL info into generated headers (Laura Abbott) [1663728]
+- aarch64: acpi scan: Fix regression related to X-Gene UARTs (Mark Salter) [1519554]
+- ACPI / irq: Workaround firmware issue on X-Gene based m400 (Mark Salter) [1519554]
+- modules: add rhelversion MODULE_INFO tag (Laura Abbott)
+- ACPI: APEI: arm64: Ignore broken HPE moonshot APEI support (Al Stone) [1518076]
+- Add Red Hat tainting (Laura Abbott) [1565704 1652266]
+- Introduce CONFIG_RH_DISABLE_DEPRECATED (Laura Abbott)
+- Stop merging ark-patches for release (Don Zickus)
+- Fix path location for ark-update-configs.sh (Don Zickus)
+- Combine Red Hat patches into single patch (Don Zickus)
+- New configs in drivers/misc (Jeremy Cline)
+- New configs in drivers/net/wireless (Justin M. Forbes)
+- New configs in drivers/phy (Fedora Kernel Team)
+- New configs in drivers/tty (Fedora Kernel Team)
+- Set SquashFS decompression options for all flavors to match RHEL (Bohdan Khomutskyi)
+- configs: Enable CONFIG_ENERGY_MODEL (Phil Auld)
+- New configs in drivers/pinctrl (Fedora Kernel Team)
+- Update CONFIG_THERMAL_NETLINK (Justin Forbes)
+- Separate merge-upstream and release stages (Don Zickus)
+- Re-enable CONFIG_IR_SERIAL on Fedora (Prarit Bhargava)
+- Create Patchlist.changelog file (Don Zickus)
+- Filter out upstream commits from changelog (Don Zickus)
+- Merge Upstream script fixes (Don Zickus)
+- kernel.spec: Remove kernel-keys directory on rpm erase (Prarit Bhargava)
+- Add mlx5_vdpa to module filter for Fedora (Justin M. Forbes)
+- Add python3-sphinx_rtd_theme buildreq for docs (Justin M. Forbes)
+- redhat/configs/process_configs.sh: Remove *.config.orig files (Prarit Bhargava)
+- redhat/configs/process_configs.sh: Add process_configs_known_broken flag (Prarit Bhargava)
+- redhat/Makefile: Fix '*-configs' targets (Prarit Bhargava)
+- dist-merge-upstream: Checkout known branch for ci scripts (Don Zickus)
+- kernel.spec: don't override upstream compiler flags for ppc64le (Dan Horák)
+- Fedora config updates (Justin M. Forbes)
+- Fedora confi gupdate (Justin M. Forbes)
+- mod-sign.sh: Fix syntax flagged by shellcheck (Ben Crocker)
+- Swap how ark-latest is built (Don Zickus)
+- Add extra version bump to os-build branch (Don Zickus)
+- dist-release: Avoid needless version bump. (Don Zickus)
+- Add dist-fedora-release target (Don Zickus)
+- Remove redundant code in dist-release (Don Zickus)
+- Makefile.common rename TAG to _TAG (Don Zickus)
+- Fedora config change (Justin M. Forbes)
+- Fedora filter update (Justin M. Forbes)
+- Config update for Fedora (Justin M. Forbes)
+- enable PROTECTED_VIRTUALIZATION_GUEST for all s390x kernels (Dan Horák)
+- redhat: ark: enable CONFIG_NET_SCH_TAPRIO (Davide Caratti)
+- redhat: ark: enable CONFIG_NET_SCH_ETF (Davide Caratti)
+- More Fedora config updates (Justin M. Forbes)
+- New config deps (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- First half of config updates for Fedora (Justin M. Forbes)
+- Updates for Fedora arm architectures for the 5.9 window (Peter Robinson)
+- Merge 5.9 config changes from Peter Robinson (Justin M. Forbes)
+- Add config options that only show up when we prep on arm (Justin M. Forbes)
+- Config updates for Fedora (Justin M. Forbes)
+- fedora: enable enery model (Peter Robinson)
+- Use the configs/generic config for SND_HDA_INTEL everywhere (Peter Robinson)
+- Enable ZSTD compression algorithm on all kernels (Peter Robinson)
+- Enable ARM_SMCCC_SOC_ID on all aarch64 kernels (Peter Robinson)
+- iio: enable LTR-559 light and proximity sensor (Peter Robinson)
+- iio: chemical: enable some popular chemical and partical sensors (Peter Robinson)
+- More mismatches (Justin M. Forbes)
+- Fedora config change due to deps (Justin M. Forbes)
+- CONFIG_SND_SOC_MAX98390 is now selected by SND_SOC_INTEL_DA7219_MAX98357A_GENERIC (Justin M. Forbes)
+- Config change required for build part 2 (Justin M. Forbes)
+- Config change required for build (Justin M. Forbes)
+- Fedora config update (Justin M. Forbes)
+- Add ability to sync upstream through Makefile (Don Zickus)
+- Add master merge check (Don Zickus)
+- Replace hardcoded values 'os-build' and project id with variables (Don Zickus)
+- redhat/Makefile.common: Fix MARKER (Prarit Bhargava)
+- gitattributes: Remove unnecesary export restrictions (Prarit Bhargava)
+- Add new certs for dual signing with boothole (Justin M. Forbes)
+- Update secureboot signing for dual keys (Justin M. Forbes)
+- fedora: enable LEDS_SGM3140 for arm configs (Peter Robinson)
+- Enable CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG (Justin M. Forbes)
+- redhat/configs: Fix common CONFIGs (Prarit Bhargava)
+- redhat/configs: General CONFIG cleanups (Prarit Bhargava)
+- redhat/configs: Update & generalize evaluate_configs (Prarit Bhargava)
+- fedora: arm: Update some meson config options (Peter Robinson)
+- redhat/docs: Add Fedora RPM tagging date (Prarit Bhargava)
+- Update config for renamed panel driver. (Peter Robinson)
+- Enable SERIAL_SC16IS7XX for SPI interfaces (Peter Robinson)
+- s390x-zfcpdump: Handle missing Module.symvers file (Don Zickus)
+- Fedora config updates (Justin M. Forbes)
+- redhat/configs: Add .tmp files to .gitignore (Prarit Bhargava)
+- disable uncommon TCP congestion control algorithms (Davide Caratti)
+- Add new bpf man pages (Justin M. Forbes)
+- Add default option for CONFIG_ARM64_BTI_KERNEL to pending-common so that eln kernels build (Justin M. Forbes)
+- redhat/Makefile: Add fedora-configs and rh-configs make targets (Prarit Bhargava)
+- redhat/configs: Use SHA512 for module signing (Prarit Bhargava)
+- genspec.sh: 'touch' empty Patchlist file for single tarball (Don Zickus)
+- Fedora config update for rc1 (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- redhat/Makefile.common: fix RPMKSUBLEVEL condition (Ondrej Mosnacek)
+- redhat/Makefile: silence KABI tar output (Ondrej Mosnacek)
+- One more Fedora config update (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fix PATCHLEVEL for merge window (Justin M. Forbes)
+- Change ark CONFIG_COMMON_CLK to yes, it is selected already by other options (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- More module filtering for Fedora (Justin M. Forbes)
+- Update filters for rnbd in Fedora (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fix up module filtering for 5.8 (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- More Fedora config work (Justin M. Forbes)
+- RTW88BE and CE have been extracted to their own modules (Justin M. Forbes)
+- Set CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK for Fedora (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Arm64 Use Branch Target Identification for kernel (Justin M. Forbes)
+- Change value of CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE (Justin M. Forbes)
+- Fedora config updates (Justin M. Forbes)
+- Fix configs for Fedora (Justin M. Forbes)
+- Add zero-commit to format-patch options (Justin M. Forbes)
+- Copy Makefile.rhelver as a source file rather than a patch (Jeremy Cline)
+- Move the sed to clear the patch templating outside of conditionals (Justin M. Forbes)
+- Match template format in kernel.spec.template (Justin M. Forbes)
+- Break out the Patches into individual files for dist-git (Justin M. Forbes)
+- Break the Red Hat patch into individual commits (Jeremy Cline)
+- Fix update_scripts.sh unselective pattern sub (David Howells)
+- Add cec to the filter overrides (Justin M. Forbes)
+- Add overrides to filter-modules.sh (Justin M. Forbes)
+- redhat/configs: Enable CONFIG_SMC91X and disable CONFIG_SMC911X (Prarit Bhargava) [1722136]
+- Include bpftool-struct_ops man page in the bpftool package (Jeremy Cline)
+- Add sharedbuffer_configuration.py to the pathfix.py script (Jeremy Cline)
+- Use __make macro instead of make (Tom Stellard)
+- Sign off generated configuration patches (Jeremy Cline)
+- Drop the static path configuration for the Sphinx docs (Jeremy Cline)
+- redhat: Add dummy-module kernel module (Prarit Bhargava)
+- redhat: enable CONFIG_LWTUNNEL_BPF (Jiri Benc)
+- Remove typoed config file aarch64CONFIG_SM_GCC_8150 (Justin M. Forbes)
+- Add Documentation back to kernel-devel as it has Kconfig now (Justin M. Forbes)
+- Copy distro files rather than moving them (Jeremy Cline)
+- kernel.spec: fix 'make scripts' for kernel-devel package (Brian Masney)
+- Makefile: correct help text for dist-cross--rpms (Brian Masney)
+- redhat/Makefile: Fix RHEL8 python warning (Prarit Bhargava)
+- redhat: Change Makefile target names to dist- (Prarit Bhargava)
+- configs: Disable Serial IR driver (Prarit Bhargava)
+- Fix "multiple %%files for package kernel-tools" (Pablo Greco)
+- Introduce a Sphinx documentation project (Jeremy Cline)
+- Build ARK against ELN (Don Zickus)
+- Drop the requirement to have a remote called linus (Jeremy Cline)
+- Rename 'internal' branch to 'os-build' (Don Zickus)
+- Only include open merge requests with "Include in Releases" label (Jeremy Cline)
+- Package gpio-watch in kernel-tools (Jeremy Cline)
+- Exit non-zero if the tag already exists for a release (Jeremy Cline)
+- Adjust the changelog update script to not push anything (Jeremy Cline)
+- Drop --target noarch from the rh-rpms make target (Jeremy Cline)
+- Add a script to generate release tags and branches (Jeremy Cline)
+- Set CONFIG_VDPA for fedora (Justin M. Forbes)
+- Add a README to the dist-git repository (Jeremy Cline)
+- Provide defaults in ark-rebase-patches.sh (Jeremy Cline)
+- Default ark-rebase-patches.sh to not report issues (Jeremy Cline)
+- Drop DIST from release commits and tags (Jeremy Cline)
+- Place the buildid before the dist in the release (Jeremy Cline)
+- Sync up with Fedora arm configuration prior to merging (Jeremy Cline)
+- Disable CONFIG_PROTECTED_VIRTUALIZATION_GUEST for zfcpdump (Jeremy Cline)
+- Add RHMAINTAINERS file and supporting conf (Don Zickus)
+- Add a script to test if all commits are signed off (Jeremy Cline)
+- Fix make rh-configs-arch (Don Zickus)
+- Drop RH_FEDORA in favor of the now-merged RHEL_DIFFERENCES (Jeremy Cline)
+- Sync up Fedora configs from the first week of the merge window (Jeremy Cline)
+- Migrate blacklisting floppy.ko to mod-blacklist.sh (Don Zickus)
+- kernel packaging: Combine mod-blacklist.sh and mod-extra-blacklist.sh (Don Zickus)
+- kernel packaging: Fix extra namespace collision (Don Zickus)
+- mod-extra.sh: Rename to mod-blacklist.sh (Don Zickus)
+- mod-extra.sh: Make file generic (Don Zickus)
+- Fix a painfully obvious YAML syntax error in .gitlab-ci.yml (Jeremy Cline)
+- Add in armv7hl kernel header support (Don Zickus)
+- Disable all BuildKernel commands when only building headers (Don Zickus)
+- Drop any gitlab-ci patches from ark-patches (Jeremy Cline)
+- Build the srpm for internal branch CI using the vanilla tree (Jeremy Cline)
+- Pull in the latest ARM configurations for Fedora (Jeremy Cline)
+- Fix xz memory usage issue (Neil Horman)
+- Use ark-latest instead of master for update script (Jeremy Cline)
+- Move the CI jobs back into the ARK repository (Jeremy Cline)
+- Sync up ARK's Fedora config with the dist-git repository (Jeremy Cline)
+- Pull in the latest configuration changes from Fedora (Jeremy Cline)
+- configs: enable CONFIG_NET_SCH_CBS (Marcelo Ricardo Leitner)
+- Drop configuration options in fedora/ that no longer exist (Jeremy Cline)
+- Set RH_FEDORA for ARK and Fedora (Jeremy Cline)
+- redhat/kernel.spec: Include the release in the kernel COPYING file (Jeremy Cline)
+- redhat/kernel.spec: add scripts/jobserver-exec to py3_shbang_opts list (Jeremy Cline)
+- redhat/kernel.spec: package bpftool-gen man page (Jeremy Cline)
+- distgit-changelog: handle multiple y-stream BZ numbers (Bruno Meneguele)
+- redhat/kernel.spec: remove all inline comments (Bruno Meneguele)
+- redhat/genspec: awk unknown whitespace regex pattern (Bruno Meneguele)
+- Improve the readability of gen_config_patches.sh (Jeremy Cline)
+- Fix some awkward edge cases in gen_config_patches.sh (Jeremy Cline)
+- Update the CI environment to use Fedora 31 (Jeremy Cline)
+- redhat: drop whitespace from with_gcov macro (Jan Stancek)
+- configs: Enable CONFIG_KEY_DH_OPERATIONS on ARK (Ondrej Mosnacek)
+- configs: Adjust CONFIG_MPLS_ROUTING and CONFIG_MPLS_IPTUNNEL (Laura Abbott)
+- New configs in lib/crypto (Jeremy Cline)
+- New configs in drivers/char (Jeremy Cline)
+- Turn on BLAKE2B for Fedora (Jeremy Cline)
+- kernel.spec.template: Clean up stray *.h.s files (Laura Abbott)
+- Build the SRPM in the CI job (Jeremy Cline)
+- New configs in net/tls (Jeremy Cline)
+- New configs in net/tipc (Jeremy Cline)
+- New configs in lib/kunit (Jeremy Cline)
+- Fix up released_kernel case (Laura Abbott)
+- New configs in lib/Kconfig.debug (Jeremy Cline)
+- New configs in drivers/ptp (Jeremy Cline)
+- New configs in drivers/nvme (Jeremy Cline)
+- New configs in drivers/net/phy (Jeremy Cline)
+- New configs in arch/arm64 (Jeremy Cline)
+- New configs in drivers/crypto (Jeremy Cline)
+- New configs in crypto/Kconfig (Jeremy Cline)
+- Add label so the Gitlab to email bridge ignores the changelog (Jeremy Cline)
+- Temporarily switch TUNE_DEFAULT to y (Jeremy Cline)
+- Run config test for merge requests and internal (Jeremy Cline)
+- Add missing licensedir line (Laura Abbott)
+- redhat/scripts: Remove redhat/scripts/rh_get_maintainer.pl (Prarit Bhargava)
+- configs: Take CONFIG_DEFAULT_MMAP_MIN_ADDR from Fedra (Laura Abbott)
+- configs: Turn off ISDN (Laura Abbott)
+- Add a script to generate configuration patches (Laura Abbott)
+- Introduce rh-configs-commit (Laura Abbott)
+- kernel-packaging: Remove kernel files from kernel-modules-extra package (Prarit Bhargava)
+- configs: Enable CONFIG_DEBUG_WX (Laura Abbott)
+- configs: Disable wireless USB (Laura Abbott)
+- Clean up some temporary config files (Laura Abbott)
+- configs: New config in drivers/gpu for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/powerpc for v5.4-rc1 (Jeremy Cline)
+- configs: New config in crypto for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/usb for v5.4-rc1 (Jeremy Cline)
+- AUTOMATIC: New configs (Jeremy Cline)
+- Skip ksamples for bpf, they are broken (Jeremy Cline)
+- configs: New config in fs/erofs for v5.4-rc1 (Jeremy Cline)
+- configs: New config in mm for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/md for v5.4-rc1 (Jeremy Cline)
+- configs: New config in init for v5.4-rc1 (Jeremy Cline)
+- configs: New config in fs/fuse for v5.4-rc1 (Jeremy Cline)
+- merge.pl: Avoid comments but do not skip them (Don Zickus)
+- configs: New config in drivers/net/ethernet/pensando for v5.4-rc1 (Jeremy Cline)
+- Update a comment about what released kernel means (Laura Abbott)
+- Provide both Fedora and RHEL files in the SRPM (Laura Abbott)
+- kernel.spec.template: Trim EXTRAVERSION in the Makefile (Laura Abbott)
+- kernel.spec.template: Add macros for building with nopatches (Laura Abbott)
+- kernel.spec.template: Add some macros for Fedora differences (Laura Abbott)
+- kernel.spec.template: Consolodate the options (Laura Abbott)
+- configs: Add pending direcory to Fedora (Laura Abbott)
+- kernel.spec.template: Don't run hardlink if rpm-ostree is in use (Laura Abbott)
+- configs: New config in net/can for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/phy for v5.4-rc1 (Jeremy Cline)
+- configs: Increase x86_64 NR_UARTS to 64 (Prarit Bhargava) [1730649]
+- configs: turn on ARM64_FORCE_52BIT for debug builds (Jeremy Cline)
+- kernel.spec.template: Tweak the python3 mangling (Laura Abbott)
+- kernel.spec.template: Add --with verbose option (Laura Abbott)
+- kernel.spec.template: Switch to using %%install instead of %%__install (Laura Abbott)
+- kernel.spec.template: Make the kernel.org URL https (Laura Abbott)
+- kernel.spec.template: Update message about secure boot signing (Laura Abbott)
+- kernel.spec.template: Move some with flags definitions up (Laura Abbott)
+- kernel.spec.template: Update some BuildRequires (Laura Abbott)
+- kernel.spec.template: Get rid of %%clean (Laura Abbott)
+- configs: New config in drivers/char for v5.4-rc1 (Jeremy Cline)
+- configs: New config in net/sched for v5.4-rc1 (Jeremy Cline)
+- configs: New config in lib for v5.4-rc1 (Jeremy Cline)
+- configs: New config in fs/verity for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/aarch64 for v5.4-rc4 (Jeremy Cline)
+- configs: New config in arch/arm64 for v5.4-rc1 (Jeremy Cline)
+- Flip off CONFIG_ARM64_VA_BITS_52 so the bundle that turns it on applies (Jeremy Cline)
+- New configuration options for v5.4-rc4 (Jeremy Cline)
+- Correctly name tarball for single tarball builds (Laura Abbott)
+- configs: New config in drivers/pci for v5.4-rc1 (Jeremy Cline)
+- Allow overriding the dist tag on the command line (Laura Abbott)
+- Allow scratch branch target to be overridden (Laura Abbott)
+- Remove long dead BUILD_DEFAULT_TARGET (Laura Abbott)
+- Amend the changelog when rebasing (Laura Abbott)
+- configs: New config in drivers/platform for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/pinctrl for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/wireless for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/ethernet/mellanox for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/net/can for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/hid for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/dma-buf for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/crypto for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/s390 for v5.4-rc1 (Jeremy Cline)
+- configs: New config in block for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/cpuidle for v5.4-rc1 (Jeremy Cline)
+- redhat: configs: Split CONFIG_CRYPTO_SHA512 (Laura Abbott)
+- redhat: Set Fedora options (Laura Abbott)
+- Set CRYPTO_SHA3_*_S390 to builtin on zfcpdump (Jeremy Cline)
+- configs: New config in drivers/edac for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/firmware for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/hwmon for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/iio for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/mmc for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/tty for v5.4-rc1 (Jeremy Cline)
+- configs: New config in arch/s390 for v5.4-rc1 (Jeremy Cline)
+- configs: New config in drivers/bus for v5.4-rc1 (Jeremy Cline)
+- Add option to allow mismatched configs on the command line (Laura Abbott)
+- configs: New config in drivers/crypto for v5.4-rc1 (Jeremy Cline)
+- configs: New config in sound/pci for v5.4-rc1 (Jeremy Cline)
+- configs: New config in sound/soc for v5.4-rc1 (Jeremy Cline)
+- gitlab: Add CI job for packaging scripts (Major Hayden)
+- Speed up CI with CKI image (Major Hayden)
+- Disable e1000 driver in ARK (Neil Horman)
+- configs: Fix the pending default for CONFIG_ARM64_VA_BITS_52 (Jeremy Cline)
+- configs: Turn on OPTIMIZE_INLINING for everything (Jeremy Cline)
+- configs: Set valid pending defaults for CRYPTO_ESSIV (Jeremy Cline)
+- Add an initial CI configuration for the internal branch (Jeremy Cline)
+- New drop of configuration options for v5.4-rc1 (Jeremy Cline)
+- New drop of configuration options for v5.4-rc1 (Jeremy Cline)
+- Pull the RHEL version defines out of the Makefile (Jeremy Cline)
+- Sync up the ARK build scripts (Jeremy Cline)
+- Sync up the Fedora Rawhide configs (Jeremy Cline)
+- Sync up the ARK config files (Jeremy Cline)
+- configs: Adjust CONFIG_FORCE_MAX_ZONEORDER for Fedora (Laura Abbott)
+- configs: Add README for some other arches (Laura Abbott)
+- configs: Sync up Fedora configs (Laura Abbott)
+- [initial commit] Add structure for building with git (Laura Abbott)
+- [initial commit] Add Red Hat variables in the top level makefile (Laura Abbott)
+- [initial commit] Red Hat gitignore and attributes (Laura Abbott)
+- [initial commit] Add changelog (Laura Abbott)
+- [initial commit] Add makefile (Laura Abbott)
+- [initial commit] Add files for generating the kernel.spec (Laura Abbott)
+- [initial commit] Add rpm directory (Laura Abbott)
+- [initial commit] Add files for packaging (Laura Abbott)
+- [initial commit] Add kabi files (Laura Abbott)
+- [initial commit] Add scripts (Laura Abbott)
+- [initial commit] Add configs (Laura Abbott)
+- [initial commit] Add Makefiles (Laura Abbott)
+- Linux v6.7.0-0.rc0.5a6a09e97199
+Resolves: rhbz#1471185, rhbz#1495307, rhbz#1509329, rhbz#1518076, rhbz#1518874, rhbz#1519554, rhbz#1546831, rhbz#1559877, rhbz#1561171, rhbz#1563590, rhbz#1565704, rhbz#1565717, rhbz#1572321, rhbz#1574502, rhbz#1590829, rhbz#1595918, rhbz#1598366, rhbz#1602033, rhbz#1609604, rhbz#1610493, rhbz#1613522, rhbz#1638087, rhbz#1652256, rhbz#1652266, rhbz#1663728, rhbz#1670017, rhbz#1722136, rhbz#1730649, rhbz#1802694, rhbz#1810301, rhbz#1821565, rhbz#1831065, rhbz#1855161, rhbz#1856174, rhbz#1856176, rhbz#1858592, rhbz#1858594, rhbz#1858596, rhbz#1858599, rhbz#1869674, rhbz#1871130, rhbz#1876435, rhbz#1876436, rhbz#1876977, rhbz#1877192, rhbz#1880486, rhbz#1890304, rhbz#1903201, rhbz#1915073, rhbz#1915290, rhbz#1930649, rhbz#1939095, rhbz#1940075, rhbz#1940794, rhbz#1943423, rhbz#1945002, rhbz#1945179, rhbz#1945477, rhbz#1947240, rhbz#1948340, rhbz#1952426, rhbz#1952863, rhbz#1953486, rhbz#1956988, rhbz#1957210, rhbz#1957219, rhbz#1957305, rhbz#1957636, rhbz#1957819, rhbz#1961178, rhbz#1962936, rhbz#1964537, rhbz#1967640, rhbz#1972795, rhbz#1976270, rhbz#1976835, rhbz#1976877, rhbz#1976884, rhbz#1977056, rhbz#1977529, rhbz#1978539, rhbz#1979379, rhbz#1981406, rhbz#1983298, rhbz#1986223, rhbz#1988254, rhbz#1988384, rhbz#1990040, rhbz#1993393, rhbz#1994858, rhbz#1998953, rhbz#2000835, rhbz#2002344, rhbz#2004233, rhbz#2004821, rhbz#2006813, rhbz#2007430, rhbz#2012226, rhbz#2014492, rhbz#2019377, rhbz#2020132, rhbz#2022578, rhbz#2023782, rhbz#2024595, rhbz#2025985, rhbz#2026319, rhbz#2027506, rhbz#2031547, rhbz#2032758, rhbz#2034670, rhbz#2038999, rhbz#2040643, rhbz#2041184, rhbz#2041186, rhbz#2041365, rhbz#2041990, rhbz#2042240, rhbz#2042241, rhbz#2043141, rhbz#2044155, rhbz#2053836, rhbz#2054579, rhbz#2062054, rhbz#2062909, rhbz#2071969, rhbz#2089765, rhbz#2115876, rhbz#2122595, rhbz#2140017, rhbz#2142658, rhbz#2149273, rhbz#2153073, rhbz#2188441, rhbz#2208834, rhbz#2216678, rhbz#2227793, rhbz#2231407
diff --git a/SOURCES/linux-surface.patch b/SOURCES/linux-surface.patch
index 2de6bab..087417f 100644
--- a/SOURCES/linux-surface.patch
+++ b/SOURCES/linux-surface.patch
@@ -1,4 +1,4 @@
-From da55b6ffe4a98a4af6ced4074317ba9d026f84dd Mon Sep 17 00:00:00 2001
+From c9479d2ee549e4b5392c5f788d9905244404e207 Mon Sep 17 00:00:00 2001
 From: Tsuchiya Yuto
 Date: Sun, 18 Oct 2020 16:42:44 +0900
 Subject: [PATCH] (surface3-oemb) add DMI matches for Surface 3 with broken DMI
@@ -40,7 +40,7 @@ Patchset: surface3-oemb
 3 files changed, 24 insertions(+)
 
 diff --git a/drivers/platform/surface/surface3-wmi.c b/drivers/platform/surface/surface3-wmi.c
-index ca4602bcc7dea..490b9731068ae 100644
+index c15ed7a12784..1ec8edb5aafa 100644
 --- a/drivers/platform/surface/surface3-wmi.c
 +++ b/drivers/platform/surface/surface3-wmi.c
 @@ -37,6 +37,13 @@ static const struct dmi_system_id surface3_dmi_table[] = {
 	{ }
 };
 diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
-index 7938b52d741d8..2d5f83b0cdb0b 100644
+index edcb85bd8ea7..cea19fa3fa56 100644
 --- a/sound/soc/codecs/rt5645.c
 +++ b/sound/soc/codecs/rt5645.c
-@@ -3746,6 +3746,15 @@ static const struct dmi_system_id dmi_platform_data[] = {
+@@ -3753,6 +3753,15 @@ static const struct dmi_system_id dmi_platform_data[] = {
 		},
 		.driver_data = (void *)&intel_braswell_platform_data,
 	},
 	/*
 	 * Match for the GPDwin which unfortunately uses somewhat
 diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c
-index cdcbf04b8832f..958305779b125 100644
+index 5e2ec60e2954..207868c699f2 100644
 --- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c
 +++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c
 @@ -27,6 +27,14 @@ static const struct dmi_system_id cht_table[] = {
 };
-- 
-2.42.0
+2.43.0
 
-From 35b3c5195c9fc191de6b5a6e4361762aa37edad2 Mon Sep 17 00:00:00 2001
+From 38181ea8d1f9130ce6d677d306f819d2fa3b5f57 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?=
 Date: Tue, 3 Nov 2020 13:28:04 +0100
 Subject: [PATCH] mwifiex: Add quirk resetting the PCI bridge on MS Surface
 Patchset: mwifiex
 3 files changed, 31 insertions(+), 8 deletions(-)
 
 diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
-index 6697132ecc977..f06b4ebc5bd8e 100644
+index 5f997becdbaa..9a9929424513 100644
 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c
 +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
-@@ -1771,9 +1771,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb)
- static int mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter)
+@@ -1702,9 +1702,21 @@ mwifiex_pcie_send_boot_cmd(struct mwifiex_adapter *adapter, struct sk_buff *skb)
+ static void mwifiex_pcie_init_fw_port(struct mwifiex_adapter *adapter)
 {
 	struct pcie_service_card *card = adapter->card;
+	struct pci_dev *pdev = card->dev;
+	struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
+	pci_reset_function(parent_pdev);
+
 	/* Write the RX ring read pointer in to reg->rx_rdptr */
- 	if (mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr |
- 		tx_wrap)) {
+ 	mwifiex_write_reg(adapter, reg->rx_rdptr, card->rxbd_rdptr | tx_wrap);
+ }
 diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-index dd6d21f1dbfd7..f46b06f8d6435 100644
+index dd6d21f1dbfd..f46b06f8d643 100644
 --- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
 +++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
 @@ -13,7 +13,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
 static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
 diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-index d6ff964aec5bf..5d30ae39d65ec 100644
+index d6ff964aec5b..5d30ae39d65e 100644
 --- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
 +++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
 @@ -4,6 +4,7 @@
 void mwifiex_initialize_quirks(struct pcie_service_card *card);
 int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
-- 
-2.42.0
+2.43.0
 
-From 241da24644ea2f5b8119019448b638aa8df6ab26 Mon Sep 17 00:00:00 2001
+From 86149f1c99b17f67d717419af83f3ec76315e35b Mon Sep 17 00:00:00 2001
 From: Tsuchiya Yuto
 Date: Sun, 4 Oct 2020 00:11:49 +0900
 Subject: [PATCH] mwifiex: pcie: disable bridge_d3 for Surface gen4+
 Patchset: mwifiex
 3 files changed, 27 insertions(+), 8 deletions(-)
 
 diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
-index f06b4ebc5bd8e..07f13b52ddb92 100644
+index 9a9929424513..2273e3029776 100644
 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c
 +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
-@@ -370,6 +370,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
+@@ -377,6 +377,7 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
 			const struct pci_device_id *ent)
 {
 	struct pcie_service_card *card;
+	struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
 	int ret;
 
 	pr_debug("info: vendor=0x%4.04X device=0x%4.04X rev=%d\n",
-@@ -411,6 +412,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
+@@ -418,6 +419,12 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
 		return -1;
 	}
 
}
 diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
-index f46b06f8d6435..99b024ecbadea 100644
+index f46b06f8d643..99b024ecbade 100644
 --- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
 +++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
 @@ -14,7 +14,8 @@ static const struct dmi_system_id mwifiex_quirk_table[] = {
 static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
 diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
-index 5d30ae39d65ec..c14eb56eb9118 100644
+index 5d30ae39d65e..c14eb56eb911 100644
 --- a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
 +++ b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
 @@ -5,6 +5,7 @@
 void mwifiex_initialize_quirks(struct pcie_service_card *card);
 int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
-- 
-2.42.0
+2.43.0
 
-From d20b58f9e2ccec57c66864e79c291c2618ab2dbe Mon Sep 17 00:00:00 2001
+From 23775dc0be26e58d04574ab75768cedd8b0076f8 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?=
 Date: Thu, 25 Mar 2021 11:33:02 +0100
 Subject: [PATCH] Bluetooth: btusb: Lower passive lescan interval on Marvell
 Patchset: mwifiex
 1 file changed, 15 insertions(+)
 
 diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
-index 499f4809fcdf3..2d442e080ca28 100644
+index b8e9de887b5d..66a418ae9584 100644
 --- a/drivers/bluetooth/btusb.c
 +++ b/drivers/bluetooth/btusb.c
 @@ -65,6 +65,7 @@ static struct usb_driver btusb_driver;
 	/* Intel Bluetooth devices */
 	{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED },
-@@ -4388,6 +4390,19 @@ static int btusb_probe(struct usb_interface *intf,
+@@ -4399,6 +4401,19 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_MARVELL)
 		hdev->set_bdaddr = btusb_set_bdaddr_marvell;
 
 	    (id->driver_info & BTUSB_MEDIATEK)) {
 		hdev->setup = btusb_mtk_setup;
-- 
-2.42.0
+2.43.0
 
-From c6f0985fae241ed43ea1245c9e5861e2c728e21e Mon Sep 17 00:00:00 2001
+From 825328cce718ba6de0fce529e8fd1f4cd6b94dde Mon Sep 17 00:00:00 2001
 From: Maximilian Luz
 Date: Sat, 27 Feb 2021 00:45:52 +0100
 Subject: [PATCH] ath10k: Add module parameters to override board files
 Patchset: ath10k
 1 file changed, 58 insertions(+)
 
 diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
b/drivers/net/wireless/ath/ath10k/core.c -index 6cdb225b7eacc..19c036751fb16 100644 +index 6cdb225b7eac..19c036751fb1 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -38,6 +38,9 @@ static bool fw_diag_log; @@ -618,9 +618,9 @@ index 6cdb225b7eacc..19c036751fb16 100644 ret = firmware_request_nowarn(&fw, filename, ar->dev); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", -- -2.42.0 +2.43.0 -From 986fe56f682f93925b2964f59fe78c7043758e47 Mon Sep 17 00:00:00 2001 +From f4e5ac291e877f3e7e5d888f4965310eb85379f5 Mon Sep 17 00:00:00 2001 From: Dorian Stoll Date: Thu, 30 Jul 2020 13:21:53 +0200 Subject: [PATCH] misc: mei: Add missing IPTS device IDs @@ -632,7 +632,7 @@ Patchset: ipts 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h -index bdc65d50b945f..08723c01d7275 100644 +index 961e5d53a27a..860f99b6ecd6 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -92,6 +92,7 @@ @@ -644,7 +644,7 @@ index bdc65d50b945f..08723c01d7275 100644 #define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c -index 676d566f38ddf..6b37dd1f8b2a3 100644 +index 676d566f38dd..6b37dd1f8b2a 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -97,6 +97,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { @@ -656,9 +656,9 @@ index 676d566f38ddf..6b37dd1f8b2a3 100644 {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, -- -2.42.0 +2.43.0 -From 72ee1cbf26ccc575dbfbaee5e7305ab13e1aeb1e Mon Sep 17 00:00:00 2001 +From 4c91dcde022856325e3babe1a1b9e01fcc21ab0f Mon Sep 17 00:00:00 2001 From: Liban Hannan Date: Tue, 12 Apr 2022 23:31:12 +0100 Subject: [PATCH] iommu: ipts: use IOMMU passthrough mode for IPTS @@ -680,7 +680,7 @@ Patchset: ipts 1 file changed, 24 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index 3685ba90ec88e..5a627e081797c 100644 +index 897159dba47d..cc6569613255 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -38,6 +38,8 @@ @@ -692,7 +692,7 @@ index 3685ba90ec88e..5a627e081797c 100644 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) #define IOAPIC_RANGE_START (0xfee00000) -@@ -292,12 +294,14 @@ int intel_iommu_enabled = 0; +@@ -291,12 +293,14 @@ int intel_iommu_enabled = 0; EXPORT_SYMBOL_GPL(intel_iommu_enabled); static int dmar_map_gfx = 1; @@ -706,8 +706,8 @@ index 3685ba90ec88e..5a627e081797c 100644 +#define IDENTMAP_IPTS 16 const struct iommu_ops intel_iommu_ops; - -@@ -2542,6 +2546,9 @@ static int device_def_domain_type(struct device *dev) + static const struct iommu_dirty_ops intel_dirty_ops; +@@ -2548,6 +2552,9 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; @@ -717,7 +717,7 @@ index 3685ba90ec88e..5a627e081797c 100644 } return 0; -@@ -2849,6 +2856,9 @@ static int __init init_dmars(void) +@@ -2855,6 +2862,9 @@ static int __init init_dmars(void) if (!dmar_map_gfx) iommu_identity_mapping |= IDENTMAP_GFX; @@ -727,7 +727,7 @@ index 3685ba90ec88e..5a627e081797c 100644 check_tylersburg_isoch(); ret = si_domain_init(hw_pass_through); -@@ -4828,6 +4838,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev) +@@ -4977,6 +4987,17 @@ static void quirk_iommu_igfx(struct pci_dev *dev) dmar_map_gfx = 0; } @@ -745,7 +745,7 @@ index 3685ba90ec88e..5a627e081797c 100644 /* 
G4x/GM45 integrated gfx dmar support is totally busted. */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); -@@ -4863,6 +4884,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); +@@ -5012,6 +5033,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); @@ -756,9 +756,9 @@ index 3685ba90ec88e..5a627e081797c 100644 { if (risky_device(dev)) -- -2.42.0 +2.43.0 -From 8330f9f39ce8c9796259a8aeffe919fa950e18f5 Mon Sep 17 00:00:00 2001 +From 7a9591af425eafbb76700f7ab1ab3ae0c3a08e4c Mon Sep 17 00:00:00 2001 From: Dorian Stoll Date: Sun, 11 Dec 2022 12:00:59 +0100 Subject: [PATCH] hid: Add support for Intel Precise Touch and Stylus @@ -825,10 +825,10 @@ Patchset: ipts create mode 100644 drivers/hid/ipts/thread.h diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index 790aa908e2a78..0b9d245d10e54 100644 +index 4ce74af79657..86c6c815bd5b 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig -@@ -1345,4 +1345,6 @@ source "drivers/hid/amd-sfh-hid/Kconfig" +@@ -1341,4 +1341,6 @@ source "drivers/hid/amd-sfh-hid/Kconfig" source "drivers/hid/surface-hid/Kconfig" @@ -836,7 +836,7 @@ index 790aa908e2a78..0b9d245d10e54 100644 + endif # HID_SUPPORT diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 8a06d0f840bcb..2ef21b257d0b5 100644 +index 8a06d0f840bc..2ef21b257d0b 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -169,3 +169,5 @@ obj-$(INTEL_ISH_FIRMWARE_DOWNLOADER) += intel-ish-hid/ @@ -847,7 +847,7 @@ index 8a06d0f840bcb..2ef21b257d0b5 100644 +obj-$(CONFIG_HID_IPTS) += ipts/ diff --git a/drivers/hid/ipts/Kconfig b/drivers/hid/ipts/Kconfig new file mode 100644 -index 0000000000000..297401bd388dd +index 000000000000..297401bd388d --- /dev/null +++ b/drivers/hid/ipts/Kconfig @@ -0,0 +1,14 @@ @@ -867,7 +867,7 @@ index 0000000000000..297401bd388dd + module will be called ipts. 
diff --git a/drivers/hid/ipts/Makefile b/drivers/hid/ipts/Makefile new file mode 100644 -index 0000000000000..883896f68e6ad +index 000000000000..883896f68e6a --- /dev/null +++ b/drivers/hid/ipts/Makefile @@ -0,0 +1,16 @@ @@ -889,7 +889,7 @@ index 0000000000000..883896f68e6ad +ipts-objs += thread.o diff --git a/drivers/hid/ipts/cmd.c b/drivers/hid/ipts/cmd.c new file mode 100644 -index 0000000000000..63a4934bbc5fa +index 000000000000..63a4934bbc5f --- /dev/null +++ b/drivers/hid/ipts/cmd.c @@ -0,0 +1,61 @@ @@ -956,7 +956,7 @@ index 0000000000000..63a4934bbc5fa +} diff --git a/drivers/hid/ipts/cmd.h b/drivers/hid/ipts/cmd.h new file mode 100644 -index 0000000000000..2b4079075b642 +index 000000000000..2b4079075b64 --- /dev/null +++ b/drivers/hid/ipts/cmd.h @@ -0,0 +1,60 @@ @@ -1022,7 +1022,7 @@ index 0000000000000..2b4079075b642 +#endif /* IPTS_CMD_H */ diff --git a/drivers/hid/ipts/context.h b/drivers/hid/ipts/context.h new file mode 100644 -index 0000000000000..ba33259f1f7c5 +index 000000000000..ba33259f1f7c --- /dev/null +++ b/drivers/hid/ipts/context.h @@ -0,0 +1,52 @@ @@ -1080,7 +1080,7 @@ index 0000000000000..ba33259f1f7c5 +#endif /* IPTS_CONTEXT_H */ diff --git a/drivers/hid/ipts/control.c b/drivers/hid/ipts/control.c new file mode 100644 -index 0000000000000..5360842d260ba +index 000000000000..5360842d260b --- /dev/null +++ b/drivers/hid/ipts/control.c @@ -0,0 +1,486 @@ @@ -1572,7 +1572,7 @@ index 0000000000000..5360842d260ba +} diff --git a/drivers/hid/ipts/control.h b/drivers/hid/ipts/control.h new file mode 100644 -index 0000000000000..26629c5144edb +index 000000000000..26629c5144ed --- /dev/null +++ b/drivers/hid/ipts/control.h @@ -0,0 +1,126 @@ @@ -1704,7 +1704,7 @@ index 0000000000000..26629c5144edb +#endif /* IPTS_CONTROL_H */ diff --git a/drivers/hid/ipts/desc.h b/drivers/hid/ipts/desc.h new file mode 100644 -index 0000000000000..307438c7c80cd +index 000000000000..307438c7c80c --- /dev/null +++ b/drivers/hid/ipts/desc.h @@ -0,0 +1,80 @@ @@ -1790,7 +1790,7 @@ index 0000000000000..307438c7c80cd +#endif /* IPTS_DESC_H */ diff --git a/drivers/hid/ipts/eds1.c b/drivers/hid/ipts/eds1.c new file mode 100644 -index 0000000000000..ecbb3a8bdaf60 +index 000000000000..ecbb3a8bdaf6 --- /dev/null +++ b/drivers/hid/ipts/eds1.c @@ -0,0 +1,103 @@ @@ -1899,7 +1899,7 @@ index 0000000000000..ecbb3a8bdaf60 +} diff --git a/drivers/hid/ipts/eds1.h b/drivers/hid/ipts/eds1.h new file mode 100644 -index 0000000000000..eeeb6575e3e89 +index 000000000000..eeeb6575e3e8 --- /dev/null +++ b/drivers/hid/ipts/eds1.h @@ -0,0 +1,35 @@ @@ -1940,7 +1940,7 @@ index 0000000000000..eeeb6575e3e89 + enum hid_report_type report_type, enum hid_class_request request_type); diff --git a/drivers/hid/ipts/eds2.c b/drivers/hid/ipts/eds2.c new file mode 100644 -index 0000000000000..198dc65d78876 +index 000000000000..198dc65d7887 --- /dev/null +++ b/drivers/hid/ipts/eds2.c @@ -0,0 +1,144 @@ @@ -2090,7 +2090,7 @@ index 0000000000000..198dc65d78876 +} diff --git a/drivers/hid/ipts/eds2.h b/drivers/hid/ipts/eds2.h new file mode 100644 -index 0000000000000..064e3716907ab +index 000000000000..064e3716907a --- /dev/null +++ b/drivers/hid/ipts/eds2.h @@ -0,0 +1,35 @@ @@ -2131,7 +2131,7 @@ index 0000000000000..064e3716907ab + enum hid_report_type report_type, enum hid_class_request request_type); diff --git a/drivers/hid/ipts/hid.c b/drivers/hid/ipts/hid.c new file mode 100644 -index 0000000000000..e34a1a4f9fa77 +index 000000000000..e34a1a4f9fa7 --- /dev/null +++ b/drivers/hid/ipts/hid.c @@ -0,0 +1,225 @@ @@ -2362,7 +2362,7 @@ index 
0000000000000..e34a1a4f9fa77 +} diff --git a/drivers/hid/ipts/hid.h b/drivers/hid/ipts/hid.h new file mode 100644 -index 0000000000000..1ebe77447903a +index 000000000000..1ebe77447903 --- /dev/null +++ b/drivers/hid/ipts/hid.h @@ -0,0 +1,24 @@ @@ -2392,7 +2392,7 @@ index 0000000000000..1ebe77447903a +#endif /* IPTS_HID_H */ diff --git a/drivers/hid/ipts/main.c b/drivers/hid/ipts/main.c new file mode 100644 -index 0000000000000..fb5b5c13ee3ea +index 000000000000..fb5b5c13ee3e --- /dev/null +++ b/drivers/hid/ipts/main.c @@ -0,0 +1,126 @@ @@ -2524,7 +2524,7 @@ index 0000000000000..fb5b5c13ee3ea +MODULE_LICENSE("GPL"); diff --git a/drivers/hid/ipts/mei.c b/drivers/hid/ipts/mei.c new file mode 100644 -index 0000000000000..1e0395ceae4a4 +index 000000000000..1e0395ceae4a --- /dev/null +++ b/drivers/hid/ipts/mei.c @@ -0,0 +1,188 @@ @@ -2718,7 +2718,7 @@ index 0000000000000..1e0395ceae4a4 +} diff --git a/drivers/hid/ipts/mei.h b/drivers/hid/ipts/mei.h new file mode 100644 -index 0000000000000..973bade6b0fdd +index 000000000000..973bade6b0fd --- /dev/null +++ b/drivers/hid/ipts/mei.h @@ -0,0 +1,66 @@ @@ -2790,7 +2790,7 @@ index 0000000000000..973bade6b0fdd +#endif /* IPTS_MEI_H */ diff --git a/drivers/hid/ipts/receiver.c b/drivers/hid/ipts/receiver.c new file mode 100644 -index 0000000000000..ef66c3c9db807 +index 000000000000..ef66c3c9db80 --- /dev/null +++ b/drivers/hid/ipts/receiver.c @@ -0,0 +1,250 @@ @@ -3046,7 +3046,7 @@ index 0000000000000..ef66c3c9db807 +} diff --git a/drivers/hid/ipts/receiver.h b/drivers/hid/ipts/receiver.h new file mode 100644 -index 0000000000000..3de7da62d40c1 +index 000000000000..3de7da62d40c --- /dev/null +++ b/drivers/hid/ipts/receiver.h @@ -0,0 +1,16 @@ @@ -3068,7 +3068,7 @@ index 0000000000000..3de7da62d40c1 +#endif /* IPTS_RECEIVER_H */ diff --git a/drivers/hid/ipts/resources.c b/drivers/hid/ipts/resources.c new file mode 100644 -index 0000000000000..cc14653b2a9f5 +index 000000000000..cc14653b2a9f --- /dev/null +++ b/drivers/hid/ipts/resources.c @@ -0,0 +1,131 @@ @@ -3205,7 +3205,7 @@ index 0000000000000..cc14653b2a9f5 +} diff --git a/drivers/hid/ipts/resources.h b/drivers/hid/ipts/resources.h new file mode 100644 -index 0000000000000..2068e13285f0e +index 000000000000..2068e13285f0 --- /dev/null +++ b/drivers/hid/ipts/resources.h @@ -0,0 +1,41 @@ @@ -3252,7 +3252,7 @@ index 0000000000000..2068e13285f0e +#endif /* IPTS_RESOURCES_H */ diff --git a/drivers/hid/ipts/spec-data.h b/drivers/hid/ipts/spec-data.h new file mode 100644 -index 0000000000000..e8dd98895a7ee +index 000000000000..e8dd98895a7e --- /dev/null +++ b/drivers/hid/ipts/spec-data.h @@ -0,0 +1,100 @@ @@ -3358,7 +3358,7 @@ index 0000000000000..e8dd98895a7ee +#endif /* IPTS_SPEC_DATA_H */ diff --git a/drivers/hid/ipts/spec-device.h b/drivers/hid/ipts/spec-device.h new file mode 100644 -index 0000000000000..41845f9d90257 +index 000000000000..41845f9d9025 --- /dev/null +++ b/drivers/hid/ipts/spec-device.h @@ -0,0 +1,290 @@ @@ -3654,7 +3654,7 @@ index 0000000000000..41845f9d90257 +#endif /* IPTS_SPEC_DEVICE_H */ diff --git a/drivers/hid/ipts/spec-hid.h b/drivers/hid/ipts/spec-hid.h new file mode 100644 -index 0000000000000..5a58d4a0a610f +index 000000000000..5a58d4a0a610 --- /dev/null +++ b/drivers/hid/ipts/spec-hid.h @@ -0,0 +1,34 @@ @@ -3694,7 +3694,7 @@ index 0000000000000..5a58d4a0a610f +#endif /* IPTS_SPEC_HID_H */ diff --git a/drivers/hid/ipts/thread.c b/drivers/hid/ipts/thread.c new file mode 100644 -index 0000000000000..355e92bea26f8 +index 000000000000..355e92bea26f --- /dev/null +++ 
b/drivers/hid/ipts/thread.c @@ -0,0 +1,84 @@ @@ -3784,7 +3784,7 @@ index 0000000000000..355e92bea26f8 +} diff --git a/drivers/hid/ipts/thread.h b/drivers/hid/ipts/thread.h new file mode 100644 -index 0000000000000..1f966b8b32c45 +index 000000000000..1f966b8b32c4 --- /dev/null +++ b/drivers/hid/ipts/thread.h @@ -0,0 +1,59 @@ @@ -3848,9 +3848,9 @@ index 0000000000000..1f966b8b32c45 + +#endif /* IPTS_THREAD_H */ -- -2.42.0 +2.43.0 -From 033de13abc9653b2d773f06182465e03d5d6463b Mon Sep 17 00:00:00 2001 +From 8aadfc38967cb2804446c8bdae851377651e6248 Mon Sep 17 00:00:00 2001 From: Dorian Stoll Date: Sun, 11 Dec 2022 12:03:38 +0100 Subject: [PATCH] iommu: intel: Disable source id verification for ITHC @@ -3862,7 +3862,7 @@ Patchset: ithc 1 file changed, 16 insertions(+) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c -index 29b9e55dcf26c..986e91c813ae1 100644 +index 29b9e55dcf26..986e91c813ae 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -386,6 +386,22 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) @@ -3889,14 +3889,14 @@ index 29b9e55dcf26c..986e91c813ae1 100644 * DMA alias provides us with a PCI device and alias. The only case * where the it will return an alias on a different bus than the -- -2.42.0 +2.43.0 -From 0dd32bcfb70f9e36cfa009d94cd6c86a4839cff3 Mon Sep 17 00:00:00 2001 -From: Dorian Stoll +From fe08b40d122fdb102c2cc4876d2d68ac19d74ae3 Mon Sep 17 00:00:00 2001 +From: quo Date: Sun, 11 Dec 2022 12:10:54 +0100 Subject: [PATCH] hid: Add support for Intel Touch Host Controller -Based on quo/ithc-linux@55803a2 +Based on quo/ithc-linux@0b8b45d Signed-off-by: Dorian Stoll Patchset: ithc @@ -3905,14 +3905,14 @@ Patchset: ithc drivers/hid/Makefile | 1 + drivers/hid/ithc/Kbuild | 6 + drivers/hid/ithc/Kconfig | 12 + - drivers/hid/ithc/ithc-debug.c | 96 ++++++ - drivers/hid/ithc/ithc-dma.c | 258 ++++++++++++++++ - drivers/hid/ithc/ithc-dma.h | 67 +++++ - drivers/hid/ithc/ithc-main.c | 534 ++++++++++++++++++++++++++++++++++ - drivers/hid/ithc/ithc-regs.c | 64 ++++ - drivers/hid/ithc/ithc-regs.h | 186 ++++++++++++ - drivers/hid/ithc/ithc.h | 60 ++++ - 11 files changed, 1286 insertions(+) + drivers/hid/ithc/ithc-debug.c | 130 ++++++ + drivers/hid/ithc/ithc-dma.c | 373 +++++++++++++++++ + drivers/hid/ithc/ithc-dma.h | 69 ++++ + drivers/hid/ithc/ithc-main.c | 728 ++++++++++++++++++++++++++++++++++ + drivers/hid/ithc/ithc-regs.c | 96 +++++ + drivers/hid/ithc/ithc-regs.h | 189 +++++++++ + drivers/hid/ithc/ithc.h | 67 ++++ + 11 files changed, 1673 insertions(+) create mode 100644 drivers/hid/ithc/Kbuild create mode 100644 drivers/hid/ithc/Kconfig create mode 100644 drivers/hid/ithc/ithc-debug.c @@ -3924,10 +3924,10 @@ Patchset: ithc create mode 100644 drivers/hid/ithc/ithc.h diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig -index 0b9d245d10e54..8ba1c309228be 100644 +index 86c6c815bd5b..a87c3c6911fb 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig -@@ -1347,4 +1347,6 @@ source "drivers/hid/surface-hid/Kconfig" +@@ -1343,4 +1343,6 @@ source "drivers/hid/surface-hid/Kconfig" source "drivers/hid/ipts/Kconfig" @@ -3935,7 +3935,7 @@ index 0b9d245d10e54..8ba1c309228be 100644 + endif # HID_SUPPORT diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile -index 2ef21b257d0b5..e94b79727b489 100644 +index 2ef21b257d0b..e94b79727b48 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -171,3 +171,4 @@ obj-$(CONFIG_AMD_SFH_HID) += amd-sfh-hid/ @@ -3945,7 +3945,7 @@ index 
2ef21b257d0b5..e94b79727b489 100644 +obj-$(CONFIG_HID_ITHC) += ithc/ diff --git a/drivers/hid/ithc/Kbuild b/drivers/hid/ithc/Kbuild new file mode 100644 -index 0000000000000..aea83f2ac07b4 +index 000000000000..aea83f2ac07b --- /dev/null +++ b/drivers/hid/ithc/Kbuild @@ -0,0 +1,6 @@ @@ -3957,7 +3957,7 @@ index 0000000000000..aea83f2ac07b4 + diff --git a/drivers/hid/ithc/Kconfig b/drivers/hid/ithc/Kconfig new file mode 100644 -index 0000000000000..ede7130236096 +index 000000000000..ede713023609 --- /dev/null +++ b/drivers/hid/ithc/Kconfig @@ -0,0 +1,12 @@ @@ -3975,17 +3975,21 @@ index 0000000000000..ede7130236096 + module will be called ithc. diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c new file mode 100644 -index 0000000000000..57bf125c45bd5 +index 000000000000..1f1f1e33f2e5 --- /dev/null +++ b/drivers/hid/ithc/ithc-debug.c -@@ -0,0 +1,96 @@ +@@ -0,0 +1,130 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause ++ +#include "ithc.h" + -+void ithc_log_regs(struct ithc *ithc) { -+ if (!ithc->prev_regs) return; -+ u32 __iomem *cur = (__iomem void*)ithc->regs; -+ u32 *prev = (void*)ithc->prev_regs; -+ for (int i = 1024; i < sizeof *ithc->regs / 4; i++) { ++void ithc_log_regs(struct ithc *ithc) ++{ ++ if (!ithc->prev_regs) ++ return; ++ u32 __iomem *cur = (__iomem void *)ithc->regs; ++ u32 *prev = (void *)ithc->prev_regs; ++ for (int i = 1024; i < sizeof(*ithc->regs) / 4; i++) { + u32 x = readl(cur + i); + if (x != prev[i]) { + pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x); @@ -3994,55 +3998,79 @@ index 0000000000000..57bf125c45bd5 + } +} + -+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) { ++static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, ++ loff_t *offset) ++{ ++ // Debug commands consist of a single letter followed by a list of numbers (decimal or ++ // hexadecimal, space-separated). + struct ithc *ithc = file_inode(f)->i_private; + char cmd[256]; -+ if (!ithc || !ithc->pci) return -ENODEV; -+ if (!len) return -EINVAL; -+ if (len >= sizeof cmd) return -EINVAL; -+ if (copy_from_user(cmd, buf, len)) return -EFAULT; ++ if (!ithc || !ithc->pci) ++ return -ENODEV; ++ if (!len) ++ return -EINVAL; ++ if (len >= sizeof(cmd)) ++ return -EINVAL; ++ if (copy_from_user(cmd, buf, len)) ++ return -EFAULT; + cmd[len] = 0; -+ if (cmd[len-1] == '\n') cmd[len-1] = 0; ++ if (cmd[len-1] == '\n') ++ cmd[len-1] = 0; + pci_info(ithc->pci, "debug command: %s\n", cmd); ++ ++ // Parse the list of arguments into a u32 array. + u32 n = 0; + const char *s = cmd + 1; + u32 a[32]; + while (*s && *s != '\n') { -+ if (n >= ARRAY_SIZE(a)) return -EINVAL; -+ if (*s++ != ' ') return -EINVAL; ++ if (n >= ARRAY_SIZE(a)) ++ return -EINVAL; ++ if (*s++ != ' ') ++ return -EINVAL; + char *e; + a[n++] = simple_strtoul(s, &e, 0); -+ if (e == s) return -EINVAL; ++ if (e == s) ++ return -EINVAL; + s = e; + } + ithc_log_regs(ithc); -+ switch(cmd[0]) { ++ ++ // Execute the command. 
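+	// Command letters match the case labels below: 'x' = reset, 'w' = write register,
+	// 'r' = read register, 's' = SPI command, 'd' = DMA command.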
++ switch (cmd[0]) { + case 'x': // reset + ithc_reset(ithc); + break; + case 'w': // write register: offset mask value -+ if (n != 3 || (a[0] & 3)) return -EINVAL; -+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]); ++ if (n != 3 || (a[0] & 3)) ++ return -EINVAL; ++ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", ++ a[0], a[2], a[1]); + bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]); + break; + case 'r': // read register: offset -+ if (n != 1 || (a[0] & 3)) return -EINVAL; -+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); ++ if (n != 1 || (a[0] & 3)) ++ return -EINVAL; ++ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], ++ readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); + break; + case 's': // spi command: cmd offset len data... + // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // set touch cfg: s 6 12 4 XX -+ if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL; ++ if (n < 3 || a[2] > (n - 3) * 4) ++ return -EINVAL; + pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]); + if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3)) -+ for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); ++ for (u32 i = 0; i < (a[2] + 3) / 4; i++) ++ pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); + break; + case 'd': // dma command: cmd len data... + // get report descriptor: d 7 8 0 0 + // enable multitouch: d 3 2 0x0105 -+ if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL; ++ if (n < 2 || a[1] > (n - 2) * 4) ++ return -EINVAL; + pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]); -+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n"); ++ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) ++ pci_err(ithc->pci, "dma tx failed\n"); + break; + default: + return -EINVAL; @@ -4056,87 +4084,125 @@ index 0000000000000..57bf125c45bd5 + .write = ithc_debugfs_cmd_write, +}; + -+static void ithc_debugfs_devres_release(struct device *dev, void *res) { ++static void ithc_debugfs_devres_release(struct device *dev, void *res) ++{ + struct dentry **dbgm = res; -+ if (*dbgm) debugfs_remove_recursive(*dbgm); ++ if (*dbgm) ++ debugfs_remove_recursive(*dbgm); +} + -+int ithc_debug_init(struct ithc *ithc) { -+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL); -+ if (!dbgm) return -ENOMEM; ++int ithc_debug_init(struct ithc *ithc) ++{ ++ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof(*dbgm), GFP_KERNEL); ++ if (!dbgm) ++ return -ENOMEM; + devres_add(&ithc->pci->dev, dbgm); + struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL); -+ if (IS_ERR(dbg)) return PTR_ERR(dbg); ++ if (IS_ERR(dbg)) ++ return PTR_ERR(dbg); + *dbgm = dbg; + + struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd); -+ if (IS_ERR(cmd)) return PTR_ERR(cmd); ++ if (IS_ERR(cmd)) ++ return PTR_ERR(cmd); + + return 0; +} + diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c new file mode 100644 -index 0000000000000..7e89b3496918d +index 000000000000..ffb8689b8a78 --- /dev/null +++ b/drivers/hid/ithc/ithc-dma.c -@@ -0,0 +1,258 @@ +@@ -0,0 +1,373 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause ++ +#include "ithc.h" + -+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) { ++// The 
THC uses tables of PRDs (physical region descriptors) to describe the TX and RX data buffers. ++// Each PRD contains the DMA address and size of a block of DMA memory, and some status flags. ++// This allows each data buffer to consist of multiple non-contiguous blocks of memory. ++ ++static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, ++ unsigned int num_buffers, unsigned int num_pages, enum dma_data_direction dir) ++{ + p->num_pages = num_pages; + p->dir = dir; ++ // We allocate enough space to have one PRD per data buffer page, however if the data ++ // buffer pages happen to be contiguous, we can describe the buffer using fewer PRDs, so ++ // some will remain unused (which is fine). + p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE); + p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL); -+ if (!p->addr) return -ENOMEM; -+ if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT; ++ if (!p->addr) ++ return -ENOMEM; ++ if (p->dma_addr & (PAGE_SIZE - 1)) ++ return -EFAULT; + return 0; +} + ++// Devres managed sg_table wrapper. +struct ithc_sg_table { + void *addr; + struct sg_table sgt; + enum dma_data_direction dir; +}; -+static void ithc_dma_sgtable_free(struct sg_table *sgt) { ++static void ithc_dma_sgtable_free(struct sg_table *sgt) ++{ + struct scatterlist *sg; + int i; + for_each_sgtable_sg(sgt, sg, i) { + struct page *p = sg_page(sg); -+ if (p) __free_page(p); ++ if (p) ++ __free_page(p); + } + sg_free_table(sgt); +} -+static void ithc_dma_data_devres_release(struct device *dev, void *res) { ++static void ithc_dma_data_devres_release(struct device *dev, void *res) ++{ + struct ithc_sg_table *sgt = res; -+ if (sgt->addr) vunmap(sgt->addr); ++ if (sgt->addr) ++ vunmap(sgt->addr); + dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0); + ithc_dma_sgtable_free(&sgt->sgt); +} + -+static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) { -+ // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional). -+ // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now). ++static int ithc_dma_data_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, ++ struct ithc_dma_data_buffer *b) ++{ ++ // We don't use dma_alloc_coherent() for data buffers, because they don't have to be ++ // coherent (they are unidirectional) or contiguous (we can use one PRD per page). ++ // We could use dma_alloc_noncontiguous(), however this still always allocates a single ++ // DMA mapped segment, which is more restrictive than what we need. ++ // Instead we use an sg_table of individually allocated pages. 
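+	// (The fixed-size pages[] array below caps a single data buffer at 16 pages,
+	// i.e. 64 KiB with 4 KiB pages; the check just below returns -EINVAL for
+	// anything larger.)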
+ struct page *pages[16]; -+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL; ++ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) ++ return -EINVAL; + b->active_idx = -1; -+ struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL); -+ if (!sgt) return -ENOMEM; ++ struct ithc_sg_table *sgt = devres_alloc( ++ ithc_dma_data_devres_release, sizeof(*sgt), GFP_KERNEL); ++ if (!sgt) ++ return -ENOMEM; + sgt->dir = prds->dir; ++ + if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) { + struct scatterlist *sg; + int i; + bool ok = true; + for_each_sgtable_sg(&sgt->sgt, sg, i) { -+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA -+ if (!p) { ok = false; break; } ++ // NOTE: don't need __GFP_DMA for PCI DMA ++ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!p) { ++ ok = false; ++ break; ++ } + sg_set_page(sg, p, PAGE_SIZE, 0); + } + if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) { + devres_add(&ithc->pci->dev, sgt); + b->sgt = &sgt->sgt; + b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL); -+ if (!b->addr) return -ENOMEM; ++ if (!b->addr) ++ return -ENOMEM; + return 0; + } + ithc_dma_sgtable_free(&sgt->sgt); @@ -4145,17 +4211,29 @@ index 0000000000000..7e89b3496918d + return -ENOMEM; +} + -+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { ++static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, ++ struct ithc_dma_data_buffer *b, unsigned int idx) ++{ ++ // Give a buffer to the THC. + struct ithc_phys_region_desc *prd = prds->addr; + prd += idx * prds->num_pages; -+ if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; } ++ if (b->active_idx >= 0) { ++ pci_err(ithc->pci, "buffer already active\n"); ++ return -EINVAL; ++ } + b->active_idx = idx; + if (prds->dir == DMA_TO_DEVICE) { -+ if (b->data_size > PAGE_SIZE) return -EINVAL; ++ // TX buffer: Caller should have already filled the data buffer, so just fill ++ // the PRD and flush. ++ // (TODO: Support multi-page TX buffers. So far no device seems to use or need ++ // these though.) ++ if (b->data_size > PAGE_SIZE) ++ return -EINVAL; + prd->addr = sg_dma_address(b->sgt->sgl) >> 10; + prd->size = b->data_size | PRD_FLAG_END; + flush_kernel_vmap_range(b->addr, b->data_size); + } else if (prds->dir == DMA_FROM_DEVICE) { ++ // RX buffer: Reset PRDs. + struct scatterlist *sg; + int i; + for_each_sgtable_dma_sg(b->sgt, sg, i) { @@ -4170,21 +4248,34 @@ index 0000000000000..7e89b3496918d + return 0; +} + -+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { ++static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, ++ struct ithc_dma_data_buffer *b, unsigned int idx) ++{ ++ // Take a buffer from the THC. + struct ithc_phys_region_desc *prd = prds->addr; + prd += idx * prds->num_pages; -+ if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; } ++ // This is purely a sanity check. We don't strictly need the idx parameter for this ++ // function, because it should always be the same as active_idx, unless we have a bug. 
++ if (b->active_idx != idx) { ++ pci_err(ithc->pci, "wrong buffer index\n"); ++ return -EINVAL; ++ } + b->active_idx = -1; + if (prds->dir == DMA_FROM_DEVICE) { ++ // RX buffer: Calculate actual received data size from PRDs. + dma_rmb(); // for the prds + b->data_size = 0; + struct scatterlist *sg; + int i; + for_each_sgtable_dma_sg(b->sgt, sg, i) { -+ unsigned size = prd->size; ++ unsigned int size = prd->size; + b->data_size += size & PRD_SIZE_MASK; -+ if (size & PRD_FLAG_END) break; -+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; } ++ if (size & PRD_FLAG_END) ++ break; ++ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { ++ pci_err(ithc->pci, "truncated prd\n"); ++ break; ++ } + prd++; + } + invalidate_kernel_vmap_range(b->addr, b->data_size); @@ -4193,93 +4284,139 @@ index 0000000000000..7e89b3496918d + return 0; +} + -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) { ++int ithc_dma_rx_init(struct ithc *ithc, u8 channel) ++{ + struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; + mutex_init(&rx->mutex); ++ ++ // Allocate buffers. + u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes); -+ unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages); ++ unsigned int num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; ++ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", ++ NUM_RX_BUF, buf_size, num_pages); + CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE); -+ for (unsigned i = 0; i < NUM_RX_BUF; i++) ++ for (unsigned int i = 0; i < NUM_RX_BUF; i++) + CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]); ++ ++ // Init registers. + writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2); + lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr); + writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs); + writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds); + u8 head = readb(&ithc->regs->dma_rx[channel].head); -+ if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; } -+ for (unsigned i = 0; i < NUM_RX_BUF; i++) ++ if (head) { ++ pci_err(ithc->pci, "head is nonzero (%u)\n", head); ++ return -EIO; ++ } ++ ++ // Init buffers. ++ for (unsigned int i = 0; i < NUM_RX_BUF; i++) + CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i); ++ + writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail); + return 0; +} -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) { -+ bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); ++ ++void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) ++{ ++ bitsb_set(&ithc->regs->dma_rx[channel].control, ++ DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); ++ CHECK(waitl, ithc, &ithc->regs->dma_rx[channel].status, ++ DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); +} + -+int ithc_dma_tx_init(struct ithc *ithc) { ++int ithc_dma_tx_init(struct ithc *ithc) ++{ + struct ithc_dma_tx *tx = &ithc->dma_tx; + mutex_init(&tx->mutex); ++ ++ // Allocate buffers. 
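+	// Unlike the RX path, TX uses a single data buffer; ithc_dma_tx() serializes
+	// all transfers through dma_tx.mutex, so one in-flight command is enough.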
+ tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes); -+ unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages); ++ unsigned int num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; ++ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", ++ tx->max_size, num_pages); + CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE); + CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf); ++ ++ // Init registers. + lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr); + writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds); ++ ++ // Init buffers. + CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); + return 0; +} + -+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) { ++static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, ++ u8 channel, u8 buf) ++{ + if (buf >= NUM_RX_BUF) { + pci_err(ithc->pci, "invalid dma ringbuffer index\n"); + return -EINVAL; + } -+ ithc_set_active(ithc); + u32 len = data->data_size; + struct ithc_dma_rx_header *hdr = data->addr; + u8 *hiddata = (void *)(hdr + 1); -+ if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) { ++ if (len >= sizeof(*hdr) && hdr->code == DMA_RX_CODE_RESET) { ++ // The THC sends a reset request when we need to reinitialize the device. ++ // This usually only happens if we send an invalid command or put the device ++ // in a bad state. + CHECK(ithc_reset, ithc); -+ } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) { ++ } else if (len < sizeof(*hdr) || len != sizeof(*hdr) + hdr->data_size) { + if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ // When the CPU enters a low power state during DMA, we can get truncated messages. -+ // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes. ++ // When the CPU enters a low power state during DMA, we can get truncated ++ // messages. For Surface devices, this will typically be a single touch ++ // report that is only 1 byte, or a multitouch report that is 257 bytes. + // See also ithc_set_active(). + } else { -+ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); ++ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", ++ channel, buf, len, hdr->code, hdr->data_size); ++ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, ++ hdr, min(len, 0x400u), 0); + } + } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) { ++ // Response to a 'get report descriptor' request. ++ // The actual descriptor is preceded by 8 nul bytes. + CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8); + WRITE_ONCE(ithc->hid_parse_done, true); + wake_up(&ithc->wait_hid_parse); + } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { ++ // Standard HID input report containing touch data. + CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1); + } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) { ++ // Response to a 'get feature' request. 
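+		// hid_get_feature_buf doubles as a 'request pending' flag:
+		// ithc_hid_raw_request() sets it before transmitting and sleeps until
+		// it is cleared here (or until the request times out).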
+ bool done = false; + mutex_lock(&ithc->hid_get_feature_mutex); + if (ithc->hid_get_feature_buf) { -+ if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size; ++ if (hdr->data_size < ithc->hid_get_feature_size) ++ ithc->hid_get_feature_size = hdr->data_size; + memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size); + ithc->hid_get_feature_buf = NULL; + done = true; + } + mutex_unlock(&ithc->hid_get_feature_mutex); -+ if (done) wake_up(&ithc->wait_hid_get_feature); -+ else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1); ++ if (done) { ++ wake_up(&ithc->wait_hid_get_feature); ++ } else { ++ // Received data without a matching request, or the request already ++ // timed out. (XXX What's the correct thing to do here?) ++ CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, ++ hiddata, hdr->data_size, 1); ++ } + } else { -+ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); ++ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", ++ channel, buf, len, hdr->code); ++ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, ++ hdr, min(len, 0x400u), 0); + } + return 0; +} + -+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { ++static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) ++{ ++ // Process all filled RX buffers from the ringbuffer. + struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -+ unsigned n = rx->num_received; ++ unsigned int n = rx->num_received; + u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head); + while (1) { + u8 tail = n % NUM_RX_BUF; @@ -4287,7 +4424,8 @@ index 0000000000000..7e89b3496918d + writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail); + // ringbuffer is full if tail_wrap == head_wrap + // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG -+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0; ++ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) ++ return 0; + + // take the buffer that the device just filled + struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF]; @@ -4301,7 +4439,8 @@ index 0000000000000..7e89b3496918d + CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail); + } +} -+int ithc_dma_rx(struct ithc *ithc, u8 channel) { ++int ithc_dma_rx(struct ithc *ithc, u8 channel) ++{ + struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; + mutex_lock(&rx->mutex); + int ret = ithc_dma_rx_unlocked(ithc, channel); @@ -4309,14 +4448,21 @@ index 0000000000000..7e89b3496918d + return ret; +} + -+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { ++static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) ++{ ++ ithc_set_active(ithc, 100 * USEC_PER_MSEC); ++ ++ // Send a single TX buffer to the THC. + pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize); + struct ithc_dma_tx_header *hdr; ++ // Data must be padded to next 4-byte boundary. + u8 padding = datasize & 3 ? 
4 - (datasize & 3) : 0; -+ unsigned fullsize = sizeof *hdr + datasize + padding; -+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL; ++ unsigned int fullsize = sizeof(*hdr) + datasize + padding; ++ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) ++ return -EINVAL; + CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); + ++ // Fill the TX buffer with header and data. + ithc->dma_tx.buf.data_size = fullsize; + hdr = ithc->dma_tx.buf.addr; + hdr->code = cmdcode; @@ -4324,15 +4470,18 @@ index 0000000000000..7e89b3496918d + u8 *dest = (void *)(hdr + 1); + memcpy(dest, data, datasize); + dest += datasize; -+ for (u8 p = 0; p < padding; p++) *dest++ = 0; ++ for (u8 p = 0; p < padding; p++) ++ *dest++ = 0; + CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); + ++ // Let the THC process the buffer. + bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND); + CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); + writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status); + return 0; +} -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { ++int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) ++{ + mutex_lock(&ithc->dma_tx.mutex); + int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data); + mutex_unlock(&ithc->dma_tx.mutex); @@ -4341,10 +4490,12 @@ index 0000000000000..7e89b3496918d + diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h new file mode 100644 -index 0000000000000..d9f2c19a13f3a +index 000000000000..93652e4476bf --- /dev/null +++ b/drivers/hid/ithc/ithc-dma.h -@@ -0,0 +1,67 @@ +@@ -0,0 +1,69 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ ++ +#define PRD_SIZE_MASK 0xffffff +#define PRD_FLAG_END 0x1000000 +#define PRD_FLAG_SUCCESS 0x2000000 @@ -4406,7 +4557,7 @@ index 0000000000000..d9f2c19a13f3a + struct ithc_dma_data_buffer bufs[NUM_RX_BUF]; +}; + -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname); ++int ithc_dma_rx_init(struct ithc *ithc, u8 channel); +void ithc_dma_rx_enable(struct ithc *ithc, u8 channel); +int ithc_dma_tx_init(struct ithc *ithc); +int ithc_dma_rx(struct ithc *ithc, u8 channel); @@ -4414,10 +4565,12 @@ index 0000000000000..d9f2c19a13f3a + diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c new file mode 100644 -index 0000000000000..09512b9cb4d31 +index 000000000000..87ed4aa70fda --- /dev/null +++ b/drivers/hid/ithc/ithc-main.c -@@ -0,0 +1,534 @@ +@@ -0,0 +1,728 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause ++ +#include "ithc.h" + +MODULE_DESCRIPTION("Intel Touch Host Controller driver"); @@ -4462,6 +4615,9 @@ index 0000000000000..09512b9cb4d31 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) }, ++ // XXX So far the THC seems to be the only Intel PCI device with PCI_CLASS_INPUT_PEN, ++ // so instead of the device list we could just do: ++ // { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .class = PCI_CLASS_INPUT_PEN, .class_mask = ~0, }, + {} +}; +MODULE_DEVICE_TABLE(pci, ithc_pci_tbl); @@ -4472,6 +4628,7 @@ index 0000000000000..09512b9cb4d31 +module_param_named(poll, ithc_use_polling, bool, 0); +MODULE_PARM_DESC(poll, "Use polling instead of interrupts"); + ++// Since all 
known devices seem to use only channel 1, by default we disable channel 0. +static bool ithc_use_rx0 = false; +module_param_named(rx0, ithc_use_rx0, bool, 0); +MODULE_PARM_DESC(rx0, "Use DMA RX channel 0"); @@ -4480,37 +4637,56 @@ index 0000000000000..09512b9cb4d31 +module_param_named(rx1, ithc_use_rx1, bool, 0); +MODULE_PARM_DESC(rx1, "Use DMA RX channel 1"); + -+static bool ithc_log_regs_enabled = false; -+module_param_named(logregs, ithc_log_regs_enabled, bool, 0); -+MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)"); -+ ++// Values below 250 seem to work well on the SP7+. If this is set too high, you may observe cursor stuttering. ++static int ithc_dma_latency_us = 200; ++module_param_named(dma_latency_us, ithc_dma_latency_us, int, 0); ++MODULE_PARM_DESC(dma_latency_us, "Determines the CPU latency QoS value for DMA transfers (in microseconds), -1 to disable latency QoS"); ++ ++// Values above 1700 seem to work well on the SP7+. If this is set too low, you may observe cursor stuttering. ++static unsigned int ithc_dma_early_us = 2000; ++module_param_named(dma_early_us, ithc_dma_early_us, uint, 0); ++MODULE_PARM_DESC(dma_early_us, "Determines how early the CPU latency QoS value is applied before the next expected IRQ (in microseconds)"); ++ ++static bool ithc_log_regs_enabled = false; ++module_param_named(logregs, ithc_log_regs_enabled, bool, 0); ++MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)"); ++ +// Sysfs attributes + -+static bool ithc_is_config_valid(struct ithc *ithc) { ++static bool ithc_is_config_valid(struct ithc *ithc) ++{ + return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC; +} + -+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) { ++static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) ++{ + struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; ++ if (!ithc || !ithc_is_config_valid(ithc)) ++ return -ENODEV; + return sprintf(buf, "0x%04x", ithc->config.vendor_id); +} +static DEVICE_ATTR_RO(vendor); -+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) { ++static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) ++{ + struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; ++ if (!ithc || !ithc_is_config_valid(ithc)) ++ return -ENODEV; + return sprintf(buf, "0x%04x", ithc->config.product_id); +} +static DEVICE_ATTR_RO(product); -+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) { ++static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) ++{ + struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; ++ if (!ithc || !ithc_is_config_valid(ithc)) ++ return -ENODEV; + return sprintf(buf, "%u", ithc->config.revision); +} +static DEVICE_ATTR_RO(revision); -+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { ++static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) ++{ + struct ithc *ithc = dev_get_drvdata(dev); -+ if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; ++ if (!ithc || !ithc_is_config_valid(ithc)) ++ return -ENODEV; + u32 v = ithc->config.fw_version; + return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff); +} @@ -4537,45 +4713,75 @@ index 
0000000000000..09512b9cb4d31 +static int ithc_hid_open(struct hid_device *hdev) { return 0; } +static void ithc_hid_close(struct hid_device *hdev) { } + -+static int ithc_hid_parse(struct hid_device *hdev) { ++static int ithc_hid_parse(struct hid_device *hdev) ++{ + struct ithc *ithc = hdev->driver_data; + u64 val = 0; + WRITE_ONCE(ithc->hid_parse_done, false); -+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val); -+ if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT; -+ return 0; ++ for (int retries = 0; ; retries++) { ++ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof(val), &val); ++ if (wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), ++ msecs_to_jiffies(200))) ++ return 0; ++ if (retries > 5) { ++ pci_err(ithc->pci, "failed to read report descriptor\n"); ++ return -ETIMEDOUT; ++ } ++ pci_warn(ithc->pci, "failed to read report descriptor, retrying\n"); ++ } +} + -+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { ++static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, ++ size_t len, unsigned char rtype, int reqtype) ++{ + struct ithc *ithc = hdev->driver_data; -+ if (!buf || !len) return -EINVAL; ++ if (!buf || !len) ++ return -EINVAL; + u32 code; -+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE; -+ else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE; -+ else { -+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum); ++ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) { ++ code = DMA_TX_CODE_OUTPUT_REPORT; ++ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) { ++ code = DMA_TX_CODE_SET_FEATURE; ++ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) { ++ code = DMA_TX_CODE_GET_FEATURE; ++ } else { ++ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", ++ rtype, reqtype, reportnum); + return -EINVAL; + } + buf[0] = reportnum; ++ + if (reqtype == HID_REQ_GET_REPORT) { ++ // Prepare for response. + mutex_lock(&ithc->hid_get_feature_mutex); + ithc->hid_get_feature_buf = buf; + ithc->hid_get_feature_size = len; + mutex_unlock(&ithc->hid_get_feature_mutex); ++ ++ // Transmit 'get feature' request. + int r = CHECK(ithc_dma_tx, ithc, code, 1, buf); + if (!r) { -+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -+ if (!r) r = -ETIMEDOUT; -+ else if (r < 0) r = -EINTR; -+ else r = 0; ++ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, ++ !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); ++ if (!r) ++ r = -ETIMEDOUT; ++ else if (r < 0) ++ r = -EINTR; ++ else ++ r = 0; + } ++ ++ // If everything went ok, the buffer has been filled with the response data. ++ // Return the response size. + mutex_lock(&ithc->hid_get_feature_mutex); + ithc->hid_get_feature_buf = NULL; -+ if (!r) r = ithc->hid_get_feature_size; ++ if (!r) ++ r = ithc->hid_get_feature_size; + mutex_unlock(&ithc->hid_get_feature_mutex); + return r; + } ++ ++ // 'Set feature', or 'output report'. These don't have a response. 
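+	// (Success below only means the THC accepted the DMA buffer; the device
+	// sends no per-report acknowledgement that we could wait for.)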
+ CHECK_RET(ithc_dma_tx, ithc, code, len, buf); + return 0; +} @@ -4589,17 +4795,22 @@ index 0000000000000..09512b9cb4d31 + .raw_request = ithc_hid_raw_request, +}; + -+static void ithc_hid_devres_release(struct device *dev, void *res) { ++static void ithc_hid_devres_release(struct device *dev, void *res) ++{ + struct hid_device **hidm = res; -+ if (*hidm) hid_destroy_device(*hidm); ++ if (*hidm) ++ hid_destroy_device(*hidm); +} + -+static int ithc_hid_init(struct ithc *ithc) { -+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL); -+ if (!hidm) return -ENOMEM; ++static int ithc_hid_init(struct ithc *ithc) ++{ ++ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof(*hidm), GFP_KERNEL); ++ if (!hidm) ++ return -ENOMEM; + devres_add(&ithc->pci->dev, hidm); + struct hid_device *hid = hid_allocate_device(); -+ if (IS_ERR(hid)) return PTR_ERR(hid); ++ if (IS_ERR(hid)) ++ return PTR_ERR(hid); + *hidm = hid; + + strscpy(hid->name, DEVFULLNAME, sizeof(hid->name)); @@ -4618,27 +4829,45 @@ index 0000000000000..09512b9cb4d31 + +// Interrupts/polling + -+static void ithc_activity_timer_callback(struct timer_list *t) { -+ struct ithc *ithc = container_of(t, struct ithc, activity_timer); ++static enum hrtimer_restart ithc_activity_start_timer_callback(struct hrtimer *t) ++{ ++ struct ithc *ithc = container_of(t, struct ithc, activity_start_timer); ++ ithc_set_active(ithc, ithc_dma_early_us * 2 + USEC_PER_MSEC); ++ return HRTIMER_NORESTART; ++} ++ ++static enum hrtimer_restart ithc_activity_end_timer_callback(struct hrtimer *t) ++{ ++ struct ithc *ithc = container_of(t, struct ithc, activity_end_timer); + cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); ++ return HRTIMER_NORESTART; +} + -+void ithc_set_active(struct ithc *ithc) { ++void ithc_set_active(struct ithc *ithc, unsigned int duration_us) ++{ ++ if (ithc_dma_latency_us < 0) ++ return; + // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens. -+ // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor). -+ // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions. -+ cpu_latency_qos_update_request(&ithc->activity_qos, 0); -+ mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000)); ++ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_RX_TIMEOUT will be ++ // set when this happens. The amount of truncated messages can become very high, resulting ++ // in user-visible effects (laggy/stuttering cursor). To avoid this, we use a CPU latency ++ // QoS request to prevent the CPU from entering low power states during touch interactions. ++ cpu_latency_qos_update_request(&ithc->activity_qos, ithc_dma_latency_us); ++ hrtimer_start_range_ns(&ithc->activity_end_timer, ++ ns_to_ktime(duration_us * NSEC_PER_USEC), duration_us * NSEC_PER_USEC, HRTIMER_MODE_REL); +} + -+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) { -+ u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 -+ | (enable ? 
DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x); ++static int ithc_set_device_enabled(struct ithc *ithc, bool enable) ++{ ++ u32 x = ithc->config.touch_cfg = ++ (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 | ++ (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); ++ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, ++ offsetof(struct ithc_device_config, touch_cfg), sizeof(x), &x); +} + -+static void ithc_disable_interrupts(struct ithc *ithc) { ++static void ithc_disable_interrupts(struct ithc *ithc) ++{ + writel(0, &ithc->regs->error_control); + bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0); + bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); @@ -4646,43 +4875,85 @@ index 0000000000000..09512b9cb4d31 + bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0); +} + -+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) { -+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status); ++static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned int channel) ++{ ++ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, ++ &ithc->regs->dma_rx[channel].status); +} + -+static void ithc_clear_interrupts(struct ithc *ithc) { ++static void ithc_clear_interrupts(struct ithc *ithc) ++{ + writel(0xffffffff, &ithc->regs->error_flags); + writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status); + writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); + ithc_clear_dma_rx_interrupts(ithc, 0); + ithc_clear_dma_rx_interrupts(ithc, 1); -+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status); ++ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, ++ &ithc->regs->dma_tx.status); +} + -+static void ithc_process(struct ithc *ithc) { ++static void ithc_process(struct ithc *ithc) ++{ + ithc_log_regs(ithc); + -+ // read and clear error bits ++ bool rx0 = ithc_use_rx0 && (readl(&ithc->regs->dma_rx[0].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0; ++ bool rx1 = ithc_use_rx1 && (readl(&ithc->regs->dma_rx[1].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0; ++ ++ // Track time between DMA rx transfers, so we can try to predict when we need to enable CPU latency QoS for the next transfer ++ ktime_t t = ktime_get(); ++ ktime_t dt = ktime_sub(t, ithc->last_rx_time); ++ if (rx0 || rx1) { ++ ithc->last_rx_time = t; ++ if (dt > ms_to_ktime(100)) { ++ ithc->cur_rx_seq_count = 0; ++ ithc->cur_rx_seq_errors = 0; ++ } ++ ithc->cur_rx_seq_count++; ++ if (!ithc_use_polling && ithc_dma_latency_us >= 0) { ++ // Disable QoS, since the DMA transfer has completed (we re-enable it after a delay below) ++ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); ++ hrtimer_try_to_cancel(&ithc->activity_end_timer); ++ } ++ } ++ ++ // Read and clear error bits + u32 err = readl(&ithc->regs->error_flags); + if (err) { -+ if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err); + writel(err, &ithc->regs->error_flags); ++ if (err & ~ERROR_FLAG_DMA_RX_TIMEOUT) ++ pci_err(ithc->pci, "error flags: 0x%08x\n", err); ++ 
if (err & ERROR_FLAG_DMA_RX_TIMEOUT) { ++ // Only log an error if we see a significant number of these errors. ++ ithc->cur_rx_seq_errors++; ++ if (ithc->cur_rx_seq_errors && ithc->cur_rx_seq_errors % 50 == 0 && ithc->cur_rx_seq_errors > ithc->cur_rx_seq_count / 10) ++ pci_err(ithc->pci, "High number of DMA RX timeouts/errors (%u/%u, dt=%lldus). Try adjusting dma_early_us and/or dma_latency_us.\n", ++ ithc->cur_rx_seq_errors, ithc->cur_rx_seq_count, ktime_to_us(dt)); ++ } + } + -+ // process DMA rx ++ // Process DMA rx + if (ithc_use_rx0) { + ithc_clear_dma_rx_interrupts(ithc, 0); -+ ithc_dma_rx(ithc, 0); ++ if (rx0) ++ ithc_dma_rx(ithc, 0); + } + if (ithc_use_rx1) { + ithc_clear_dma_rx_interrupts(ithc, 1); -+ ithc_dma_rx(ithc, 1); ++ if (rx1) ++ ithc_dma_rx(ithc, 1); ++ } ++ ++ // Start timer to re-enable QoS for next rx, but only if we've seen an ERROR_FLAG_DMA_RX_TIMEOUT ++ if ((rx0 || rx1) && !ithc_use_polling && ithc_dma_latency_us >= 0 && ithc->cur_rx_seq_errors > 0) { ++ ktime_t expires = ktime_add(t, ktime_sub_us(dt, ithc_dma_early_us)); ++ hrtimer_start_range_ns(&ithc->activity_start_timer, expires, 10 * NSEC_PER_USEC, HRTIMER_MODE_ABS); + } + + ithc_log_regs(ithc); +} + -+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { ++static irqreturn_t ithc_interrupt_thread(int irq, void *arg) ++{ + struct ithc *ithc = arg; + pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n", + readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags), @@ -4694,14 +4965,21 @@ index 0000000000000..09512b9cb4d31 + return IRQ_HANDLED; +} + -+static int ithc_poll_thread(void *arg) { ++static int ithc_poll_thread(void *arg) ++{ + struct ithc *ithc = arg; -+ unsigned sleep = 100; ++ unsigned int sleep = 100; + while (!kthread_should_stop()) { + u32 n = ithc->dma_rx[1].num_received; + ithc_process(ithc); -+ if (n != ithc->dma_rx[1].num_received) sleep = 20; -+ else sleep = min(200u, sleep + (sleep >> 4) + 1); ++ // Decrease polling interval to 20ms if we received data, otherwise slowly ++ // increase it up to 200ms. ++ if (n != ithc->dma_rx[1].num_received) { ++ ithc_set_active(ithc, 100 * USEC_PER_MSEC); ++ sleep = 20; ++ } else { ++ sleep = min(200u, sleep + (sleep >> 4) + 1); ++ } + msleep_interruptible(sleep); + } + return 0; @@ -4709,7 +4987,8 @@ index 0000000000000..09512b9cb4d31 + +// Device initialization and shutdown + -+static void ithc_disable(struct ithc *ithc) { ++static void ithc_disable(struct ithc *ithc) ++{ + bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE); + CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED); + bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); @@ -4721,81 +5000,112 @@ index 0000000000000..09512b9cb4d31 + ithc_clear_interrupts(ithc); +} + -+static int ithc_init_device(struct ithc *ithc) { ++static int ithc_init_device(struct ithc *ithc) ++{ + ithc_log_regs(ithc); + bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0; + ithc_disable(ithc); + CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY); ++ ++ // Since we don't yet know which SPI config the device wants, use default speed and mode ++ // initially for reading config data. 
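+	// (The device's preferred speed/mode pair is applied near the end of this
+	// function, once the config has been read successfully.)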
+ ithc_set_spi_config(ithc, 10, 0); -+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config + -+ if (was_enabled) if (msleep_interruptible(100)) return -EINTR; ++ // Setting the following bit seems to make reading the config more reliable. ++ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); ++ ++ // If the device was previously enabled, wait a bit to make sure it's fully shut down. ++ if (was_enabled) ++ if (msleep_interruptible(100)) ++ return -EINTR; ++ ++ // Take the touch device out of reset. + bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0); + CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0); + for (int retries = 0; ; retries++) { + ithc_log_regs(ithc); + bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET); -+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break; ++ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) ++ break; + if (retries > 5) { -+ pci_err(ithc->pci, "too many retries, failed to reset device\n"); ++ pci_err(ithc->pci, "failed to reset device, state = 0x%08x\n", readl(&ithc->regs->state)); + return -ETIMEDOUT; + } -+ pci_err(ithc->pci, "invalid state, retrying reset\n"); ++ pci_warn(ithc->pci, "invalid state, retrying reset\n"); + bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -+ if (msleep_interruptible(1000)) return -EINTR; ++ if (msleep_interruptible(1000)) ++ return -EINTR; + } + ithc_log_regs(ithc); + ++ // Waiting for the following status bit makes reading config much more reliable, ++ // however the official driver does not seem to do this... + CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4); + -+ // read config ++ // Read configuration data. + for (int retries = 0; ; retries++) { + ithc_log_regs(ithc); -+ memset(&ithc->config, 0, sizeof ithc->config); -+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config); ++ memset(&ithc->config, 0, sizeof(ithc->config)); ++ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof(ithc->config), &ithc->config); + u32 *p = (void *)&ithc->config; + pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); -+ if (ithc_is_config_valid(ithc)) break; ++ if (ithc_is_config_valid(ithc)) ++ break; + if (retries > 10) { -+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id); ++ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ++ ithc->config.device_id); + return -EIO; + } -+ pci_err(ithc->pci, "failed to read config, retrying\n"); -+ if (msleep_interruptible(100)) return -EINTR; ++ pci_warn(ithc->pci, "failed to read config, retrying\n"); ++ if (msleep_interruptible(100)) ++ return -EINTR; + } + ithc_log_regs(ithc); + -+ CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config)); ++ // Apply SPI config and enable touch device. ++ CHECK_RET(ithc_set_spi_config, ithc, ++ DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), ++ DEVCFG_SPI_MODE(ithc->config.spi_config)); + CHECK_RET(ithc_set_device_enabled, ithc, true); + ithc_log_regs(ithc); + return 0; +} + -+int ithc_reset(struct ithc *ithc) { -+ // FIXME This should probably do devres_release_group()+ithc_start(). 
But because this is called during DMA -+ // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking? ++int ithc_reset(struct ithc *ithc) ++{ ++ // FIXME This should probably do devres_release_group()+ithc_start(). ++ // But because this is called during DMA processing, that would have to be done ++ // asynchronously (schedule_work()?). And with extra locking? + pci_err(ithc->pci, "reset\n"); + CHECK(ithc_init_device, ithc); -+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); ++ if (ithc_use_rx0) ++ ithc_dma_rx_enable(ithc, 0); ++ if (ithc_use_rx1) ++ ithc_dma_rx_enable(ithc, 1); + ithc_log_regs(ithc); + pci_dbg(ithc->pci, "reset completed\n"); + return 0; +} + -+static void ithc_stop(void *res) { ++static void ithc_stop(void *res) ++{ + struct ithc *ithc = res; + pci_dbg(ithc->pci, "stopping\n"); + ithc_log_regs(ithc); -+ if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread); -+ if (ithc->irq >= 0) disable_irq(ithc->irq); ++ ++ if (ithc->poll_thread) ++ CHECK(kthread_stop, ithc->poll_thread); ++ if (ithc->irq >= 0) ++ disable_irq(ithc->irq); + CHECK(ithc_set_device_enabled, ithc, false); + ithc_disable(ithc); -+ del_timer_sync(&ithc->activity_timer); ++ hrtimer_cancel(&ithc->activity_start_timer); ++ hrtimer_cancel(&ithc->activity_end_timer); + cpu_latency_qos_remove_request(&ithc->activity_qos); -+ // clear dma config -+ for(unsigned i = 0; i < 2; i++) { ++ ++ // Clear DMA config. ++ for (unsigned int i = 0; i < 2; i++) { + CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0); + lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr); + writeb(0, &ithc->regs->dma_rx[i].num_bufs); @@ -4803,35 +5113,43 @@ index 0000000000000..09512b9cb4d31 + } + lo_hi_writeq(0, &ithc->regs->dma_tx.addr); + writeb(0, &ithc->regs->dma_tx.num_prds); ++ + ithc_log_regs(ithc); + pci_dbg(ithc->pci, "stopped\n"); +} + -+static void ithc_clear_drvdata(void *res) { ++static void ithc_clear_drvdata(void *res) ++{ + struct pci_dev *pci = res; + pci_set_drvdata(pci, NULL); +} + -+static int ithc_start(struct pci_dev *pci) { ++static int ithc_start(struct pci_dev *pci) ++{ + pci_dbg(pci, "starting\n"); + if (pci_get_drvdata(pci)) { + pci_err(pci, "device already initialized\n"); + return -EINVAL; + } -+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM; ++ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) ++ return -ENOMEM; + -+ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL); -+ if (!ithc) return -ENOMEM; ++ // Allocate/init main driver struct. ++ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof(*ithc), GFP_KERNEL); ++ if (!ithc) ++ return -ENOMEM; + ithc->irq = -1; + ithc->pci = pci; -+ snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci)); ++ snprintf(ithc->phys, sizeof(ithc->phys), "pci-%s/" DEVNAME, pci_name(pci)); + init_waitqueue_head(&ithc->wait_hid_parse); + init_waitqueue_head(&ithc->wait_hid_get_feature); + mutex_init(&ithc->hid_get_feature_mutex); + pci_set_drvdata(pci, ithc); + CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci); -+ if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL); ++ if (ithc_log_regs_enabled) ++ ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof(*ithc->prev_regs), GFP_KERNEL); + ++ // PCI initialization. 
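/*
 * Aside (illustrative, not part of the patch): CHECK()/CHECK_RET() used
 * throughout are the driver's own logging wrappers from ithc.h, not kernel
 * APIs. CHECK_RET(pcim_enable_device, pci) expands to approximately:
 *
 *	do {
 *		int r = ({
 *			int r = pcim_enable_device(pci);
 *			if (r < 0)
 *				pci_err(ithc->pci, "%s: %s failed with %i\n",
 *					__func__, "pcim_enable_device", r);
 *			r;
 *		});
 *		if (r < 0)
 *			return r;
 *	} while (0);
 *
 * (The shadowed "r" is how the macros are actually written; it is benign.)
 */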
+ CHECK_RET(pcim_enable_device, pci); + pci_set_master(pci); + CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs"); @@ -4839,29 +5157,39 @@ index 0000000000000..09512b9cb4d31 + CHECK_RET(pci_set_power_state, pci, PCI_D0); + ithc->regs = pcim_iomap_table(pci)[0]; + ++ // Allocate IRQ. + if (!ithc_use_polling) { + CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + ithc->irq = CHECK(pci_irq_vector, pci, 0); -+ if (ithc->irq < 0) return ithc->irq; ++ if (ithc->irq < 0) ++ return ithc->irq; + } + ++ // Initialize THC and touch device. + CHECK_RET(ithc_init_device, ithc); + CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups); -+ if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME); -+ if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME); ++ if (ithc_use_rx0) ++ CHECK_RET(ithc_dma_rx_init, ithc, 0); ++ if (ithc_use_rx1) ++ CHECK_RET(ithc_dma_rx_init, ithc, 1); + CHECK_RET(ithc_dma_tx_init, ithc); + -+ CHECK_RET(ithc_hid_init, ithc); -+ + cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0); ++ hrtimer_init(&ithc->activity_start_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ ithc->activity_start_timer.function = ithc_activity_start_timer_callback; ++ hrtimer_init(&ithc->activity_end_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ ithc->activity_end_timer.function = ithc_activity_end_timer_callback; + -+ // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed ++ // Add ithc_stop() callback AFTER setting up DMA buffers, so that polling/irqs/DMA are ++ // disabled BEFORE the buffers are freed. + CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc); + ++ CHECK_RET(ithc_hid_init, ithc); ++ ++ // Start polling/IRQ. + if (ithc_use_polling) { + pci_info(pci, "using polling instead of irq\n"); -+ // use a thread instead of simple timer because we want to be able to sleep ++ // Use a thread instead of simple timer because we want to be able to sleep. + ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll"); + if (IS_ERR(ithc->poll_thread)) { + int err = PTR_ERR(ithc->poll_thread); @@ -4869,13 +5197,17 @@ index 0000000000000..09512b9cb4d31 + return err; + } + } else { -+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); ++ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ++ ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); + } + -+ if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); ++ if (ithc_use_rx0) ++ ithc_dma_rx_enable(ithc, 0); ++ if (ithc_use_rx1) ++ ithc_dma_rx_enable(ithc, 1); + -+ // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA ++ // hid_add_device() can only be called after irq/polling is started and DMA is enabled, ++ // because it calls ithc_hid_parse() which reads the report descriptor via DMA. 
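/*
 * Aside (sketch of the ordering constraint described above, not part of the
 * patch): hid_add_device() calls back into this driver synchronously, and
 * that callback round-trips through DMA before it can return:
 *
 *	hid_add_device(hid)
 *	  -> ithc_hid_parse(hid)    // via ithc_ll_driver.parse
 *	       -> ithc_dma_tx(ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, ...)
 *	       -> wait_event_timeout(ithc->wait_hid_parse, ...)
 *	  <- the descriptor arrives on the RX ring; ithc_dma_rx_process_buf()
 *	     parses it, sets hid_parse_done, and wakes the waiter
 *
 * If IRQ/polling or RX DMA were not running yet, that wait could only
 * time out.
 */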
+ CHECK_RET(hid_add_device, ithc->hid); + + CHECK(ithc_debug_init, ithc); @@ -4884,43 +5216,54 @@ index 0000000000000..09512b9cb4d31 + return 0; +} + -+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) { ++static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) ++{ + pci_dbg(pci, "device probe\n"); + return ithc_start(pci); +} + -+static void ithc_remove(struct pci_dev *pci) { ++static void ithc_remove(struct pci_dev *pci) ++{ + pci_dbg(pci, "device remove\n"); + // all cleanup is handled by devres +} + -+static int ithc_suspend(struct device *dev) { ++// For suspend/resume, we just deinitialize and reinitialize everything. ++// TODO It might be cleaner to keep the HID device around, however we would then have to signal ++// to userspace that the touch device has lost state and userspace needs to e.g. resend 'set ++// feature' requests. Hidraw does not seem to have a facility to do that. ++static int ithc_suspend(struct device *dev) ++{ + struct pci_dev *pci = to_pci_dev(dev); + pci_dbg(pci, "pm suspend\n"); + devres_release_group(dev, ithc_start); + return 0; +} + -+static int ithc_resume(struct device *dev) { ++static int ithc_resume(struct device *dev) ++{ + struct pci_dev *pci = to_pci_dev(dev); + pci_dbg(pci, "pm resume\n"); + return ithc_start(pci); +} + -+static int ithc_freeze(struct device *dev) { ++static int ithc_freeze(struct device *dev) ++{ + struct pci_dev *pci = to_pci_dev(dev); + pci_dbg(pci, "pm freeze\n"); + devres_release_group(dev, ithc_start); + return 0; +} + -+static int ithc_thaw(struct device *dev) { ++static int ithc_thaw(struct device *dev) ++{ + struct pci_dev *pci = to_pci_dev(dev); + pci_dbg(pci, "pm thaw\n"); + return ithc_start(pci); +} + -+static int ithc_restore(struct device *dev) { ++static int ithc_restore(struct device *dev) ++{ + struct pci_dev *pci = to_pci_dev(dev); + pci_dbg(pci, "pm restore\n"); + return ithc_start(pci); @@ -4941,11 +5284,13 @@ index 0000000000000..09512b9cb4d31 + //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway +}; + -+static int __init ithc_init(void) { ++static int __init ithc_init(void) ++{ + return pci_register_driver(&ithc_driver); +} + -+static void __exit ithc_exit(void) { ++static void __exit ithc_exit(void) ++{ + pci_unregister_driver(&ithc_driver); +} + @@ -4954,80 +5299,114 @@ index 0000000000000..09512b9cb4d31 + diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c new file mode 100644 -index 0000000000000..85d567b05761f +index 000000000000..e058721886e3 --- /dev/null +++ b/drivers/hid/ithc/ithc-regs.c -@@ -0,0 +1,64 @@ +@@ -0,0 +1,96 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause ++ +#include "ithc.h" + +#define reg_num(r) (0x1fff & (u16)(__force u64)(r)) + -+void bitsl(__iomem u32 *reg, u32 mask, u32 val) { -+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); ++void bitsl(__iomem u32 *reg, u32 mask, u32 val) ++{ ++ if (val & ~mask) ++ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", ++ reg_num(reg), val, mask); + writel((readl(reg) & ~mask) | (val & mask), reg); +} + -+void bitsb(__iomem u8 *reg, u8 mask, u8 val) { -+ if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); ++void bitsb(__iomem u8 *reg, u8 mask, u8 val) ++{ ++ if (val & ~mask) ++ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", ++ reg_num(reg), 
val, mask); + writeb((readb(reg) & ~mask) | (val & mask), reg); +} + -+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) { -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); ++int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) ++{ ++ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", ++ reg_num(reg), mask, val); + u32 x; + if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); ++ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", ++ reg_num(reg), mask, val); + return -ETIMEDOUT; + } + pci_dbg(ithc->pci, "done waiting\n"); + return 0; +} + -+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) { -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); ++int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) ++{ ++ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", ++ reg_num(reg), mask, val); + u8 x; + if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); ++ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", ++ reg_num(reg), mask, val); + return -ETIMEDOUT; + } + pci_dbg(ithc->pci, "done waiting\n"); + return 0; +} + -+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) { ++int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) ++{ + pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode); -+ if (mode == 3) mode = 2; ++ if (mode == 3) ++ mode = 2; + bitsl(&ithc->regs->spi_config, + SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff), + SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed)); + return 0; +} + -+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) { ++int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) ++{ + pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset); -+ if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL; ++ if (size > sizeof(ithc->regs->spi_cmd.data)) ++ return -EINVAL; ++ ++ // Wait if the device is still busy. + CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); ++ // Clear result flags. + writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); ++ ++ // Init SPI command data. + writeb(command, &ithc->regs->spi_cmd.code); + writew(size, &ithc->regs->spi_cmd.size); + writel(offset, &ithc->regs->spi_cmd.offset); + u32 *p = data, n = (size + 3) / 4; -+ for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]); ++ for (u32 i = 0; i < n; i++) ++ writel(p[i], &ithc->regs->spi_cmd.data[i]); ++ ++ // Start transmission. + bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND); + CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO; -+ if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE; -+ for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]); ++ ++ // Read response. 
++ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) ++ return -EIO; ++ if (readw(&ithc->regs->spi_cmd.size) != size) ++ return -EMSGSIZE; ++ for (u32 i = 0; i < n; i++) ++ p[i] = readl(&ithc->regs->spi_cmd.data[i]); ++ + writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); + return 0; +} + diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h new file mode 100644 -index 0000000000000..1a96092ed7eed +index 000000000000..d4007d9e2bac --- /dev/null +++ b/drivers/hid/ithc/ithc-regs.h -@@ -0,0 +1,186 @@ +@@ -0,0 +1,189 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ ++ +#define CONTROL_QUIESCE BIT(1) +#define CONTROL_IS_QUIESCED BIT(2) +#define CONTROL_NRESET BIT(3) @@ -5054,7 +5433,7 @@ index 0000000000000..1a96092ed7eed + +#define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9) +#define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10) -+#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message ++#define ERROR_FLAG_DMA_RX_TIMEOUT BIT(12) // set when we receive a truncated DMA message +#define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13) +#define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16) +#define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17) @@ -5097,6 +5476,7 @@ index 0000000000000..1a96092ed7eed +#define DMA_RX_STATUS_HAVE_DATA BIT(5) +#define DMA_RX_STATUS_ENABLED BIT(8) + ++// COUNTER_RESET can be written to counter registers to reset them to zero. However, in some cases this can mess up the THC. +#define COUNTER_RESET BIT(31) + +struct ithc_registers { @@ -5177,15 +5557,15 @@ index 0000000000000..1a96092ed7eed +#define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode? +#define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3) +#define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f) -+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) -+#define DEVCFG_SPI_HEARTBEAT_INTERVAL (((x) >> 21) & 7) ++#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) // TODO implement heartbeat ++#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7) +#define DEVCFG_SPI_UNKNOWN_25 BIT(25) +#define DEVCFG_SPI_UNKNOWN_26 BIT(26) +#define DEVCFG_SPI_UNKNOWN_27 BIT(27) -+#define DEVCFG_SPI_DELAY (((x) >> 28) & 7) -+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) ++#define DEVCFG_SPI_DELAY(x) (((x) >> 28) & 7) // TODO use this ++#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) // TODO use this? + -+struct ithc_device_config { ++struct ithc_device_config { // (Example values are from an SP7+.) + u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET) + u32 _unknown_04; // 04 = 0x00000000 + u32 dma_buf_sizes; // 08 = 0x000a00ff @@ -5196,9 +5576,9 @@ index 0000000000000..1a96092ed7eed + u16 vendor_id; // 1c = 0x045e = Microsoft Corp. + u16 product_id; // 1e = 0x0c1a + u32 revision; // 20 = 0x00000001 -+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 ++ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 (this value looks more random on newer devices) + u32 _unknown_28; // 28 = 0x00000000 -+ u32 fw_mode; // 2c = 0x00000000 ++ u32 fw_mode; // 2c = 0x00000000 (for fw update?) + u32 _unknown_30; // 30 = 0x00000000 + u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?) 
+ u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET) @@ -5216,10 +5596,12 @@ index 0000000000000..1a96092ed7eed + diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h new file mode 100644 -index 0000000000000..6a9b0d480bc15 +index 000000000000..028e55a4ec53 --- /dev/null +++ b/drivers/hid/ithc/ithc.h -@@ -0,0 +1,60 @@ +@@ -0,0 +1,67 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ ++ +#include +#include +#include @@ -5243,7 +5625,7 @@ index 0000000000000..6a9b0d480bc15 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define CHECK(fn, ...) ({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; }) -+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0) ++#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while (0) + +#define NUM_RX_BUF 16 + @@ -5257,8 +5639,13 @@ index 0000000000000..6a9b0d480bc15 + struct pci_dev *pci; + int irq; + struct task_struct *poll_thread; ++ + struct pm_qos_request activity_qos; -+ struct timer_list activity_timer; ++ struct hrtimer activity_start_timer; ++ struct hrtimer activity_end_timer; ++ ktime_t last_rx_time; ++ unsigned int cur_rx_seq_count; ++ unsigned int cur_rx_seq_errors; + + struct hid_device *hid; + bool hid_parse_done; @@ -5271,1610 +5658,19 @@ index 0000000000000..6a9b0d480bc15 + struct ithc_registers __iomem *regs; + struct ithc_registers *prev_regs; // for debugging + struct ithc_device_config config; -+ struct ithc_dma_rx dma_rx[2]; -+ struct ithc_dma_tx dma_tx; -+}; -+ -+int ithc_reset(struct ithc *ithc); -+void ithc_set_active(struct ithc *ithc); -+int ithc_debug_init(struct ithc *ithc); -+void ithc_log_regs(struct ithc *ithc); -+ --- -2.42.0 - -From 9f8d2a0f4012644f56ed8dfd322e575b57e1c208 Mon Sep 17 00:00:00 2001 -From: quo -Date: Mon, 23 Oct 2023 10:15:29 +0200 -Subject: [PATCH] Update ITHC from module repo - -Changes: - - Added some comments and fixed a few checkpatch warnings - - Improved CPU latency QoS handling - - Retry reading the report descriptor on error / timeout - -Based on https://github.com/quo/ithc-linux/commit/0b8b45d9775e756d6bd3a699bfaf9f5bd7b9b10b - -Signed-off-by: Dorian Stoll -Patchset: ithc ---- - drivers/hid/ithc/ithc-debug.c | 94 +++++--- - drivers/hid/ithc/ithc-dma.c | 231 +++++++++++++----- - drivers/hid/ithc/ithc-dma.h | 4 +- - drivers/hid/ithc/ithc-main.c | 430 ++++++++++++++++++++++++---------- - drivers/hid/ithc/ithc-regs.c | 68 ++++-- - drivers/hid/ithc/ithc-regs.h | 19 +- - drivers/hid/ithc/ithc.h | 13 +- - 7 files changed, 623 insertions(+), 236 deletions(-) - -diff --git a/drivers/hid/ithc/ithc-debug.c b/drivers/hid/ithc/ithc-debug.c -index 57bf125c45bd5..1f1f1e33f2e5a 100644 ---- a/drivers/hid/ithc/ithc-debug.c -+++ b/drivers/hid/ithc/ithc-debug.c -@@ -1,10 +1,14 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - --void ithc_log_regs(struct ithc *ithc) { -- if (!ithc->prev_regs) return; -- u32 __iomem *cur = (__iomem void*)ithc->regs; -- u32 *prev = (void*)ithc->prev_regs; -- for (int i = 1024; i < sizeof *ithc->regs / 4; i++) { -+void ithc_log_regs(struct ithc *ithc) -+{ -+ if (!ithc->prev_regs) -+ return; -+ u32 __iomem *cur = (__iomem void *)ithc->regs; -+ u32 *prev = (void *)ithc->prev_regs; -+ for (int i = 1024; i < sizeof(*ithc->regs) / 4; i++) { - u32 x = readl(cur + i); - if (x != prev[i]) { - pci_info(ithc->pci, "reg %04x: %08x -> %08x\n", i * 4, prev[i], x); -@@ -13,55 +17,79 @@ void ithc_log_regs(struct ithc 
*ithc) { - } - } - --static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, loff_t *offset) { -+static ssize_t ithc_debugfs_cmd_write(struct file *f, const char __user *buf, size_t len, -+ loff_t *offset) -+{ -+ // Debug commands consist of a single letter followed by a list of numbers (decimal or -+ // hexadecimal, space-separated). - struct ithc *ithc = file_inode(f)->i_private; - char cmd[256]; -- if (!ithc || !ithc->pci) return -ENODEV; -- if (!len) return -EINVAL; -- if (len >= sizeof cmd) return -EINVAL; -- if (copy_from_user(cmd, buf, len)) return -EFAULT; -+ if (!ithc || !ithc->pci) -+ return -ENODEV; -+ if (!len) -+ return -EINVAL; -+ if (len >= sizeof(cmd)) -+ return -EINVAL; -+ if (copy_from_user(cmd, buf, len)) -+ return -EFAULT; - cmd[len] = 0; -- if (cmd[len-1] == '\n') cmd[len-1] = 0; -+ if (cmd[len-1] == '\n') -+ cmd[len-1] = 0; - pci_info(ithc->pci, "debug command: %s\n", cmd); -+ -+ // Parse the list of arguments into a u32 array. - u32 n = 0; - const char *s = cmd + 1; - u32 a[32]; - while (*s && *s != '\n') { -- if (n >= ARRAY_SIZE(a)) return -EINVAL; -- if (*s++ != ' ') return -EINVAL; -+ if (n >= ARRAY_SIZE(a)) -+ return -EINVAL; -+ if (*s++ != ' ') -+ return -EINVAL; - char *e; - a[n++] = simple_strtoul(s, &e, 0); -- if (e == s) return -EINVAL; -+ if (e == s) -+ return -EINVAL; - s = e; - } - ithc_log_regs(ithc); -- switch(cmd[0]) { -+ -+ // Execute the command. -+ switch (cmd[0]) { - case 'x': // reset - ithc_reset(ithc); - break; - case 'w': // write register: offset mask value -- if (n != 3 || (a[0] & 3)) return -EINVAL; -- pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", a[0], a[2], a[1]); -+ if (n != 3 || (a[0] & 3)) -+ return -EINVAL; -+ pci_info(ithc->pci, "debug write 0x%04x = 0x%08x (mask 0x%08x)\n", -+ a[0], a[2], a[1]); - bitsl(((__iomem u32 *)ithc->regs) + a[0] / 4, a[1], a[2]); - break; - case 'r': // read register: offset -- if (n != 1 || (a[0] & 3)) return -EINVAL; -- pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); -+ if (n != 1 || (a[0] & 3)) -+ return -EINVAL; -+ pci_info(ithc->pci, "debug read 0x%04x = 0x%08x\n", a[0], -+ readl(((__iomem u32 *)ithc->regs) + a[0] / 4)); - break; - case 's': // spi command: cmd offset len data... - // read config: s 4 0 64 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // set touch cfg: s 6 12 4 XX -- if (n < 3 || a[2] > (n - 3) * 4) return -EINVAL; -+ if (n < 3 || a[2] > (n - 3) * 4) -+ return -EINVAL; - pci_info(ithc->pci, "debug spi command %u with %u bytes of data\n", a[0], a[2]); - if (!CHECK(ithc_spi_command, ithc, a[0], a[1], a[2], a + 3)) -- for (u32 i = 0; i < (a[2] + 3) / 4; i++) pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); -+ for (u32 i = 0; i < (a[2] + 3) / 4; i++) -+ pci_info(ithc->pci, "resp %u = 0x%08x\n", i, a[3+i]); - break; - case 'd': // dma command: cmd len data... 
- // get report descriptor: d 7 8 0 0 - // enable multitouch: d 3 2 0x0105 -- if (n < 2 || a[1] > (n - 2) * 4) return -EINVAL; -+ if (n < 2 || a[1] > (n - 2) * 4) -+ return -EINVAL; - pci_info(ithc->pci, "debug dma command %u with %u bytes of data\n", a[0], a[1]); -- if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) pci_err(ithc->pci, "dma tx failed\n"); -+ if (ithc_dma_tx(ithc, a[0], a[1], a + 2)) -+ pci_err(ithc->pci, "dma tx failed\n"); - break; - default: - return -EINVAL; -@@ -75,21 +103,27 @@ static const struct file_operations ithc_debugfops_cmd = { - .write = ithc_debugfs_cmd_write, - }; - --static void ithc_debugfs_devres_release(struct device *dev, void *res) { -+static void ithc_debugfs_devres_release(struct device *dev, void *res) -+{ - struct dentry **dbgm = res; -- if (*dbgm) debugfs_remove_recursive(*dbgm); -+ if (*dbgm) -+ debugfs_remove_recursive(*dbgm); - } - --int ithc_debug_init(struct ithc *ithc) { -- struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof *dbgm, GFP_KERNEL); -- if (!dbgm) return -ENOMEM; -+int ithc_debug_init(struct ithc *ithc) -+{ -+ struct dentry **dbgm = devres_alloc(ithc_debugfs_devres_release, sizeof(*dbgm), GFP_KERNEL); -+ if (!dbgm) -+ return -ENOMEM; - devres_add(&ithc->pci->dev, dbgm); - struct dentry *dbg = debugfs_create_dir(DEVNAME, NULL); -- if (IS_ERR(dbg)) return PTR_ERR(dbg); -+ if (IS_ERR(dbg)) -+ return PTR_ERR(dbg); - *dbgm = dbg; - - struct dentry *cmd = debugfs_create_file("cmd", 0220, dbg, ithc, &ithc_debugfops_cmd); -- if (IS_ERR(cmd)) return PTR_ERR(cmd); -+ if (IS_ERR(cmd)) -+ return PTR_ERR(cmd); - - return 0; - } -diff --git a/drivers/hid/ithc/ithc-dma.c b/drivers/hid/ithc/ithc-dma.c -index 7e89b3496918d..ffb8689b8a780 100644 ---- a/drivers/hid/ithc/ithc-dma.c -+++ b/drivers/hid/ithc/ithc-dma.c -@@ -1,59 +1,91 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - --static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, unsigned num_buffers, unsigned num_pages, enum dma_data_direction dir) { -+// The THC uses tables of PRDs (physical region descriptors) to describe the TX and RX data buffers. -+// Each PRD contains the DMA address and size of a block of DMA memory, and some status flags. -+// This allows each data buffer to consist of multiple non-contiguous blocks of memory. -+ -+static int ithc_dma_prd_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *p, -+ unsigned int num_buffers, unsigned int num_pages, enum dma_data_direction dir) -+{ - p->num_pages = num_pages; - p->dir = dir; -+ // We allocate enough space to have one PRD per data buffer page, however if the data -+ // buffer pages happen to be contiguous, we can describe the buffer using fewer PRDs, so -+ // some will remain unused (which is fine). - p->size = round_up(num_buffers * num_pages * sizeof(struct ithc_phys_region_desc), PAGE_SIZE); - p->addr = dmam_alloc_coherent(&ithc->pci->dev, p->size, &p->dma_addr, GFP_KERNEL); -- if (!p->addr) return -ENOMEM; -- if (p->dma_addr & (PAGE_SIZE - 1)) return -EFAULT; -+ if (!p->addr) -+ return -ENOMEM; -+ if (p->dma_addr & (PAGE_SIZE - 1)) -+ return -EFAULT; - return 0; - } - -+// Devres managed sg_table wrapper. 
- struct ithc_sg_table { - void *addr; - struct sg_table sgt; - enum dma_data_direction dir; - }; --static void ithc_dma_sgtable_free(struct sg_table *sgt) { -+static void ithc_dma_sgtable_free(struct sg_table *sgt) -+{ - struct scatterlist *sg; - int i; - for_each_sgtable_sg(sgt, sg, i) { - struct page *p = sg_page(sg); -- if (p) __free_page(p); -+ if (p) -+ __free_page(p); - } - sg_free_table(sgt); - } --static void ithc_dma_data_devres_release(struct device *dev, void *res) { -+static void ithc_dma_data_devres_release(struct device *dev, void *res) -+{ - struct ithc_sg_table *sgt = res; -- if (sgt->addr) vunmap(sgt->addr); -+ if (sgt->addr) -+ vunmap(sgt->addr); - dma_unmap_sgtable(dev, &sgt->sgt, sgt->dir, 0); - ithc_dma_sgtable_free(&sgt->sgt); - } - --static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b) { -- // We don't use dma_alloc_coherent for data buffers, because they don't have to be contiguous (we can use one PRD per page) or coherent (they are unidirectional). -- // Instead we use an sg_table of individually allocated pages (5.13 has dma_alloc_noncontiguous for this, but we'd like to support 5.10 for now). -+static int ithc_dma_data_alloc(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, -+ struct ithc_dma_data_buffer *b) -+{ -+ // We don't use dma_alloc_coherent() for data buffers, because they don't have to be -+ // coherent (they are unidirectional) or contiguous (we can use one PRD per page). -+ // We could use dma_alloc_noncontiguous(), however this still always allocates a single -+ // DMA mapped segment, which is more restrictive than what we need. -+ // Instead we use an sg_table of individually allocated pages. - struct page *pages[16]; -- if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) return -EINVAL; -+ if (prds->num_pages == 0 || prds->num_pages > ARRAY_SIZE(pages)) -+ return -EINVAL; - b->active_idx = -1; -- struct ithc_sg_table *sgt = devres_alloc(ithc_dma_data_devres_release, sizeof *sgt, GFP_KERNEL); -- if (!sgt) return -ENOMEM; -+ struct ithc_sg_table *sgt = devres_alloc( -+ ithc_dma_data_devres_release, sizeof(*sgt), GFP_KERNEL); -+ if (!sgt) -+ return -ENOMEM; - sgt->dir = prds->dir; -+ - if (!sg_alloc_table(&sgt->sgt, prds->num_pages, GFP_KERNEL)) { - struct scatterlist *sg; - int i; - bool ok = true; - for_each_sgtable_sg(&sgt->sgt, sg, i) { -- struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); // don't need __GFP_DMA for PCI DMA -- if (!p) { ok = false; break; } -+ // NOTE: don't need __GFP_DMA for PCI DMA -+ struct page *p = pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); -+ if (!p) { -+ ok = false; -+ break; -+ } - sg_set_page(sg, p, PAGE_SIZE, 0); - } - if (ok && !dma_map_sgtable(&ithc->pci->dev, &sgt->sgt, prds->dir, 0)) { - devres_add(&ithc->pci->dev, sgt); - b->sgt = &sgt->sgt; - b->addr = sgt->addr = vmap(pages, prds->num_pages, 0, PAGE_KERNEL); -- if (!b->addr) return -ENOMEM; -+ if (!b->addr) -+ return -ENOMEM; - return 0; - } - ithc_dma_sgtable_free(&sgt->sgt); -@@ -62,17 +94,29 @@ static int ithc_dma_data_alloc(struct ithc* ithc, struct ithc_dma_prd_buffer *pr - return -ENOMEM; - } - --static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, -+ struct ithc_dma_data_buffer *b, unsigned int idx) -+{ -+ // Give a buffer to the THC. 
- struct ithc_phys_region_desc *prd = prds->addr; - prd += idx * prds->num_pages; -- if (b->active_idx >= 0) { pci_err(ithc->pci, "buffer already active\n"); return -EINVAL; } -+ if (b->active_idx >= 0) { -+ pci_err(ithc->pci, "buffer already active\n"); -+ return -EINVAL; -+ } - b->active_idx = idx; - if (prds->dir == DMA_TO_DEVICE) { -- if (b->data_size > PAGE_SIZE) return -EINVAL; -+ // TX buffer: Caller should have already filled the data buffer, so just fill -+ // the PRD and flush. -+ // (TODO: Support multi-page TX buffers. So far no device seems to use or need -+ // these though.) -+ if (b->data_size > PAGE_SIZE) -+ return -EINVAL; - prd->addr = sg_dma_address(b->sgt->sgl) >> 10; - prd->size = b->data_size | PRD_FLAG_END; - flush_kernel_vmap_range(b->addr, b->data_size); - } else if (prds->dir == DMA_FROM_DEVICE) { -+ // RX buffer: Reset PRDs. - struct scatterlist *sg; - int i; - for_each_sgtable_dma_sg(b->sgt, sg, i) { -@@ -87,21 +131,34 @@ static int ithc_dma_data_buffer_put(struct ithc *ithc, struct ithc_dma_prd_buffe - return 0; - } - --static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, struct ithc_dma_data_buffer *b, unsigned idx) { -+static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffer *prds, -+ struct ithc_dma_data_buffer *b, unsigned int idx) -+{ -+ // Take a buffer from the THC. - struct ithc_phys_region_desc *prd = prds->addr; - prd += idx * prds->num_pages; -- if (b->active_idx != idx) { pci_err(ithc->pci, "wrong buffer index\n"); return -EINVAL; } -+ // This is purely a sanity check. We don't strictly need the idx parameter for this -+ // function, because it should always be the same as active_idx, unless we have a bug. -+ if (b->active_idx != idx) { -+ pci_err(ithc->pci, "wrong buffer index\n"); -+ return -EINVAL; -+ } - b->active_idx = -1; - if (prds->dir == DMA_FROM_DEVICE) { -+ // RX buffer: Calculate actual received data size from PRDs. - dma_rmb(); // for the prds - b->data_size = 0; - struct scatterlist *sg; - int i; - for_each_sgtable_dma_sg(b->sgt, sg, i) { -- unsigned size = prd->size; -+ unsigned int size = prd->size; - b->data_size += size & PRD_SIZE_MASK; -- if (size & PRD_FLAG_END) break; -- if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { pci_err(ithc->pci, "truncated prd\n"); break; } -+ if (size & PRD_FLAG_END) -+ break; -+ if ((size & PRD_SIZE_MASK) != sg_dma_len(sg)) { -+ pci_err(ithc->pci, "truncated prd\n"); -+ break; -+ } - prd++; - } - invalidate_kernel_vmap_range(b->addr, b->data_size); -@@ -110,93 +167,139 @@ static int ithc_dma_data_buffer_get(struct ithc *ithc, struct ithc_dma_prd_buffe - return 0; - } - --int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname) { -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel) -+{ - struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; - mutex_init(&rx->mutex); -+ -+ // Allocate buffers. 
- u32 buf_size = DEVCFG_DMA_RX_SIZE(ithc->config.dma_buf_sizes); -- unsigned num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -- pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", NUM_RX_BUF, buf_size, num_pages); -+ unsigned int num_pages = (buf_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating rx buffers: num = %u, size = %u, pages = %u\n", -+ NUM_RX_BUF, buf_size, num_pages); - CHECK_RET(ithc_dma_prd_alloc, ithc, &rx->prds, NUM_RX_BUF, num_pages, DMA_FROM_DEVICE); -- for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ for (unsigned int i = 0; i < NUM_RX_BUF; i++) - CHECK_RET(ithc_dma_data_alloc, ithc, &rx->prds, &rx->bufs[i]); -+ -+ // Init registers. - writeb(DMA_RX_CONTROL2_RESET, &ithc->regs->dma_rx[channel].control2); - lo_hi_writeq(rx->prds.dma_addr, &ithc->regs->dma_rx[channel].addr); - writeb(NUM_RX_BUF - 1, &ithc->regs->dma_rx[channel].num_bufs); - writeb(num_pages - 1, &ithc->regs->dma_rx[channel].num_prds); - u8 head = readb(&ithc->regs->dma_rx[channel].head); -- if (head) { pci_err(ithc->pci, "head is nonzero (%u)\n", head); return -EIO; } -- for (unsigned i = 0; i < NUM_RX_BUF; i++) -+ if (head) { -+ pci_err(ithc->pci, "head is nonzero (%u)\n", head); -+ return -EIO; -+ } -+ -+ // Init buffers. -+ for (unsigned int i = 0; i < NUM_RX_BUF; i++) - CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, &rx->bufs[i], i); -+ - writeb(head ^ DMA_RX_WRAP_FLAG, &ithc->regs->dma_rx[channel].tail); - return 0; - } --void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) { -- bitsb_set(&ithc->regs->dma_rx[channel].control, DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -- CHECK(waitl, ithc, &ithc->regs->dma_rx[1].status, DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); -+ -+void ithc_dma_rx_enable(struct ithc *ithc, u8 channel) -+{ -+ bitsb_set(&ithc->regs->dma_rx[channel].control, -+ DMA_RX_CONTROL_ENABLE | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_DATA); -+ CHECK(waitl, ithc, &ithc->regs->dma_rx[channel].status, -+ DMA_RX_STATUS_ENABLED, DMA_RX_STATUS_ENABLED); - } - --int ithc_dma_tx_init(struct ithc *ithc) { -+int ithc_dma_tx_init(struct ithc *ithc) -+{ - struct ithc_dma_tx *tx = &ithc->dma_tx; - mutex_init(&tx->mutex); -+ -+ // Allocate buffers. - tx->max_size = DEVCFG_DMA_TX_SIZE(ithc->config.dma_buf_sizes); -- unsigned num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -- pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", tx->max_size, num_pages); -+ unsigned int num_pages = (tx->max_size + PAGE_SIZE - 1) / PAGE_SIZE; -+ pci_dbg(ithc->pci, "allocating tx buffers: size = %u, pages = %u\n", -+ tx->max_size, num_pages); - CHECK_RET(ithc_dma_prd_alloc, ithc, &tx->prds, 1, num_pages, DMA_TO_DEVICE); - CHECK_RET(ithc_dma_data_alloc, ithc, &tx->prds, &tx->buf); -+ -+ // Init registers. - lo_hi_writeq(tx->prds.dma_addr, &ithc->regs->dma_tx.addr); - writeb(num_pages - 1, &ithc->regs->dma_tx.num_prds); -+ -+ // Init buffers. 
- CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); - return 0; - } - --static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, u8 channel, u8 buf) { -+static int ithc_dma_rx_process_buf(struct ithc *ithc, struct ithc_dma_data_buffer *data, -+ u8 channel, u8 buf) -+{ - if (buf >= NUM_RX_BUF) { - pci_err(ithc->pci, "invalid dma ringbuffer index\n"); - return -EINVAL; - } -- ithc_set_active(ithc); - u32 len = data->data_size; - struct ithc_dma_rx_header *hdr = data->addr; - u8 *hiddata = (void *)(hdr + 1); -- if (len >= sizeof *hdr && hdr->code == DMA_RX_CODE_RESET) { -+ if (len >= sizeof(*hdr) && hdr->code == DMA_RX_CODE_RESET) { -+ // The THC sends a reset request when we need to reinitialize the device. -+ // This usually only happens if we send an invalid command or put the device -+ // in a bad state. - CHECK(ithc_reset, ithc); -- } else if (len < sizeof *hdr || len != sizeof *hdr + hdr->data_size) { -+ } else if (len < sizeof(*hdr) || len != sizeof(*hdr) + hdr->data_size) { - if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -- // When the CPU enters a low power state during DMA, we can get truncated messages. -- // Typically this will be a single touch HID report that is only 1 byte, or a multitouch report that is 257 bytes. -+ // When the CPU enters a low power state during DMA, we can get truncated -+ // messages. For Surface devices, this will typically be a single touch -+ // report that is only 1 byte, or a multitouch report that is 257 bytes. - // See also ithc_set_active(). - } else { -- pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", channel, buf, len, hdr->code, hdr->data_size); -- print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ pci_err(ithc->pci, "invalid dma rx data! channel %u, buffer %u, size %u, code %u, data size %u\n", -+ channel, buf, len, hdr->code, hdr->data_size); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, -+ hdr, min(len, 0x400u), 0); - } - } else if (hdr->code == DMA_RX_CODE_REPORT_DESCRIPTOR && hdr->data_size > 8) { -+ // Response to a 'get report descriptor' request. -+ // The actual descriptor is preceded by 8 nul bytes. - CHECK(hid_parse_report, ithc->hid, hiddata + 8, hdr->data_size - 8); - WRITE_ONCE(ithc->hid_parse_done, true); - wake_up(&ithc->wait_hid_parse); - } else if (hdr->code == DMA_RX_CODE_INPUT_REPORT) { -+ // Standard HID input report containing touch data. - CHECK(hid_input_report, ithc->hid, HID_INPUT_REPORT, hiddata, hdr->data_size, 1); - } else if (hdr->code == DMA_RX_CODE_FEATURE_REPORT) { -+ // Response to a 'get feature' request. - bool done = false; - mutex_lock(&ithc->hid_get_feature_mutex); - if (ithc->hid_get_feature_buf) { -- if (hdr->data_size < ithc->hid_get_feature_size) ithc->hid_get_feature_size = hdr->data_size; -+ if (hdr->data_size < ithc->hid_get_feature_size) -+ ithc->hid_get_feature_size = hdr->data_size; - memcpy(ithc->hid_get_feature_buf, hiddata, ithc->hid_get_feature_size); - ithc->hid_get_feature_buf = NULL; - done = true; - } - mutex_unlock(&ithc->hid_get_feature_mutex); -- if (done) wake_up(&ithc->wait_hid_get_feature); -- else CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, hiddata, hdr->data_size, 1); -+ if (done) { -+ wake_up(&ithc->wait_hid_get_feature); -+ } else { -+ // Received data without a matching request, or the request already -+ // timed out. (XXX What's the correct thing to do here?) 
-+ CHECK(hid_input_report, ithc->hid, HID_FEATURE_REPORT, -+ hiddata, hdr->data_size, 1); -+ } - } else { -- pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", channel, buf, len, hdr->code); -- print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, hdr, min(len, 0x400u), 0); -+ pci_dbg(ithc->pci, "unhandled dma rx data! channel %u, buffer %u, size %u, code %u\n", -+ channel, buf, len, hdr->code); -+ print_hex_dump_debug(DEVNAME " data: ", DUMP_PREFIX_OFFSET, 32, 1, -+ hdr, min(len, 0x400u), 0); - } - return 0; - } - --static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { -+static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) -+{ -+ // Process all filled RX buffers from the ringbuffer. - struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; -- unsigned n = rx->num_received; -+ unsigned int n = rx->num_received; - u8 head_wrap = readb(&ithc->regs->dma_rx[channel].head); - while (1) { - u8 tail = n % NUM_RX_BUF; -@@ -204,7 +307,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { - writeb(tail_wrap, &ithc->regs->dma_rx[channel].tail); - // ringbuffer is full if tail_wrap == head_wrap - // ringbuffer is empty if tail_wrap == head_wrap ^ WRAP_FLAG -- if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) return 0; -+ if (tail_wrap == (head_wrap ^ DMA_RX_WRAP_FLAG)) -+ return 0; - - // take the buffer that the device just filled - struct ithc_dma_data_buffer *b = &rx->bufs[n % NUM_RX_BUF]; -@@ -218,7 +322,8 @@ static int ithc_dma_rx_unlocked(struct ithc *ithc, u8 channel) { - CHECK_RET(ithc_dma_data_buffer_put, ithc, &rx->prds, b, tail); - } - } --int ithc_dma_rx(struct ithc *ithc, u8 channel) { -+int ithc_dma_rx(struct ithc *ithc, u8 channel) -+{ - struct ithc_dma_rx *rx = &ithc->dma_rx[channel]; - mutex_lock(&rx->mutex); - int ret = ithc_dma_rx_unlocked(ithc, channel); -@@ -226,14 +331,21 @@ int ithc_dma_rx(struct ithc *ithc, u8 channel) { - return ret; - } - --static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) -+{ -+ ithc_set_active(ithc, 100 * USEC_PER_MSEC); -+ -+ // Send a single TX buffer to the THC. - pci_dbg(ithc->pci, "dma tx command %u, size %u\n", cmdcode, datasize); - struct ithc_dma_tx_header *hdr; -+ // Data must be padded to next 4-byte boundary. - u8 padding = datasize & 3 ? 4 - (datasize & 3) : 0; -- unsigned fullsize = sizeof *hdr + datasize + padding; -- if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) return -EINVAL; -+ unsigned int fullsize = sizeof(*hdr) + datasize + padding; -+ if (fullsize > ithc->dma_tx.max_size || fullsize > PAGE_SIZE) -+ return -EINVAL; - CHECK_RET(ithc_dma_data_buffer_get, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); - -+ // Fill the TX buffer with header and data. - ithc->dma_tx.buf.data_size = fullsize; - hdr = ithc->dma_tx.buf.addr; - hdr->code = cmdcode; -@@ -241,15 +353,18 @@ static int ithc_dma_tx_unlocked(struct ithc *ithc, u32 cmdcode, u32 datasize, vo - u8 *dest = (void *)(hdr + 1); - memcpy(dest, data, datasize); - dest += datasize; -- for (u8 p = 0; p < padding; p++) *dest++ = 0; -+ for (u8 p = 0; p < padding; p++) -+ *dest++ = 0; - CHECK_RET(ithc_dma_data_buffer_put, ithc, &ithc->dma_tx.prds, &ithc->dma_tx.buf, 0); - -+ // Let the THC process the buffer. 
- bitsb_set(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND); - CHECK_RET(waitb, ithc, &ithc->regs->dma_tx.control, DMA_TX_CONTROL_SEND, 0); - writel(DMA_TX_STATUS_DONE, &ithc->regs->dma_tx.status); - return 0; - } --int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) { -+int ithc_dma_tx(struct ithc *ithc, u32 cmdcode, u32 datasize, void *data) -+{ - mutex_lock(&ithc->dma_tx.mutex); - int ret = ithc_dma_tx_unlocked(ithc, cmdcode, datasize, data); - mutex_unlock(&ithc->dma_tx.mutex); -diff --git a/drivers/hid/ithc/ithc-dma.h b/drivers/hid/ithc/ithc-dma.h -index d9f2c19a13f3a..93652e4476bf8 100644 ---- a/drivers/hid/ithc/ithc-dma.h -+++ b/drivers/hid/ithc/ithc-dma.h -@@ -1,3 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ -+ - #define PRD_SIZE_MASK 0xffffff - #define PRD_FLAG_END 0x1000000 - #define PRD_FLAG_SUCCESS 0x2000000 -@@ -59,7 +61,7 @@ struct ithc_dma_rx { - struct ithc_dma_data_buffer bufs[NUM_RX_BUF]; - }; - --int ithc_dma_rx_init(struct ithc *ithc, u8 channel, const char *devname); -+int ithc_dma_rx_init(struct ithc *ithc, u8 channel); - void ithc_dma_rx_enable(struct ithc *ithc, u8 channel); - int ithc_dma_tx_init(struct ithc *ithc); - int ithc_dma_rx(struct ithc *ithc, u8 channel); -diff --git a/drivers/hid/ithc/ithc-main.c b/drivers/hid/ithc/ithc-main.c -index 09512b9cb4d31..87ed4aa70fda0 100644 ---- a/drivers/hid/ithc/ithc-main.c -+++ b/drivers/hid/ithc/ithc-main.c -@@ -1,3 +1,5 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - - MODULE_DESCRIPTION("Intel Touch Host Controller driver"); -@@ -42,6 +44,9 @@ static const struct pci_device_id ithc_pci_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_RPL_S_PORT2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_THC_MTL_PORT2) }, -+ // XXX So far the THC seems to be the only Intel PCI device with PCI_CLASS_INPUT_PEN, -+ // so instead of the device list we could just do: -+ // { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .class = PCI_CLASS_INPUT_PEN, .class_mask = ~0, }, - {} - }; - MODULE_DEVICE_TABLE(pci, ithc_pci_tbl); -@@ -52,6 +57,7 @@ static bool ithc_use_polling = false; - module_param_named(poll, ithc_use_polling, bool, 0); - MODULE_PARM_DESC(poll, "Use polling instead of interrupts"); - -+// Since all known devices seem to use only channel 1, by default we disable channel 0. - static bool ithc_use_rx0 = false; - module_param_named(rx0, ithc_use_rx0, bool, 0); - MODULE_PARM_DESC(rx0, "Use DMA RX channel 0"); -@@ -60,37 +66,56 @@ static bool ithc_use_rx1 = true; - module_param_named(rx1, ithc_use_rx1, bool, 0); - MODULE_PARM_DESC(rx1, "Use DMA RX channel 1"); - -+// Values below 250 seem to work well on the SP7+. If this is set too high, you may observe cursor stuttering. -+static int ithc_dma_latency_us = 200; -+module_param_named(dma_latency_us, ithc_dma_latency_us, int, 0); -+MODULE_PARM_DESC(dma_latency_us, "Determines the CPU latency QoS value for DMA transfers (in microseconds), -1 to disable latency QoS"); -+ -+// Values above 1700 seem to work well on the SP7+. If this is set too low, you may observe cursor stuttering. 
-+static unsigned int ithc_dma_early_us = 2000; -+module_param_named(dma_early_us, ithc_dma_early_us, uint, 0); -+MODULE_PARM_DESC(dma_early_us, "Determines how early the CPU latency QoS value is applied before the next expected IRQ (in microseconds)"); -+ - static bool ithc_log_regs_enabled = false; - module_param_named(logregs, ithc_log_regs_enabled, bool, 0); - MODULE_PARM_DESC(logregs, "Log changes in register values (for debugging)"); - - // Sysfs attributes - --static bool ithc_is_config_valid(struct ithc *ithc) { -+static bool ithc_is_config_valid(struct ithc *ithc) -+{ - return ithc->config.device_id == DEVCFG_DEVICE_ID_TIC; - } - --static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - return sprintf(buf, "0x%04x", ithc->config.vendor_id); - } - static DEVICE_ATTR_RO(vendor); --static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t product_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - return sprintf(buf, "0x%04x", ithc->config.product_id); - } - static DEVICE_ATTR_RO(product); --static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - return sprintf(buf, "%u", ithc->config.revision); - } - static DEVICE_ATTR_RO(revision); --static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { -+static ssize_t fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) -+{ - struct ithc *ithc = dev_get_drvdata(dev); -- if (!ithc || !ithc_is_config_valid(ithc)) return -ENODEV; -+ if (!ithc || !ithc_is_config_valid(ithc)) -+ return -ENODEV; - u32 v = ithc->config.fw_version; - return sprintf(buf, "%i.%i.%i.%i", v >> 24, v >> 16 & 0xff, v >> 8 & 0xff, v & 0xff); - } -@@ -117,45 +142,75 @@ static void ithc_hid_stop(struct hid_device *hdev) { } - static int ithc_hid_open(struct hid_device *hdev) { return 0; } - static void ithc_hid_close(struct hid_device *hdev) { } - --static int ithc_hid_parse(struct hid_device *hdev) { -+static int ithc_hid_parse(struct hid_device *hdev) -+{ - struct ithc *ithc = hdev->driver_data; - u64 val = 0; - WRITE_ONCE(ithc->hid_parse_done, false); -- CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof val, &val); -- if (!wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), msecs_to_jiffies(1000))) return -ETIMEDOUT; -- return 0; -+ for (int retries = 0; ; retries++) { -+ CHECK_RET(ithc_dma_tx, ithc, DMA_TX_CODE_GET_REPORT_DESCRIPTOR, sizeof(val), &val); -+ if (wait_event_timeout(ithc->wait_hid_parse, READ_ONCE(ithc->hid_parse_done), -+ msecs_to_jiffies(200))) -+ return 0; -+ if (retries > 5) { -+ pci_err(ithc->pci, "failed to read report descriptor\n"); -+ return -ETIMEDOUT; -+ } -+ pci_warn(ithc->pci, "failed to read report descriptor, 
retrying\n"); -+ } - } - --static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, unsigned char rtype, int reqtype) { -+static int ithc_hid_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, -+ size_t len, unsigned char rtype, int reqtype) -+{ - struct ithc *ithc = hdev->driver_data; -- if (!buf || !len) return -EINVAL; -+ if (!buf || !len) -+ return -EINVAL; - u32 code; -- if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_OUTPUT_REPORT; -- else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) code = DMA_TX_CODE_SET_FEATURE; -- else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) code = DMA_TX_CODE_GET_FEATURE; -- else { -- pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", rtype, reqtype, reportnum); -+ if (rtype == HID_OUTPUT_REPORT && reqtype == HID_REQ_SET_REPORT) { -+ code = DMA_TX_CODE_OUTPUT_REPORT; -+ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_SET_REPORT) { -+ code = DMA_TX_CODE_SET_FEATURE; -+ } else if (rtype == HID_FEATURE_REPORT && reqtype == HID_REQ_GET_REPORT) { -+ code = DMA_TX_CODE_GET_FEATURE; -+ } else { -+ pci_err(ithc->pci, "unhandled hid request %i %i for report id %i\n", -+ rtype, reqtype, reportnum); - return -EINVAL; - } - buf[0] = reportnum; -+ - if (reqtype == HID_REQ_GET_REPORT) { -+ // Prepare for response. - mutex_lock(&ithc->hid_get_feature_mutex); - ithc->hid_get_feature_buf = buf; - ithc->hid_get_feature_size = len; - mutex_unlock(&ithc->hid_get_feature_mutex); -+ -+ // Transmit 'get feature' request. - int r = CHECK(ithc_dma_tx, ithc, code, 1, buf); - if (!r) { -- r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -- if (!r) r = -ETIMEDOUT; -- else if (r < 0) r = -EINTR; -- else r = 0; -+ r = wait_event_interruptible_timeout(ithc->wait_hid_get_feature, -+ !ithc->hid_get_feature_buf, msecs_to_jiffies(1000)); -+ if (!r) -+ r = -ETIMEDOUT; -+ else if (r < 0) -+ r = -EINTR; -+ else -+ r = 0; - } -+ -+ // If everything went ok, the buffer has been filled with the response data. -+ // Return the response size. - mutex_lock(&ithc->hid_get_feature_mutex); - ithc->hid_get_feature_buf = NULL; -- if (!r) r = ithc->hid_get_feature_size; -+ if (!r) -+ r = ithc->hid_get_feature_size; - mutex_unlock(&ithc->hid_get_feature_mutex); - return r; - } -+ -+ // 'Set feature', or 'output report'. These don't have a response. 
- CHECK_RET(ithc_dma_tx, ithc, code, len, buf); - return 0; - } -@@ -169,17 +224,22 @@ static struct hid_ll_driver ithc_ll_driver = { - .raw_request = ithc_hid_raw_request, - }; - --static void ithc_hid_devres_release(struct device *dev, void *res) { -+static void ithc_hid_devres_release(struct device *dev, void *res) -+{ - struct hid_device **hidm = res; -- if (*hidm) hid_destroy_device(*hidm); -+ if (*hidm) -+ hid_destroy_device(*hidm); - } - --static int ithc_hid_init(struct ithc *ithc) { -- struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof *hidm, GFP_KERNEL); -- if (!hidm) return -ENOMEM; -+static int ithc_hid_init(struct ithc *ithc) -+{ -+ struct hid_device **hidm = devres_alloc(ithc_hid_devres_release, sizeof(*hidm), GFP_KERNEL); -+ if (!hidm) -+ return -ENOMEM; - devres_add(&ithc->pci->dev, hidm); - struct hid_device *hid = hid_allocate_device(); -- if (IS_ERR(hid)) return PTR_ERR(hid); -+ if (IS_ERR(hid)) -+ return PTR_ERR(hid); - *hidm = hid; - - strscpy(hid->name, DEVFULLNAME, sizeof(hid->name)); -@@ -198,27 +258,45 @@ static int ithc_hid_init(struct ithc *ithc) { - - // Interrupts/polling - --static void ithc_activity_timer_callback(struct timer_list *t) { -- struct ithc *ithc = container_of(t, struct ithc, activity_timer); -- cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+static enum hrtimer_restart ithc_activity_start_timer_callback(struct hrtimer *t) -+{ -+ struct ithc *ithc = container_of(t, struct ithc, activity_start_timer); -+ ithc_set_active(ithc, ithc_dma_early_us * 2 + USEC_PER_MSEC); -+ return HRTIMER_NORESTART; - } - --void ithc_set_active(struct ithc *ithc) { -- // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -- // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_UNKNOWN_12 will be set when this happens. -- // The amount of truncated messages can become very high, resulting in user-visible effects (laggy/stuttering cursor). -- // To avoid this, we use a CPU latency QoS request to prevent the CPU from entering low power states during touch interactions. -- cpu_latency_qos_update_request(&ithc->activity_qos, 0); -- mod_timer(&ithc->activity_timer, jiffies + msecs_to_jiffies(1000)); --} -- --static int ithc_set_device_enabled(struct ithc *ithc, bool enable) { -- u32 x = ithc->config.touch_cfg = (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 -- | (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -- return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, offsetof(struct ithc_device_config, touch_cfg), sizeof x, &x); -+static enum hrtimer_restart ithc_activity_end_timer_callback(struct hrtimer *t) -+{ -+ struct ithc *ithc = container_of(t, struct ithc, activity_end_timer); -+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ return HRTIMER_NORESTART; - } - --static void ithc_disable_interrupts(struct ithc *ithc) { -+void ithc_set_active(struct ithc *ithc, unsigned int duration_us) -+{ -+ if (ithc_dma_latency_us < 0) -+ return; -+ // When CPU usage is very low, the CPU can enter various low power states (C2-C10). -+ // This disrupts DMA, causing truncated DMA messages. ERROR_FLAG_DMA_RX_TIMEOUT will be -+ // set when this happens. The amount of truncated messages can become very high, resulting -+ // in user-visible effects (laggy/stuttering cursor). To avoid this, we use a CPU latency -+ // QoS request to prevent the CPU from entering low power states during touch interactions. 
-+ cpu_latency_qos_update_request(&ithc->activity_qos, ithc_dma_latency_us); -+ hrtimer_start_range_ns(&ithc->activity_end_timer, -+ ns_to_ktime(duration_us * NSEC_PER_USEC), duration_us * NSEC_PER_USEC, HRTIMER_MODE_REL); -+} -+ -+static int ithc_set_device_enabled(struct ithc *ithc, bool enable) -+{ -+ u32 x = ithc->config.touch_cfg = -+ (ithc->config.touch_cfg & ~(u32)DEVCFG_TOUCH_MASK) | DEVCFG_TOUCH_UNKNOWN_2 | -+ (enable ? DEVCFG_TOUCH_ENABLE | DEVCFG_TOUCH_UNKNOWN_3 | DEVCFG_TOUCH_UNKNOWN_4 : 0); -+ return ithc_spi_command(ithc, SPI_CMD_CODE_WRITE, -+ offsetof(struct ithc_device_config, touch_cfg), sizeof(x), &x); -+} -+ -+static void ithc_disable_interrupts(struct ithc *ithc) -+{ - writel(0, &ithc->regs->error_control); - bitsb(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_IRQ, 0); - bitsb(&ithc->regs->dma_rx[0].control, DMA_RX_CONTROL_IRQ_UNKNOWN_1 | DMA_RX_CONTROL_IRQ_ERROR | DMA_RX_CONTROL_IRQ_UNKNOWN_4 | DMA_RX_CONTROL_IRQ_DATA, 0); -@@ -226,43 +304,85 @@ static void ithc_disable_interrupts(struct ithc *ithc) { - bitsb(&ithc->regs->dma_tx.control, DMA_TX_CONTROL_IRQ, 0); - } - --static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned channel) { -- writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, &ithc->regs->dma_rx[channel].status); -+static void ithc_clear_dma_rx_interrupts(struct ithc *ithc, unsigned int channel) -+{ -+ writel(DMA_RX_STATUS_ERROR | DMA_RX_STATUS_UNKNOWN_4 | DMA_RX_STATUS_HAVE_DATA, -+ &ithc->regs->dma_rx[channel].status); - } - --static void ithc_clear_interrupts(struct ithc *ithc) { -+static void ithc_clear_interrupts(struct ithc *ithc) -+{ - writel(0xffffffff, &ithc->regs->error_flags); - writel(ERROR_STATUS_DMA | ERROR_STATUS_SPI, &ithc->regs->error_status); - writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); - ithc_clear_dma_rx_interrupts(ithc, 0); - ithc_clear_dma_rx_interrupts(ithc, 1); -- writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, &ithc->regs->dma_tx.status); -+ writel(DMA_TX_STATUS_DONE | DMA_TX_STATUS_ERROR | DMA_TX_STATUS_UNKNOWN_2, -+ &ithc->regs->dma_tx.status); - } - --static void ithc_process(struct ithc *ithc) { -+static void ithc_process(struct ithc *ithc) -+{ - ithc_log_regs(ithc); - -- // read and clear error bits -+ bool rx0 = ithc_use_rx0 && (readl(&ithc->regs->dma_rx[0].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0; -+ bool rx1 = ithc_use_rx1 && (readl(&ithc->regs->dma_rx[1].status) & (DMA_RX_STATUS_ERROR | DMA_RX_STATUS_HAVE_DATA)) != 0; -+ -+ // Track time between DMA rx transfers, so we can try to predict when we need to enable CPU latency QoS for the next transfer -+ ktime_t t = ktime_get(); -+ ktime_t dt = ktime_sub(t, ithc->last_rx_time); -+ if (rx0 || rx1) { -+ ithc->last_rx_time = t; -+ if (dt > ms_to_ktime(100)) { -+ ithc->cur_rx_seq_count = 0; -+ ithc->cur_rx_seq_errors = 0; -+ } -+ ithc->cur_rx_seq_count++; -+ if (!ithc_use_polling && ithc_dma_latency_us >= 0) { -+ // Disable QoS, since the DMA transfer has completed (we re-enable it after a delay below) -+ cpu_latency_qos_update_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -+ hrtimer_try_to_cancel(&ithc->activity_end_timer); -+ } -+ } -+ -+ // Read and clear error bits - u32 err = readl(&ithc->regs->error_flags); - if (err) { -- if (err & ~ERROR_FLAG_DMA_UNKNOWN_12) pci_err(ithc->pci, "error flags: 0x%08x\n", err); - writel(err, &ithc->regs->error_flags); -+ if (err & ~ERROR_FLAG_DMA_RX_TIMEOUT) -+ pci_err(ithc->pci, "error flags: 0x%08x\n", err); 
-+ if (err & ERROR_FLAG_DMA_RX_TIMEOUT) { -+ // Only log an error if we see a significant number of these errors. -+ ithc->cur_rx_seq_errors++; -+ if (ithc->cur_rx_seq_errors && ithc->cur_rx_seq_errors % 50 == 0 && ithc->cur_rx_seq_errors > ithc->cur_rx_seq_count / 10) -+ pci_err(ithc->pci, "High number of DMA RX timeouts/errors (%u/%u, dt=%lldus). Try adjusting dma_early_us and/or dma_latency_us.\n", -+ ithc->cur_rx_seq_errors, ithc->cur_rx_seq_count, ktime_to_us(dt)); -+ } - } - -- // process DMA rx -+ // Process DMA rx - if (ithc_use_rx0) { - ithc_clear_dma_rx_interrupts(ithc, 0); -- ithc_dma_rx(ithc, 0); -+ if (rx0) -+ ithc_dma_rx(ithc, 0); - } - if (ithc_use_rx1) { - ithc_clear_dma_rx_interrupts(ithc, 1); -- ithc_dma_rx(ithc, 1); -+ if (rx1) -+ ithc_dma_rx(ithc, 1); -+ } -+ -+ // Start timer to re-enable QoS for next rx, but only if we've seen an ERROR_FLAG_DMA_RX_TIMEOUT -+ if ((rx0 || rx1) && !ithc_use_polling && ithc_dma_latency_us >= 0 && ithc->cur_rx_seq_errors > 0) { -+ ktime_t expires = ktime_add(t, ktime_sub_us(dt, ithc_dma_early_us)); -+ hrtimer_start_range_ns(&ithc->activity_start_timer, expires, 10 * NSEC_PER_USEC, HRTIMER_MODE_ABS); - } - - ithc_log_regs(ithc); - } - --static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { -+static irqreturn_t ithc_interrupt_thread(int irq, void *arg) -+{ - struct ithc *ithc = arg; - pci_dbg(ithc->pci, "IRQ! err=%08x/%08x/%08x, cmd=%02x/%08x, rx0=%02x/%08x, rx1=%02x/%08x, tx=%02x/%08x\n", - readl(&ithc->regs->error_control), readl(&ithc->regs->error_status), readl(&ithc->regs->error_flags), -@@ -274,14 +394,21 @@ static irqreturn_t ithc_interrupt_thread(int irq, void *arg) { - return IRQ_HANDLED; - } - --static int ithc_poll_thread(void *arg) { -+static int ithc_poll_thread(void *arg) -+{ - struct ithc *ithc = arg; -- unsigned sleep = 100; -+ unsigned int sleep = 100; - while (!kthread_should_stop()) { - u32 n = ithc->dma_rx[1].num_received; - ithc_process(ithc); -- if (n != ithc->dma_rx[1].num_received) sleep = 20; -- else sleep = min(200u, sleep + (sleep >> 4) + 1); -+ // Decrease polling interval to 20ms if we received data, otherwise slowly -+ // increase it up to 200ms. -+ if (n != ithc->dma_rx[1].num_received) { -+ ithc_set_active(ithc, 100 * USEC_PER_MSEC); -+ sleep = 20; -+ } else { -+ sleep = min(200u, sleep + (sleep >> 4) + 1); -+ } - msleep_interruptible(sleep); - } - return 0; -@@ -289,7 +416,8 @@ static int ithc_poll_thread(void *arg) { - - // Device initialization and shutdown - --static void ithc_disable(struct ithc *ithc) { -+static void ithc_disable(struct ithc *ithc) -+{ - bitsl_set(&ithc->regs->control_bits, CONTROL_QUIESCE); - CHECK(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, CONTROL_IS_QUIESCED); - bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -@@ -301,81 +429,112 @@ static void ithc_disable(struct ithc *ithc) { - ithc_clear_interrupts(ithc); - } - --static int ithc_init_device(struct ithc *ithc) { -+static int ithc_init_device(struct ithc *ithc) -+{ - ithc_log_regs(ithc); - bool was_enabled = (readl(&ithc->regs->control_bits) & CONTROL_NRESET) != 0; - ithc_disable(ithc); - CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_READY, CONTROL_READY); -+ -+ // Since we don't yet know which SPI config the device wants, use default speed and mode -+ // initially for reading config data. 
- ithc_set_spi_config(ithc, 10, 0); -- bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); // seems to help with reading config - -- if (was_enabled) if (msleep_interruptible(100)) return -EINTR; -+ // Setting the following bit seems to make reading the config more reliable. -+ bitsl_set(&ithc->regs->dma_rx[0].unknown_init_bits, 0x80000000); -+ -+ // If the device was previously enabled, wait a bit to make sure it's fully shut down. -+ if (was_enabled) -+ if (msleep_interruptible(100)) -+ return -EINTR; -+ -+ // Take the touch device out of reset. - bitsl(&ithc->regs->control_bits, CONTROL_QUIESCE, 0); - CHECK_RET(waitl, ithc, &ithc->regs->control_bits, CONTROL_IS_QUIESCED, 0); - for (int retries = 0; ; retries++) { - ithc_log_regs(ithc); - bitsl_set(&ithc->regs->control_bits, CONTROL_NRESET); -- if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) break; -+ if (!waitl(ithc, &ithc->regs->state, 0xf, 2)) -+ break; - if (retries > 5) { -- pci_err(ithc->pci, "too many retries, failed to reset device\n"); -+ pci_err(ithc->pci, "failed to reset device, state = 0x%08x\n", readl(&ithc->regs->state)); - return -ETIMEDOUT; - } -- pci_err(ithc->pci, "invalid state, retrying reset\n"); -+ pci_warn(ithc->pci, "invalid state, retrying reset\n"); - bitsl(&ithc->regs->control_bits, CONTROL_NRESET, 0); -- if (msleep_interruptible(1000)) return -EINTR; -+ if (msleep_interruptible(1000)) -+ return -EINTR; - } - ithc_log_regs(ithc); - -+ // Waiting for the following status bit makes reading config much more reliable, -+ // however the official driver does not seem to do this... - CHECK(waitl, ithc, &ithc->regs->dma_rx[0].status, DMA_RX_STATUS_UNKNOWN_4, DMA_RX_STATUS_UNKNOWN_4); - -- // read config -+ // Read configuration data. - for (int retries = 0; ; retries++) { - ithc_log_regs(ithc); -- memset(&ithc->config, 0, sizeof ithc->config); -- CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof ithc->config, &ithc->config); -+ memset(&ithc->config, 0, sizeof(ithc->config)); -+ CHECK_RET(ithc_spi_command, ithc, SPI_CMD_CODE_READ, 0, sizeof(ithc->config), &ithc->config); - u32 *p = (void *)&ithc->config; - pci_info(ithc->pci, "config: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); -- if (ithc_is_config_valid(ithc)) break; -+ if (ithc_is_config_valid(ithc)) -+ break; - if (retries > 10) { -- pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", ithc->config.device_id); -+ pci_err(ithc->pci, "failed to read config, unknown device ID 0x%08x\n", -+ ithc->config.device_id); - return -EIO; - } -- pci_err(ithc->pci, "failed to read config, retrying\n"); -- if (msleep_interruptible(100)) return -EINTR; -+ pci_warn(ithc->pci, "failed to read config, retrying\n"); -+ if (msleep_interruptible(100)) -+ return -EINTR; - } - ithc_log_regs(ithc); - -- CHECK_RET(ithc_set_spi_config, ithc, DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), DEVCFG_SPI_MODE(ithc->config.spi_config)); -+ // Apply SPI config and enable touch device. -+ CHECK_RET(ithc_set_spi_config, ithc, -+ DEVCFG_SPI_MAX_FREQ(ithc->config.spi_config), -+ DEVCFG_SPI_MODE(ithc->config.spi_config)); - CHECK_RET(ithc_set_device_enabled, ithc, true); - ithc_log_regs(ithc); - return 0; - } - --int ithc_reset(struct ithc *ithc) { -- // FIXME This should probably do devres_release_group()+ithc_start(). 
But because this is called during DMA -- // processing, that would have to be done asynchronously (schedule_work()?). And with extra locking? -+int ithc_reset(struct ithc *ithc) -+{ -+ // FIXME This should probably do devres_release_group()+ithc_start(). -+ // But because this is called during DMA processing, that would have to be done -+ // asynchronously (schedule_work()?). And with extra locking? - pci_err(ithc->pci, "reset\n"); - CHECK(ithc_init_device, ithc); -- if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -- if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ if (ithc_use_rx0) -+ ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) -+ ithc_dma_rx_enable(ithc, 1); - ithc_log_regs(ithc); - pci_dbg(ithc->pci, "reset completed\n"); - return 0; - } - --static void ithc_stop(void *res) { -+static void ithc_stop(void *res) -+{ - struct ithc *ithc = res; - pci_dbg(ithc->pci, "stopping\n"); - ithc_log_regs(ithc); -- if (ithc->poll_thread) CHECK(kthread_stop, ithc->poll_thread); -- if (ithc->irq >= 0) disable_irq(ithc->irq); -+ -+ if (ithc->poll_thread) -+ CHECK(kthread_stop, ithc->poll_thread); -+ if (ithc->irq >= 0) -+ disable_irq(ithc->irq); - CHECK(ithc_set_device_enabled, ithc, false); - ithc_disable(ithc); -- del_timer_sync(&ithc->activity_timer); -+ hrtimer_cancel(&ithc->activity_start_timer); -+ hrtimer_cancel(&ithc->activity_end_timer); - cpu_latency_qos_remove_request(&ithc->activity_qos); -- // clear dma config -- for(unsigned i = 0; i < 2; i++) { -+ -+ // Clear DMA config. -+ for (unsigned int i = 0; i < 2; i++) { - CHECK(waitl, ithc, &ithc->regs->dma_rx[i].status, DMA_RX_STATUS_ENABLED, 0); - lo_hi_writeq(0, &ithc->regs->dma_rx[i].addr); - writeb(0, &ithc->regs->dma_rx[i].num_bufs); -@@ -383,35 +542,43 @@ static void ithc_stop(void *res) { - } - lo_hi_writeq(0, &ithc->regs->dma_tx.addr); - writeb(0, &ithc->regs->dma_tx.num_prds); -+ - ithc_log_regs(ithc); - pci_dbg(ithc->pci, "stopped\n"); - } - --static void ithc_clear_drvdata(void *res) { -+static void ithc_clear_drvdata(void *res) -+{ - struct pci_dev *pci = res; - pci_set_drvdata(pci, NULL); - } - --static int ithc_start(struct pci_dev *pci) { -+static int ithc_start(struct pci_dev *pci) -+{ - pci_dbg(pci, "starting\n"); - if (pci_get_drvdata(pci)) { - pci_err(pci, "device already initialized\n"); - return -EINVAL; - } -- if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) return -ENOMEM; -+ if (!devres_open_group(&pci->dev, ithc_start, GFP_KERNEL)) -+ return -ENOMEM; - -- struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof *ithc, GFP_KERNEL); -- if (!ithc) return -ENOMEM; -+ // Allocate/init main driver struct. -+ struct ithc *ithc = devm_kzalloc(&pci->dev, sizeof(*ithc), GFP_KERNEL); -+ if (!ithc) -+ return -ENOMEM; - ithc->irq = -1; - ithc->pci = pci; -- snprintf(ithc->phys, sizeof ithc->phys, "pci-%s/" DEVNAME, pci_name(pci)); -+ snprintf(ithc->phys, sizeof(ithc->phys), "pci-%s/" DEVNAME, pci_name(pci)); - init_waitqueue_head(&ithc->wait_hid_parse); - init_waitqueue_head(&ithc->wait_hid_get_feature); - mutex_init(&ithc->hid_get_feature_mutex); - pci_set_drvdata(pci, ithc); - CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_clear_drvdata, pci); -- if (ithc_log_regs_enabled) ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof *ithc->prev_regs, GFP_KERNEL); -+ if (ithc_log_regs_enabled) -+ ithc->prev_regs = devm_kzalloc(&pci->dev, sizeof(*ithc->prev_regs), GFP_KERNEL); - -+ // PCI initialization. 
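-+	// (Enable the device, enable bus mastering, map BAR 0, and make sure the
-+	// device is in the D0 power state.)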
- CHECK_RET(pcim_enable_device, pci); - pci_set_master(pci); - CHECK_RET(pcim_iomap_regions, pci, BIT(0), DEVNAME " regs"); -@@ -419,29 +586,39 @@ static int ithc_start(struct pci_dev *pci) { - CHECK_RET(pci_set_power_state, pci, PCI_D0); - ithc->regs = pcim_iomap_table(pci)[0]; - -+ // Allocate IRQ. - if (!ithc_use_polling) { - CHECK_RET(pci_alloc_irq_vectors, pci, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); - ithc->irq = CHECK(pci_irq_vector, pci, 0); -- if (ithc->irq < 0) return ithc->irq; -+ if (ithc->irq < 0) -+ return ithc->irq; - } - -+ // Initialize THC and touch device. - CHECK_RET(ithc_init_device, ithc); - CHECK(devm_device_add_groups, &pci->dev, ithc_attribute_groups); -- if (ithc_use_rx0) CHECK_RET(ithc_dma_rx_init, ithc, 0, ithc_use_rx1 ? DEVNAME "0" : DEVNAME); -- if (ithc_use_rx1) CHECK_RET(ithc_dma_rx_init, ithc, 1, ithc_use_rx0 ? DEVNAME "1" : DEVNAME); -+ if (ithc_use_rx0) -+ CHECK_RET(ithc_dma_rx_init, ithc, 0); -+ if (ithc_use_rx1) -+ CHECK_RET(ithc_dma_rx_init, ithc, 1); - CHECK_RET(ithc_dma_tx_init, ithc); - -- CHECK_RET(ithc_hid_init, ithc); -- - cpu_latency_qos_add_request(&ithc->activity_qos, PM_QOS_DEFAULT_VALUE); -- timer_setup(&ithc->activity_timer, ithc_activity_timer_callback, 0); -+ hrtimer_init(&ithc->activity_start_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -+ ithc->activity_start_timer.function = ithc_activity_start_timer_callback; -+ hrtimer_init(&ithc->activity_end_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ ithc->activity_end_timer.function = ithc_activity_end_timer_callback; - -- // add ithc_stop callback AFTER setting up DMA buffers, so that polling/irqs/DMA are disabled BEFORE the buffers are freed -+ // Add ithc_stop() callback AFTER setting up DMA buffers, so that polling/irqs/DMA are -+ // disabled BEFORE the buffers are freed. - CHECK_RET(devm_add_action_or_reset, &pci->dev, ithc_stop, ithc); - -+ CHECK_RET(ithc_hid_init, ithc); -+ -+ // Start polling/IRQ. - if (ithc_use_polling) { - pci_info(pci, "using polling instead of irq\n"); -- // use a thread instead of simple timer because we want to be able to sleep -+ // Use a thread instead of simple timer because we want to be able to sleep. - ithc->poll_thread = kthread_run(ithc_poll_thread, ithc, DEVNAME "poll"); - if (IS_ERR(ithc->poll_thread)) { - int err = PTR_ERR(ithc->poll_thread); -@@ -449,13 +626,17 @@ static int ithc_start(struct pci_dev *pci) { - return err; - } - } else { -- CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); -+ CHECK_RET(devm_request_threaded_irq, &pci->dev, ithc->irq, NULL, -+ ithc_interrupt_thread, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, DEVNAME, ithc); - } - -- if (ithc_use_rx0) ithc_dma_rx_enable(ithc, 0); -- if (ithc_use_rx1) ithc_dma_rx_enable(ithc, 1); -+ if (ithc_use_rx0) -+ ithc_dma_rx_enable(ithc, 0); -+ if (ithc_use_rx1) -+ ithc_dma_rx_enable(ithc, 1); - -- // hid_add_device can only be called after irq/polling is started and DMA is enabled, because it calls ithc_hid_parse which reads the report descriptor via DMA -+ // hid_add_device() can only be called after irq/polling is started and DMA is enabled, -+ // because it calls ithc_hid_parse() which reads the report descriptor via DMA. 
- CHECK_RET(hid_add_device, ithc->hid); - - CHECK(ithc_debug_init, ithc); -@@ -464,43 +645,54 @@ static int ithc_start(struct pci_dev *pci) { - return 0; - } - --static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) { -+static int ithc_probe(struct pci_dev *pci, const struct pci_device_id *id) -+{ - pci_dbg(pci, "device probe\n"); - return ithc_start(pci); - } - --static void ithc_remove(struct pci_dev *pci) { -+static void ithc_remove(struct pci_dev *pci) -+{ - pci_dbg(pci, "device remove\n"); - // all cleanup is handled by devres - } - --static int ithc_suspend(struct device *dev) { -+// For suspend/resume, we just deinitialize and reinitialize everything. -+// TODO It might be cleaner to keep the HID device around, however we would then have to signal -+// to userspace that the touch device has lost state and userspace needs to e.g. resend 'set -+// feature' requests. Hidraw does not seem to have a facility to do that. -+static int ithc_suspend(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm suspend\n"); - devres_release_group(dev, ithc_start); - return 0; - } - --static int ithc_resume(struct device *dev) { -+static int ithc_resume(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm resume\n"); - return ithc_start(pci); - } - --static int ithc_freeze(struct device *dev) { -+static int ithc_freeze(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm freeze\n"); - devres_release_group(dev, ithc_start); - return 0; - } - --static int ithc_thaw(struct device *dev) { -+static int ithc_thaw(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm thaw\n"); - return ithc_start(pci); - } - --static int ithc_restore(struct device *dev) { -+static int ithc_restore(struct device *dev) -+{ - struct pci_dev *pci = to_pci_dev(dev); - pci_dbg(pci, "pm restore\n"); - return ithc_start(pci); -@@ -521,11 +713,13 @@ static struct pci_driver ithc_driver = { - //.dev_groups = ithc_attribute_groups, // could use this (since 5.14), however the attributes won't have valid values until config has been read anyway - }; - --static int __init ithc_init(void) { -+static int __init ithc_init(void) -+{ - return pci_register_driver(&ithc_driver); - } - --static void __exit ithc_exit(void) { -+static void __exit ithc_exit(void) -+{ - pci_unregister_driver(&ithc_driver); - } - -diff --git a/drivers/hid/ithc/ithc-regs.c b/drivers/hid/ithc/ithc-regs.c -index 85d567b05761f..e058721886e37 100644 ---- a/drivers/hid/ithc/ithc-regs.c -+++ b/drivers/hid/ithc/ithc-regs.c -@@ -1,63 +1,95 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause -+ - #include "ithc.h" - - #define reg_num(r) (0x1fff & (u16)(__force u64)(r)) - --void bitsl(__iomem u32 *reg, u32 mask, u32 val) { -- if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+void bitsl(__iomem u32 *reg, u32 mask, u32 val) -+{ -+ if (val & ~mask) -+ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", -+ reg_num(reg), val, mask); - writel((readl(reg) & ~mask) | (val & mask), reg); - } - --void bitsb(__iomem u8 *reg, u8 mask, u8 val) { -- if (val & ~mask) pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", reg_num(reg), val, mask); -+void bitsb(__iomem u8 *reg, u8 mask, u8 val) -+{ -+ if (val & ~mask) -+ pr_err("register 0x%x: invalid value 0x%x for bitmask 0x%x\n", -+ reg_num(reg), val, mask); - writeb((readb(reg) & ~mask) | (val & mask), reg); - } - --int 
waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) { -- pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+int waitl(struct ithc *ithc, __iomem u32 *reg, u32 mask, u32 val) -+{ -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", -+ reg_num(reg), mask, val); - u32 x; - if (readl_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -- pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", reg_num(reg), mask, val); -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%08x val 0x%08x\n", -+ reg_num(reg), mask, val); - return -ETIMEDOUT; - } - pci_dbg(ithc->pci, "done waiting\n"); - return 0; - } - --int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) { -- pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+int waitb(struct ithc *ithc, __iomem u8 *reg, u8 mask, u8 val) -+{ -+ pci_dbg(ithc->pci, "waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", -+ reg_num(reg), mask, val); - u8 x; - if (readb_poll_timeout(reg, x, (x & mask) == val, 200, 1000*1000)) { -- pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", reg_num(reg), mask, val); -+ pci_err(ithc->pci, "timed out waiting for reg 0x%04x mask 0x%02x val 0x%02x\n", -+ reg_num(reg), mask, val); - return -ETIMEDOUT; - } - pci_dbg(ithc->pci, "done waiting\n"); - return 0; - } - --int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) { -+int ithc_set_spi_config(struct ithc *ithc, u8 speed, u8 mode) -+{ - pci_dbg(ithc->pci, "setting SPI speed to %i, mode %i\n", speed, mode); -- if (mode == 3) mode = 2; -+ if (mode == 3) -+ mode = 2; - bitsl(&ithc->regs->spi_config, - SPI_CONFIG_MODE(0xff) | SPI_CONFIG_SPEED(0xff) | SPI_CONFIG_UNKNOWN_18(0xff) | SPI_CONFIG_SPEED2(0xff), - SPI_CONFIG_MODE(mode) | SPI_CONFIG_SPEED(speed) | SPI_CONFIG_UNKNOWN_18(0) | SPI_CONFIG_SPEED2(speed)); - return 0; - } - --int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) { -+int ithc_spi_command(struct ithc *ithc, u8 command, u32 offset, u32 size, void *data) -+{ - pci_dbg(ithc->pci, "SPI command %u, size %u, offset %u\n", command, size, offset); -- if (size > sizeof ithc->regs->spi_cmd.data) return -EINVAL; -+ if (size > sizeof(ithc->regs->spi_cmd.data)) -+ return -EINVAL; -+ -+ // Wait if the device is still busy. - CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -+ // Clear result flags. - writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); -+ -+ // Init SPI command data. - writeb(command, &ithc->regs->spi_cmd.code); - writew(size, &ithc->regs->spi_cmd.size); - writel(offset, &ithc->regs->spi_cmd.offset); - u32 *p = data, n = (size + 3) / 4; -- for (u32 i = 0; i < n; i++) writel(p[i], &ithc->regs->spi_cmd.data[i]); -+ for (u32 i = 0; i < n; i++) -+ writel(p[i], &ithc->regs->spi_cmd.data[i]); -+ -+ // Start transmission. - bitsb_set(&ithc->regs->spi_cmd.control, SPI_CMD_CONTROL_SEND); - CHECK_RET(waitl, ithc, &ithc->regs->spi_cmd.status, SPI_CMD_STATUS_BUSY, 0); -- if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) return -EIO; -- if (readw(&ithc->regs->spi_cmd.size) != size) return -EMSGSIZE; -- for (u32 i = 0; i < n; i++) p[i] = readl(&ithc->regs->spi_cmd.data[i]); -+ -+ // Read response. 
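-+	// (The command must have completed without the error bit set, and the
-+	// device must echo back exactly the requested size.)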
-+ if ((readl(&ithc->regs->spi_cmd.status) & (SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR)) != SPI_CMD_STATUS_DONE) -+ return -EIO; -+ if (readw(&ithc->regs->spi_cmd.size) != size) -+ return -EMSGSIZE; -+ for (u32 i = 0; i < n; i++) -+ p[i] = readl(&ithc->regs->spi_cmd.data[i]); -+ - writel(SPI_CMD_STATUS_DONE | SPI_CMD_STATUS_ERROR, &ithc->regs->spi_cmd.status); - return 0; - } -diff --git a/drivers/hid/ithc/ithc-regs.h b/drivers/hid/ithc/ithc-regs.h -index 1a96092ed7eed..d4007d9e2bacc 100644 ---- a/drivers/hid/ithc/ithc-regs.h -+++ b/drivers/hid/ithc/ithc-regs.h -@@ -1,3 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ -+ - #define CONTROL_QUIESCE BIT(1) - #define CONTROL_IS_QUIESCED BIT(2) - #define CONTROL_NRESET BIT(3) -@@ -24,7 +26,7 @@ - - #define ERROR_FLAG_DMA_UNKNOWN_9 BIT(9) - #define ERROR_FLAG_DMA_UNKNOWN_10 BIT(10) --#define ERROR_FLAG_DMA_UNKNOWN_12 BIT(12) // set when we receive a truncated DMA message -+#define ERROR_FLAG_DMA_RX_TIMEOUT BIT(12) // set when we receive a truncated DMA message - #define ERROR_FLAG_DMA_UNKNOWN_13 BIT(13) - #define ERROR_FLAG_SPI_BUS_TURNAROUND BIT(16) - #define ERROR_FLAG_SPI_RESPONSE_TIMEOUT BIT(17) -@@ -67,6 +69,7 @@ - #define DMA_RX_STATUS_HAVE_DATA BIT(5) - #define DMA_RX_STATUS_ENABLED BIT(8) - -+// COUNTER_RESET can be written to counter registers to reset them to zero. However, in some cases this can mess up the THC. - #define COUNTER_RESET BIT(31) - - struct ithc_registers { -@@ -147,15 +150,15 @@ static_assert(sizeof(struct ithc_registers) == 0x1300); - #define DEVCFG_SPI_MAX_FREQ(x) (((x) >> 1) & 0xf) // high bit = use high speed mode? - #define DEVCFG_SPI_MODE(x) (((x) >> 6) & 3) - #define DEVCFG_SPI_UNKNOWN_8(x) (((x) >> 8) & 0x3f) --#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) --#define DEVCFG_SPI_HEARTBEAT_INTERVAL (((x) >> 21) & 7) -+#define DEVCFG_SPI_NEEDS_HEARTBEAT BIT(20) // TODO implement heartbeat -+#define DEVCFG_SPI_HEARTBEAT_INTERVAL(x) (((x) >> 21) & 7) - #define DEVCFG_SPI_UNKNOWN_25 BIT(25) - #define DEVCFG_SPI_UNKNOWN_26 BIT(26) - #define DEVCFG_SPI_UNKNOWN_27 BIT(27) --#define DEVCFG_SPI_DELAY (((x) >> 28) & 7) --#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) -+#define DEVCFG_SPI_DELAY(x) (((x) >> 28) & 7) // TODO use this -+#define DEVCFG_SPI_USE_EXT_READ_CFG BIT(31) // TODO use this? - --struct ithc_device_config { -+struct ithc_device_config { // (Example values are from an SP7+.) - u32 _unknown_00; // 00 = 0xe0000402 (0xe0000401 after DMA_RX_CODE_RESET) - u32 _unknown_04; // 04 = 0x00000000 - u32 dma_buf_sizes; // 08 = 0x000a00ff -@@ -166,9 +169,9 @@ struct ithc_device_config { - u16 vendor_id; // 1c = 0x045e = Microsoft Corp. - u16 product_id; // 1e = 0x0c1a - u32 revision; // 20 = 0x00000001 -- u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 -+ u32 fw_version; // 24 = 0x05008a8b = 5.0.138.139 (this value looks more random on newer devices) - u32 _unknown_28; // 28 = 0x00000000 -- u32 fw_mode; // 2c = 0x00000000 -+ u32 fw_mode; // 2c = 0x00000000 (for fw update?) - u32 _unknown_30; // 30 = 0x00000000 - u32 _unknown_34; // 34 = 0x0404035e (u8,u8,u8,u8 = version?) - u32 _unknown_38; // 38 = 0x000001c0 (0x000001c1 after DMA_RX_CODE_RESET) -diff --git a/drivers/hid/ithc/ithc.h b/drivers/hid/ithc/ithc.h -index 6a9b0d480bc15..028e55a4ec53e 100644 ---- a/drivers/hid/ithc/ithc.h -+++ b/drivers/hid/ithc/ithc.h -@@ -1,3 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ -+ - #include - #include - #include -@@ -21,7 +23,7 @@ - #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - - #define CHECK(fn, ...) 
({ int r = fn(__VA_ARGS__); if (r < 0) pci_err(ithc->pci, "%s: %s failed with %i\n", __func__, #fn, r); r; }) --#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while(0) -+#define CHECK_RET(...) do { int r = CHECK(__VA_ARGS__); if (r < 0) return r; } while (0) - - #define NUM_RX_BUF 16 - -@@ -35,8 +37,13 @@ struct ithc { - struct pci_dev *pci; - int irq; - struct task_struct *poll_thread; ++ struct ithc_dma_rx dma_rx[2]; ++ struct ithc_dma_tx dma_tx; ++}; + - struct pm_qos_request activity_qos; -- struct timer_list activity_timer; -+ struct hrtimer activity_start_timer; -+ struct hrtimer activity_end_timer; -+ ktime_t last_rx_time; -+ unsigned int cur_rx_seq_count; -+ unsigned int cur_rx_seq_errors; - - struct hid_device *hid; - bool hid_parse_done; -@@ -54,7 +61,7 @@ struct ithc { - }; - - int ithc_reset(struct ithc *ithc); --void ithc_set_active(struct ithc *ithc); ++int ithc_reset(struct ithc *ithc); +void ithc_set_active(struct ithc *ithc, unsigned int duration_us); - int ithc_debug_init(struct ithc *ithc); - void ithc_log_regs(struct ithc *ithc); - ++int ithc_debug_init(struct ithc *ithc); ++void ithc_log_regs(struct ithc *ithc); ++ -- -2.42.0 +2.43.0 -From c4cbbcd24ea10e6558753174ae6dabcc9b54e438 Mon Sep 17 00:00:00 2001 +From fb7e9294f3970a450b891c2cc7b2195861d454e3 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 22 Oct 2023 14:57:11 +0200 Subject: [PATCH] platform/surface: aggregator_registry: Add support for @@ -6891,7 +5687,7 @@ Patchset: surface-sam 1 file changed, 3 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c -index 0fe5be5396525..0d8c8395c5886 100644 +index aeb3feae40ff..2bc4977037fc 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -367,6 +367,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { @@ -6905,9 +5701,9 @@ index 0fe5be5396525..0d8c8395c5886 100644 { "MSHW0123", (unsigned long)ssam_node_group_sls }, -- -2.42.0 +2.43.0 -From 0bb0adce3efad7a43fc3811f6cc24148c8c75253 Mon Sep 17 00:00:00 2001 +From 2de16abc5d0d2334e2935b1bdb3667a95d0009f2 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Mon, 20 Nov 2023 19:47:00 +0100 Subject: [PATCH] platform/surface: aggregator_registry: Add support for @@ -6925,7 +5721,7 @@ Patchset: surface-sam 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c -index 0d8c8395c5886..530db4db71aba 100644 +index 2bc4977037fc..26cb6229ad16 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -247,8 +247,8 @@ static const struct software_node *ssam_node_group_sl5[] = { @@ -6975,9 +5771,743 @@ index 0d8c8395c5886..530db4db71aba 100644 { }, }; -- -2.42.0 +2.43.0 + +From c06e370b5ed873b603aa0dc2faafe24a9e63b3e8 Mon Sep 17 00:00:00 2001 +From: Ivor Wanders +Date: Mon, 18 Dec 2023 19:21:32 -0500 +Subject: [PATCH] platform/surface: aggregator_registry: add entry for fan + speed + +Add an entry for the fan speed function. +Add this new entry to the Surface Pro 9 group. 
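+
+For reference, the node name follows the usual SSAM format of
+ssam:domain:category:target:instance:function, so "ssam:01:05:01:01:01" is
+function 0x01, instance 0x01 of the fan subsystem (category 0x05) at target
+0x01. A client driver binds to this node roughly as follows (an illustrative
+sketch only; the actual match table ships with the fan driver added in the
+next patch):
+
+    static const struct ssam_device_id ssam_fan_match[] = {
+        { SSAM_SDEV(FAN, SAM, 0x01, 0x01) }, /* ssam:01:05:01:01:01 */
+        {},
+    };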
+
+Signed-off-by: Ivor Wanders <ivor@iwanders.net>
+Link: https://github.com/linux-surface/kernel/pull/144
+Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
+Patchset: surface-sam
+---
+ drivers/platform/surface/surface_aggregator_registry.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
+index 26cb6229ad16..f02a933160ff 100644
+--- a/drivers/platform/surface/surface_aggregator_registry.c
++++ b/drivers/platform/surface/surface_aggregator_registry.c
+@@ -74,6 +74,12 @@ static const struct software_node ssam_node_tmp_pprof = {
+ .parent = &ssam_node_root,
+ };
+ 
++/* Fan speed function. */
++static const struct software_node ssam_node_fan_speed = {
++ .name = "ssam:01:05:01:01:01",
++ .parent = &ssam_node_root,
++};
++
+ /* Tablet-mode switch via KIP subsystem. */
+ static const struct software_node ssam_node_kip_tablet_switch = {
+ .name = "ssam:01:0e:01:00:01",
+@@ -319,6 +325,7 @@ static const struct software_node *ssam_node_group_sp9[] = {
+ &ssam_node_bat_ac,
+ &ssam_node_bat_main,
+ &ssam_node_tmp_pprof,
++ &ssam_node_fan_speed,
+ &ssam_node_pos_tablet_switch,
+ &ssam_node_hid_kip_keyboard,
+-- 
+2.43.0
+
+From 63dcbbcad69219e1487db46a5c26c1ebdd9ef6be Mon Sep 17 00:00:00 2001
+From: Ivor Wanders <ivor@iwanders.net>
+Date: Thu, 30 Nov 2023 20:20:24 -0500
+Subject: [PATCH] hwmon: add fan speed monitoring driver for Surface devices
+
+Adds a driver that provides read-only access to the fan speed for Microsoft
+Surface Pro devices. The fan speed is always regulated by the EC and cannot
+be influenced directly.
+
+Signed-off-by: Ivor Wanders <ivor@iwanders.net>
+Link: https://github.com/linux-surface/kernel/pull/144
+Patchset: surface-sam
+---
+ Documentation/hwmon/index.rst | 1 +
+ Documentation/hwmon/surface_fan.rst | 25 ++++++++
+ MAINTAINERS | 8 +++
+ drivers/hwmon/Kconfig | 13 ++++
+ drivers/hwmon/Makefile | 1 +
+ drivers/hwmon/surface_fan.c | 93 +++++++++++++++++++++++++++++
+ 6 files changed, 141 insertions(+)
+ create mode 100644 Documentation/hwmon/surface_fan.rst
+ create mode 100644 drivers/hwmon/surface_fan.c
+
+diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
+index 72f4e6065bae..7c254562abd6 100644
+--- a/Documentation/hwmon/index.rst
++++ b/Documentation/hwmon/index.rst
+@@ -204,6 +204,7 @@ Hardware Monitoring Kernel Drivers
+ smsc47m1
+ sparx5-temp
+ stpddc60
++ surface_fan
+ sy7636a-hwmon
+ tc654
+ tc74
+diff --git a/Documentation/hwmon/surface_fan.rst b/Documentation/hwmon/surface_fan.rst
+new file mode 100644
+index 000000000000..07942574c4f0
+--- /dev/null
++++ b/Documentation/hwmon/surface_fan.rst
+@@ -0,0 +1,25 @@
++.. SPDX-License-Identifier: GPL-2.0-or-later
++
++Kernel driver surface_fan
++=========================
++
++Supported Devices:
++
++ * Microsoft Surface Pro 9
++
++Author: Ivor Wanders <ivor@iwanders.net>
++
++Description
++-----------
++
++This provides monitoring of the fan found in some Microsoft Surface Pro devices,
++like the Surface Pro 9. The fan is always controlled by the onboard controller.
++
++Sysfs interface
++---------------
++
++======================= ======= =========================================
++Name Perm Description
++======================= ======= =========================================
++``fan1_input`` RO Current fan speed in RPM. 
++======================= ======= =========================================
+diff --git a/MAINTAINERS b/MAINTAINERS
+index a7c4cf8201e0..77eb076e77da 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -14331,6 +14331,14 @@ F: Documentation/driver-api/surface_aggregator/clients/dtx.rst
+ F: drivers/platform/surface/surface_dtx.c
+ F: include/uapi/linux/surface_aggregator/dtx.h
+ 
++MICROSOFT SURFACE SENSOR FAN DRIVER
++M: Maximilian Luz <luzmaximilian@gmail.com>
++M: Ivor Wanders <ivor@iwanders.net>
++L: linux-hwmon@vger.kernel.org
++S: Maintained
++F: Documentation/hwmon/surface_fan.rst
++F: drivers/hwmon/surface_fan.c
++
+ MICROSOFT SURFACE GPE LID SUPPORT DRIVER
+ M: Maximilian Luz <luzmaximilian@gmail.com>
+ L: platform-driver-x86@vger.kernel.org
+diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
+index cf27523eed5a..1cef428c79ea 100644
+--- a/drivers/hwmon/Kconfig
++++ b/drivers/hwmon/Kconfig
+@@ -1983,6 +1983,19 @@ config SENSORS_SFCTEMP
+ This driver can also be built as a module. If so, the module
+ will be called sfctemp.
+ 
++config SENSORS_SURFACE_FAN
++ tristate "Surface Fan Driver"
++ depends on SURFACE_AGGREGATOR
++ help
++ Driver that provides monitoring of the fan on Surface Pro devices that
++ have one, like the Surface Pro 9.
++
++ This makes the fan's current speed accessible through the hwmon
++ system. It does not provide control over the fan; the firmware is
++ responsible for that, and this driver merely provides monitoring.
++
++ Select M or Y here, if you want to be able to read the fan's speed.
++
+ config SENSORS_ADC128D818
+ tristate "Texas Instruments ADC128D818"
+ depends on I2C
+diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
+index e84bd9685b5c..30a284fc5ab6 100644
+--- a/drivers/hwmon/Makefile
++++ b/drivers/hwmon/Makefile
+@@ -200,6 +200,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o
+ obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
+ obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
+ obj-$(CONFIG_SENSORS_STTS751) += stts751.o
++obj-$(CONFIG_SENSORS_SURFACE_FAN)+= surface_fan.o
+ obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o
+ obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o
+ obj-$(CONFIG_SENSORS_TC74) += tc74.o
+diff --git a/drivers/hwmon/surface_fan.c b/drivers/hwmon/surface_fan.c
+new file mode 100644
+index 000000000000..7c2e3ae3eb40
+--- /dev/null
++++ b/drivers/hwmon/surface_fan.c
+@@ -0,0 +1,93 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * Surface Fan driver for Surface System Aggregator Module. It provides access
++ * to the fan's rpm through the hwmon system. 
++ *
++ * Copyright (C) 2023 Ivor Wanders <ivor@iwanders.net>
++ */
++
++#include <linux/hwmon.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/types.h>
++#include <linux/surface_aggregator/device.h>
++
++// SSAM
++SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_fan_rpm_get, __le16, {
++ .target_category = SSAM_SSH_TC_FAN,
++ .command_id = 0x01,
++});
++
++// hwmon
++static umode_t surface_fan_hwmon_is_visible(const void *drvdata,
++ enum hwmon_sensor_types type, u32 attr,
++ int channel)
++{
++ return 0444;
++}
++
++static int surface_fan_hwmon_read(struct device *dev,
++ enum hwmon_sensor_types type, u32 attr,
++ int channel, long *val)
++{
++ struct ssam_device *sdev = dev_get_drvdata(dev);
++ int ret;
++ __le16 value;
++
++ ret = __ssam_fan_rpm_get(sdev, &value);
++ if (ret)
++ return ret;
++
++ *val = le16_to_cpu(value);
++
++ return ret;
++}
++
++static const struct hwmon_channel_info *const surface_fan_info[] = {
++ HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT),
++ NULL
++};
++
++static const struct hwmon_ops surface_fan_hwmon_ops = {
++ .is_visible = surface_fan_hwmon_is_visible,
++ .read = surface_fan_hwmon_read,
++};
++
++static const struct hwmon_chip_info surface_fan_chip_info = {
++ .ops = &surface_fan_hwmon_ops,
++ .info = surface_fan_info,
++};
++
++static int surface_fan_probe(struct ssam_device *sdev)
++{
++ struct device *hdev;
++
++ hdev = devm_hwmon_device_register_with_info(&sdev->dev,
++ "surface_fan", sdev,
++ &surface_fan_chip_info,
++ NULL);
++ if (IS_ERR(hdev))
++ return PTR_ERR(hdev);
++
++ return 0;
++}
++
++static const struct ssam_device_id ssam_fan_match[] = {
++ { SSAM_SDEV(FAN, SAM, 0x01, 0x01) },
++ {},
++};
++MODULE_DEVICE_TABLE(ssam, ssam_fan_match);
++
++static struct ssam_device_driver surface_fan = {
++ .probe = surface_fan_probe,
++ .match_table = ssam_fan_match,
++ .driver = {
++ .name = "surface_fan",
++ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
++ },
++};
++module_ssam_device_driver(surface_fan);
++
++MODULE_AUTHOR("Ivor Wanders <ivor@iwanders.net>");
++MODULE_DESCRIPTION("Fan Driver for Surface System Aggregator Module");
++MODULE_LICENSE("GPL");
+-- 
+2.43.0
+
+From 5f549c253e4df330fa8f311fe151df80e199bec4 Mon Sep 17 00:00:00 2001
+From: Maximilian Luz <luzmaximilian@gmail.com>
+Date: Sat, 30 Dec 2023 18:07:54 +0100
+Subject: [PATCH] hwmon: Add thermal sensor driver for Surface Aggregator
+ Module
+
+Some of the newer Microsoft Surface devices (such as the Surface Book
+3 and Pro 9) have thermal sensors connected via the Surface Aggregator
+Module (the embedded controller on those devices). Add a basic driver
+to read out the temperature values of those sensors.
+
+Link: https://github.com/linux-surface/surface-aggregator-module/issues/59
+Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
+Patchset: surface-sam
+---
+ drivers/hwmon/Kconfig | 10 +++
+ drivers/hwmon/Makefile | 1 +
+ drivers/hwmon/surface_temp.c | 165 +++++++++++++++++++++++++++++++++++
+ 3 files changed, 176 insertions(+)
+ create mode 100644 drivers/hwmon/surface_temp.c
+
+diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
+index 1cef428c79ea..ca20716911ad 100644
+--- a/drivers/hwmon/Kconfig
++++ b/drivers/hwmon/Kconfig
+@@ -1996,6 +1996,16 @@ config SENSORS_SURFACE_FAN
+ 
+ Select M or Y here, if you want to be able to read the fan's speed.
+ 
++config SENSORS_SURFACE_TEMP
++ tristate "Microsoft Surface Thermal Sensor Driver"
++ depends on SURFACE_AGGREGATOR
++ help
++ Driver for monitoring thermal sensors connected via the Surface
++ Aggregator Module (embedded controller) on Microsoft Surface devices.
++
++ This driver can also be built as a module. If so, the module
++ will be called surface_temp. 
++
+ config SENSORS_ADC128D818
+ tristate "Texas Instruments ADC128D818"
+ depends on I2C
+diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
+index 30a284fc5ab6..a6bcde6b4843 100644
+--- a/drivers/hwmon/Makefile
++++ b/drivers/hwmon/Makefile
+@@ -201,6 +201,7 @@ obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
+ obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o
+ obj-$(CONFIG_SENSORS_STTS751) += stts751.o
+ obj-$(CONFIG_SENSORS_SURFACE_FAN)+= surface_fan.o
++obj-$(CONFIG_SENSORS_SURFACE_TEMP)+= surface_temp.o
+ obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o
+ obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o
+ obj-$(CONFIG_SENSORS_TC74) += tc74.o
+diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c
+new file mode 100644
+index 000000000000..48c3e826713f
+--- /dev/null
++++ b/drivers/hwmon/surface_temp.c
+@@ -0,0 +1,165 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * Thermal sensor subsystem driver for Surface System Aggregator Module (SSAM).
++ *
++ * Copyright (C) 2022-2023 Maximilian Luz <luzmaximilian@gmail.com>
++ */
++
++#include <linux/bitops.h>
++#include <linux/hwmon.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/types.h>
++
++#include <linux/surface_aggregator/controller.h>
++#include <linux/surface_aggregator/device.h>
++
++
++/* -- SAM interface. -------------------------------------------------------- */
++
++SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, {
++ .target_category = SSAM_SSH_TC_TMP,
++ .command_id = 0x04,
++});
++
++SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, {
++ .target_category = SSAM_SSH_TC_TMP,
++ .command_id = 0x01,
++});
++
++static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors)
++{
++ __le16 sensors_le;
++ int status;
++
++ status = __ssam_tmp_get_available_sensors(sdev, &sensors_le);
++ if (status)
++ return status;
++
++ *sensors = le16_to_cpu(sensors_le);
++ return 0;
++}
++
++static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temperature)
++{
++ __le16 temp_le;
++ int status;
++
++ status = __ssam_tmp_get_temperature(sdev->ctrl, sdev->uid.target, iid, &temp_le);
++ if (status)
++ return status;
++
++ /* Convert 1/10 K to 1/1000 °C */
++ *temperature = (le16_to_cpu(temp_le) - 2731) * 100L;
++ return 0;
++}
++
++
++/* -- Driver. ---------------------------------------------------------------- */
++
++struct ssam_temp {
++ struct ssam_device *sdev;
++ s16 sensors;
++};
++
++static umode_t ssam_temp_hwmon_is_visible(const void *data,
++ enum hwmon_sensor_types type,
++ u32 attr, int channel)
++{
++ const struct ssam_temp *ssam_temp = data;
++
++ if (!(ssam_temp->sensors & BIT(channel)))
++ return 0;
++
++ return 0444;
++}
++
++static int ssam_temp_hwmon_read(struct device *dev,
++ enum hwmon_sensor_types type,
++ u32 attr, int channel, long *value)
++{
++ const struct ssam_temp *ssam_temp = dev_get_drvdata(dev);
++
++ return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value);
++}
++
++static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = {
++ HWMON_CHANNEL_INFO(chip,
++ HWMON_C_REGISTER_TZ),
++ /* We have at most 16 thermal sensor channels. 
*/ ++ HWMON_CHANNEL_INFO(temp, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT, ++ HWMON_T_INPUT), ++ NULL ++}; ++ ++static const struct hwmon_ops ssam_temp_hwmon_ops = { ++ .is_visible = ssam_temp_hwmon_is_visible, ++ .read = ssam_temp_hwmon_read, ++}; ++ ++static const struct hwmon_chip_info ssam_temp_hwmon_chip_info = { ++ .ops = &ssam_temp_hwmon_ops, ++ .info = ssam_temp_hwmon_info, ++}; ++ ++static int ssam_temp_probe(struct ssam_device *sdev) ++{ ++ struct ssam_temp *ssam_temp; ++ struct device *hwmon_dev; ++ s16 sensors; ++ int status; ++ ++ status = ssam_tmp_get_available_sensors(sdev, &sensors); ++ if (status) ++ return status; ++ ++ ssam_temp = devm_kzalloc(&sdev->dev, sizeof(*ssam_temp), GFP_KERNEL); ++ if (!ssam_temp) ++ return -ENOMEM; ++ ++ ssam_temp->sdev = sdev; ++ ssam_temp->sensors = sensors; ++ ++ hwmon_dev = devm_hwmon_device_register_with_info(&sdev->dev, ++ "surface_thermal", ssam_temp, &ssam_temp_hwmon_chip_info, ++ NULL); ++ if (IS_ERR(hwmon_dev)) ++ return PTR_ERR(hwmon_dev); ++ ++ return 0; ++} ++ ++static const struct ssam_device_id ssam_temp_match[] = { ++ { SSAM_SDEV(TMP, SAM, 0x00, 0x02) }, ++ { }, ++}; ++MODULE_DEVICE_TABLE(ssam, ssam_temp_match); ++ ++static struct ssam_device_driver ssam_temp = { ++ .probe = ssam_temp_probe, ++ .match_table = ssam_temp_match, ++ .driver = { ++ .name = "surface_temp", ++ .probe_type = PROBE_PREFER_ASYNCHRONOUS, ++ }, ++}; ++module_ssam_device_driver(ssam_temp); ++ ++MODULE_AUTHOR("Maximilian Luz "); ++MODULE_DESCRIPTION("Thermal sensor subsystem driver for Surface System Aggregator Module"); ++MODULE_LICENSE("GPL"); +-- +2.43.0 + +From 3ccfa3b6be4794f247488f7e665ba91793ec09c7 Mon Sep 17 00:00:00 2001 +From: Maximilian Luz +Date: Sat, 30 Dec 2023 18:12:23 +0100 +Subject: [PATCH] hwmon: surface_temp: Add support for sensor names + +The thermal subsystem of the Surface Aggregator Module allows us to +query the names of the respective thermal sensors. Forward those to +userspace. + +Signed-off-by: Ivor Wanders +Co-Developed-by: Maximilian Luz +Signed-off-by: Maximilian Luz +Patchset: surface-sam +--- + drivers/hwmon/surface_temp.c | 113 +++++++++++++++++++++++++++++------ + 1 file changed, 96 insertions(+), 17 deletions(-) + +diff --git a/drivers/hwmon/surface_temp.c b/drivers/hwmon/surface_temp.c +index 48c3e826713f..4c08926139db 100644 +--- a/drivers/hwmon/surface_temp.c ++++ b/drivers/hwmon/surface_temp.c +@@ -17,6 +17,27 @@ + + /* -- SAM interface. -------------------------------------------------------- */ + ++/* ++ * Available sensors are indicated by a 16-bit bitfield, where a 1 marks the ++ * presence of a sensor. So we have at most 16 possible sensors/channels. ++ */ ++#define SSAM_TMP_SENSOR_MAX_COUNT 16 ++ ++/* ++ * All names observed so far are 6 characters long, but there's only ++ * zeros after the name, so perhaps they can be longer. This number reflects ++ * the maximum zero-padded space observed in the returned buffer. 
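++ * Should a returned name ever fill its field without a terminator, the
++ * strscpy() in ssam_tmp_get_name() reports an error and the query fails,
++ * rather than silently using a truncated name.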
++ */ ++#define SSAM_TMP_SENSOR_NAME_LENGTH 18 ++ ++struct ssam_tmp_get_name_rsp { ++ __le16 unknown1; ++ char unknown2; ++ char name[SSAM_TMP_SENSOR_NAME_LENGTH]; ++} __packed; ++ ++static_assert(sizeof(struct ssam_tmp_get_name_rsp) == 21); ++ + SSAM_DEFINE_SYNC_REQUEST_CL_R(__ssam_tmp_get_available_sensors, __le16, { + .target_category = SSAM_SSH_TC_TMP, + .command_id = 0x04, +@@ -27,6 +48,11 @@ SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_temperature, __le16, { + .command_id = 0x01, + }); + ++SSAM_DEFINE_SYNC_REQUEST_MD_R(__ssam_tmp_get_name, struct ssam_tmp_get_name_rsp, { ++ .target_category = SSAM_SSH_TC_TMP, ++ .command_id = 0x0e, ++}); ++ + static int ssam_tmp_get_available_sensors(struct ssam_device *sdev, s16 *sensors) + { + __le16 sensors_le; +@@ -54,12 +80,37 @@ static int ssam_tmp_get_temperature(struct ssam_device *sdev, u8 iid, long *temp + return 0; + } + ++static int ssam_tmp_get_name(struct ssam_device *sdev, u8 iid, char *buf, size_t buf_len) ++{ ++ struct ssam_tmp_get_name_rsp name_rsp; ++ int status; ++ ++ status = __ssam_tmp_get_name(sdev->ctrl, sdev->uid.target, iid, &name_rsp); ++ if (status) ++ return status; ++ ++ /* ++ * This should not fail unless the name in the returned struct is not ++ * null-terminated or someone changed something in the struct ++ * definitions above, since our buffer and struct have the same ++ * capacity by design. So if this fails blow this up with a warning. ++ * Since the more likely cause is that the returned string isn't ++ * null-terminated, we might have received garbage (as opposed to just ++ * an incomplete string), so also fail the function. ++ */ ++ status = strscpy(buf, name_rsp.name, buf_len); ++ WARN_ON(status < 0); ++ ++ return status < 0 ? status : 0; ++} ++ + + /* -- Driver.---------------------------------------------------------------- */ + + struct ssam_temp { + struct ssam_device *sdev; + s16 sensors; ++ char names[SSAM_TMP_SENSOR_MAX_COUNT][SSAM_TMP_SENSOR_NAME_LENGTH]; + }; + + static umode_t ssam_temp_hwmon_is_visible(const void *data, +@@ -83,33 +134,47 @@ static int ssam_temp_hwmon_read(struct device *dev, + return ssam_tmp_get_temperature(ssam_temp->sdev, channel + 1, value); + } + ++static int ssam_temp_hwmon_read_string(struct device *dev, ++ enum hwmon_sensor_types type, ++ u32 attr, int channel, const char **str) ++{ ++ const struct ssam_temp *ssam_temp = dev_get_drvdata(dev); ++ ++ *str = ssam_temp->names[channel]; ++ return 0; ++} ++ + static const struct hwmon_channel_info * const ssam_temp_hwmon_info[] = { + HWMON_CHANNEL_INFO(chip, + HWMON_C_REGISTER_TZ), +- /* We have at most 16 thermal sensor channels. */ ++ /* ++ * We have at most SSAM_TMP_SENSOR_MAX_COUNT = 16 thermal sensor ++ * channels. 
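++ * Channels without a backing sensor (bit not set in ssam_temp->sensors)
++ * are hidden via ssam_temp_hwmon_is_visible().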
++ */ + HWMON_CHANNEL_INFO(temp, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT, +- HWMON_T_INPUT), ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL, ++ HWMON_T_INPUT | HWMON_T_LABEL), + NULL + }; + + static const struct hwmon_ops ssam_temp_hwmon_ops = { + .is_visible = ssam_temp_hwmon_is_visible, + .read = ssam_temp_hwmon_read, ++ .read_string = ssam_temp_hwmon_read_string, + }; + + static const struct hwmon_chip_info ssam_temp_hwmon_chip_info = { +@@ -122,6 +187,7 @@ static int ssam_temp_probe(struct ssam_device *sdev) + struct ssam_temp *ssam_temp; + struct device *hwmon_dev; + s16 sensors; ++ int channel; + int status; + + status = ssam_tmp_get_available_sensors(sdev, &sensors); +@@ -135,6 +201,19 @@ static int ssam_temp_probe(struct ssam_device *sdev) + ssam_temp->sdev = sdev; + ssam_temp->sensors = sensors; + ++ /* Retrieve the name for each available sensor. */ ++ for (channel = 0; channel < SSAM_TMP_SENSOR_MAX_COUNT; channel++) ++ { ++ if (!(sensors & BIT(channel))) ++ continue; ++ ++ status = ssam_tmp_get_name(sdev, channel + 1, ++ ssam_temp->names[channel], ++ SSAM_TMP_SENSOR_NAME_LENGTH); ++ if (status) ++ return status; ++ } ++ + hwmon_dev = devm_hwmon_device_register_with_info(&sdev->dev, + "surface_thermal", ssam_temp, &ssam_temp_hwmon_chip_info, + NULL); +-- +2.43.0 + +From 8ccf7b86ad270655bd1e8cd0ab8d2ff475ad0ea7 Mon Sep 17 00:00:00 2001 +From: Maximilian Luz +Date: Sat, 30 Dec 2023 18:21:12 +0100 +Subject: [PATCH] platform/surface: aggregator_registry: Add support for + thermal sensors on the Surface Pro 9 + +The Surface Pro 9 has thermal sensors connected via the Surface +Aggregator Module. Add a device node to support those. + +Signed-off-by: Maximilian Luz +Patchset: surface-sam +--- + drivers/platform/surface/surface_aggregator_registry.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c +index f02a933160ff..67686042e009 100644 +--- a/drivers/platform/surface/surface_aggregator_registry.c ++++ b/drivers/platform/surface/surface_aggregator_registry.c +@@ -74,6 +74,12 @@ static const struct software_node ssam_node_tmp_pprof = { + .parent = &ssam_node_root, + }; + ++/* Thermal sensors. */ ++static const struct software_node ssam_node_tmp_sensors = { ++ .name = "ssam:01:03:01:00:02", ++ .parent = &ssam_node_root, ++}; ++ + /* Fan speed function. 
*/ + static const struct software_node ssam_node_fan_speed = { + .name = "ssam:01:05:01:01:01", +@@ -325,6 +331,7 @@ static const struct software_node *ssam_node_group_sp9[] = { + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_pprof, ++ &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_kip_keyboard, +-- +2.43.0 -From 3772b511c710c369b737fd0a111fbda63b028f1d Mon Sep 17 00:00:00 2001 +From 38a76c85dee37facde40f245d994c4209ccddd15 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 25 Jul 2020 17:19:53 +0200 Subject: [PATCH] i2c: acpi: Implement RawBytes read access @@ -7034,7 +6564,7 @@ Patchset: surface-sam-over-hid 1 file changed, 35 insertions(+) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c -index d6037a3286690..a290ebc77aea2 100644 +index d6037a328669..a290ebc77aea 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -628,6 +628,28 @@ static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, @@ -7087,9 +6617,9 @@ index d6037a3286690..a290ebc77aea2 100644 dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n", accessor_type, client->addr); -- -2.42.0 +2.43.0 -From f45a16750118da615fca44e7214204c83631ee7f Mon Sep 17 00:00:00 2001 +From f4ad3e5c368c11503d8b7af6a703f3972ebd5e98 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 13 Feb 2021 16:41:18 +0100 Subject: [PATCH] platform/surface: Add driver for Surface Book 1 dGPU switch @@ -7112,7 +6642,7 @@ Patchset: surface-sam-over-hid create mode 100644 drivers/platform/surface/surfacebook1_dgpu_switch.c diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig -index b629e82af97c0..68656e8f309ed 100644 +index b629e82af97c..68656e8f309e 100644 --- a/drivers/platform/surface/Kconfig +++ b/drivers/platform/surface/Kconfig @@ -149,6 +149,13 @@ config SURFACE_AGGREGATOR_TABLET_SWITCH @@ -7130,7 +6660,7 @@ index b629e82af97c0..68656e8f309ed 100644 tristate "Surface DTX (Detachment System) Driver" depends on SURFACE_AGGREGATOR diff --git a/drivers/platform/surface/Makefile b/drivers/platform/surface/Makefile -index 53344330939bf..7efcd0cdb5329 100644 +index 53344330939b..7efcd0cdb532 100644 --- a/drivers/platform/surface/Makefile +++ b/drivers/platform/surface/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_SURFACE_AGGREGATOR_CDEV) += surface_aggregator_cdev.o @@ -7143,7 +6673,7 @@ index 53344330939bf..7efcd0cdb5329 100644 obj-$(CONFIG_SURFACE_HOTPLUG) += surface_hotplug.o diff --git a/drivers/platform/surface/surfacebook1_dgpu_switch.c b/drivers/platform/surface/surfacebook1_dgpu_switch.c new file mode 100644 -index 0000000000000..8b816ed8f35c6 +index 000000000000..8b816ed8f35c --- /dev/null +++ b/drivers/platform/surface/surfacebook1_dgpu_switch.c @@ -0,0 +1,162 @@ @@ -7310,9 +6840,9 @@ index 0000000000000..8b816ed8f35c6 +MODULE_DESCRIPTION("Discrete GPU Power-Switch for Surface Book 1"); +MODULE_LICENSE("GPL"); -- -2.42.0 +2.43.0 -From a5d9cf4762a27e2bf7f38c0d5a223b9df8b4ba8a Mon Sep 17 00:00:00 2001 +From 96cb53fd556f88f97d61b237c6015cec946865d5 Mon Sep 17 00:00:00 2001 From: Sachi King Date: Tue, 5 Oct 2021 00:05:09 +1100 Subject: [PATCH] Input: soc_button_array - support AMD variant Surface devices @@ -7334,10 +6864,10 @@ Patchset: surface-button 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c -index e79f5497948b8..2bddbe6e9ea4d 100644 +index f6d060377d18..b8603f74eb28 100644 --- 
a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c -@@ -537,8 +537,8 @@ static const struct soc_device_data soc_device_MSHW0028 = { +@@ -540,8 +540,8 @@ static const struct soc_device_data soc_device_MSHW0028 = { * Both, the Surface Pro 4 (surfacepro3_button.c) and the above mentioned * devices use MSHW0040 for power and volume buttons, however the way they * have to be addressed differs. Make sure that we only load this drivers @@ -7348,7 +6878,7 @@ index e79f5497948b8..2bddbe6e9ea4d 100644 */ #define MSHW0040_DSM_REVISION 0x01 #define MSHW0040_DSM_GET_OMPR 0x02 // get OEM Platform Revision -@@ -549,31 +549,14 @@ static const guid_t MSHW0040_DSM_UUID = +@@ -552,31 +552,14 @@ static const guid_t MSHW0040_DSM_UUID = static int soc_device_check_MSHW0040(struct device *dev) { acpi_handle handle = ACPI_HANDLE(dev); @@ -7387,9 +6917,9 @@ index e79f5497948b8..2bddbe6e9ea4d 100644 /* -- -2.42.0 +2.43.0 -From 66f0a34801ad81ff08cc3ae0e175e0958959c461 Mon Sep 17 00:00:00 2001 +From 7909f30b15796e8df43a6d4ea32cbbd40627c410 Mon Sep 17 00:00:00 2001 From: Sachi King Date: Tue, 5 Oct 2021 00:22:57 +1100 Subject: [PATCH] platform/surface: surfacepro3_button: don't load on amd @@ -7410,7 +6940,7 @@ Patchset: surface-button 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c -index 2755601f979cd..4240c98ca2265 100644 +index 2755601f979c..4240c98ca226 100644 --- a/drivers/platform/surface/surfacepro3_button.c +++ b/drivers/platform/surface/surfacepro3_button.c @@ -149,7 +149,8 @@ static int surface_button_resume(struct device *dev) @@ -7459,9 +6989,9 @@ index 2755601f979cd..4240c98ca2265 100644 -- -2.42.0 +2.43.0 -From a55587ce4f5065bedb604f9031082ad47612a163 Mon Sep 17 00:00:00 2001 +From 28ea3660b6680bfd528ca05c543e69b8a2ad412c Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 18 Feb 2023 01:02:49 +0100 Subject: [PATCH] USB: quirks: Add USB_QUIRK_DELAY_INIT for Surface Go 3 @@ -7486,7 +7016,7 @@ Patchset: surface-typecover 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c -index 15e9bd180a1d2..0d70461d01e16 100644 +index 15e9bd180a1d..0d70461d01e1 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -220,6 +220,9 @@ static const struct usb_device_id usb_quirk_list[] = { @@ -7500,9 +7030,9 @@ index 15e9bd180a1d2..0d70461d01e16 100644 { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, -- -2.42.0 +2.43.0 -From 678999792d6b1c72e56c6b63fc3909b93db47b32 Mon Sep 17 00:00:00 2001 +From 039ed906cfe0578e78c40d786433e9b144c56785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Thu, 5 Nov 2020 13:09:45 +0100 Subject: [PATCH] hid/multitouch: Turn off Type Cover keyboard backlight when @@ -7538,7 +7068,7 @@ Patchset: surface-typecover 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 8db4ae05febc8..99a5efef45258 100644 +index fd5b0637dad6..0f49d8fa6333 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -34,7 +34,10 @@ @@ -7712,7 +7242,7 @@ index 8db4ae05febc8..99a5efef45258 100644 ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group); if (ret) -@@ -1842,6 +1932,7 @@ static void mt_remove(struct hid_device *hdev) +@@ -1840,6 +1930,7 @@ static void mt_remove(struct hid_device *hdev) { struct mt_device *td = hid_get_drvdata(hdev); @@ -7720,7 +7250,7 @@ 
index 8db4ae05febc8..99a5efef45258 100644 del_timer_sync(&td->release_timer); sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group); -@@ -2223,6 +2314,11 @@ static const struct hid_device_id mt_devices[] = { +@@ -2226,6 +2317,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_XIROKU, USB_DEVICE_ID_XIROKU_CSR2) }, @@ -7733,9 +7263,9 @@ index 8db4ae05febc8..99a5efef45258 100644 { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, -- -2.42.0 +2.43.0 -From 12427f01e38ebf653ccf44faefdcb92110c43c20 Mon Sep 17 00:00:00 2001 +From 97419c9be08b7b3e4311aa989fa8f91d0549a469 Mon Sep 17 00:00:00 2001 From: PJungkamp Date: Fri, 25 Feb 2022 12:04:25 +0100 Subject: [PATCH] hid/multitouch: Add support for surface pro type cover tablet @@ -7764,7 +7294,7 @@ Patchset: surface-typecover 1 file changed, 122 insertions(+), 26 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 99a5efef45258..6ae43ea90bcd5 100644 +index 0f49d8fa6333..1fad1199775b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -77,6 +77,7 @@ MODULE_LICENSE("GPL"); @@ -7974,7 +7504,7 @@ index 99a5efef45258..6ae43ea90bcd5 100644 hid_err(hdev, "couldn't find backlight field\n"); goto out; } -@@ -1909,13 +1975,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state) +@@ -1908,13 +1974,24 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state) static int mt_reset_resume(struct hid_device *hdev) { @@ -7999,7 +7529,7 @@ index 99a5efef45258..6ae43ea90bcd5 100644 /* Some Elan legacy devices require SET_IDLE to be set on resume. * It should be safe to send it to other devices too. * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. 
*/ -@@ -1924,6 +2001,10 @@ static int mt_resume(struct hid_device *hdev) +@@ -1923,12 +2000,31 @@ static int mt_resume(struct hid_device *hdev) mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true); @@ -8009,8 +7539,7 @@ index 99a5efef45258..6ae43ea90bcd5 100644 + return 0; } - #endif -@@ -1931,6 +2012,21 @@ static int mt_resume(struct hid_device *hdev) + static void mt_remove(struct hid_device *hdev) { struct mt_device *td = hid_get_drvdata(hdev); @@ -8033,9 +7562,9 @@ index 99a5efef45258..6ae43ea90bcd5 100644 unregister_pm_notifier(&td->pm_notifier); del_timer_sync(&td->release_timer); -- -2.42.0 +2.43.0 -From 151f9dba2f3d6d066d160128da109a0173a3ff4c Mon Sep 17 00:00:00 2001 +From 3854d7e575b1091a994c58ef8ee2a89f4efced12 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 19 Feb 2023 22:12:24 +0100 Subject: [PATCH] PCI: Add quirk to prevent calling shutdown mehtod @@ -8060,7 +7589,7 @@ Patchset: surface-shutdown 3 files changed, 40 insertions(+) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c -index 51ec9e7e784f0..40554890d7211 100644 +index 51ec9e7e784f..40554890d721 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -507,6 +507,9 @@ static void pci_device_shutdown(struct device *dev) @@ -8074,13 +7603,14 @@ index 51ec9e7e784f0..40554890d7211 100644 if (drv && drv->shutdown) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index e3e915329510f..666ff1e9b6d7b 100644 +index d55a3ffae4b8..e8614d8476fe 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c -@@ -6212,6 +6212,42 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node); - DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node); - DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node); - +@@ -6227,3 +6227,39 @@ static void pci_fixup_d3cold_delay_1sec(struct pci_dev *pdev) + pdev->d3cold_delay = 1000; + } + DECLARE_PCI_FIXUP_FINAL(0x5555, 0x0004, pci_fixup_d3cold_delay_1sec); ++ +static const struct dmi_system_id no_shutdown_dmi_table[] = { + /* + * Systems on which some devices should not be touched during shutdown. 
@@ -8116,13 +7646,8 @@ index e3e915329510f..666ff1e9b6d7b 100644 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x462f, quirk_no_shutdown); // Thunderbolt 4 PCI Express Root Port +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x466d, quirk_no_shutdown); // Thunderbolt 4 NHI +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x46a8, quirk_no_shutdown); // GPU -+ - /* - * Devices known to require a longer delay before first config space access - * after reset recovery or resume from D3cold: - diff --git a/include/linux/pci.h b/include/linux/pci.h -index 8c7c2c3c6c652..0c223b04dff91 100644 +index bc80960fad7c..eec5704d1000 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -465,6 +465,7 @@ struct pci_dev { @@ -8134,9 +7659,9 @@ index 8c7c2c3c6c652..0c223b04dff91 100644 atomic_t enable_cnt; /* pci_enable_device has been called */ -- -2.42.0 +2.43.0 -From 912e956823b3cadd7203d3ce94418d162ff701be Mon Sep 17 00:00:00 2001 +From d9ddc9ae99c11ebc912a1a8dde46d783e873508b Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 12 Mar 2023 01:41:57 +0100 Subject: [PATCH] platform/surface: gpe: Add support for Surface Pro 9 @@ -8150,7 +7675,7 @@ Patchset: surface-gpe 1 file changed, 17 insertions(+) diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c -index c219b840d491a..69c4352e8406b 100644 +index 62fd4004db31..103fc4468262 100644 --- a/drivers/platform/surface/surface_gpe.c +++ b/drivers/platform/surface/surface_gpe.c @@ -41,6 +41,11 @@ static const struct property_entry lid_device_props_l4F[] = { @@ -8185,9 +7710,9 @@ index c219b840d491a..69c4352e8406b 100644 .ident = "Surface Book 1", .matches = { -- -2.42.0 +2.43.0 -From df083025f8c63824279c19de8ec3339440f819c9 Mon Sep 17 00:00:00 2001 +From 5fdcd780891777ef73585adf610593e6e097e6d6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 10 Oct 2021 20:56:57 +0200 Subject: [PATCH] ACPI: delay enumeration of devices with a _DEP pointing to an @@ -8247,10 +7772,10 @@ Patchset: cameras 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c -index 691d4b7686ee7..9283217689279 100644 +index 02bb2cce423f..b123138d3dc0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c -@@ -2108,6 +2108,9 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used, +@@ -2114,6 +2114,9 @@ static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used, static void acpi_default_enumeration(struct acpi_device *device) { @@ -8261,9 +7786,9 @@ index 691d4b7686ee7..9283217689279 100644 * Do not enumerate devices with enumeration_by_parent flag set as * they will be enumerated by their respective parents. 
-- -2.42.0 +2.43.0 -From 87650a001d3068a8b614fd688e21bb87c2d3a3e6 Mon Sep 17 00:00:00 2001 +From eb19f5e13f14a8973920d406125f205945558fb9 Mon Sep 17 00:00:00 2001 From: zouxiaoh Date: Fri, 25 Jun 2021 08:52:59 +0800 Subject: [PATCH] iommu: intel-ipu: use IOMMU passthrough mode for Intel IPUs @@ -8289,7 +7814,7 @@ Patchset: cameras 1 file changed, 30 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index 5a627e081797c..da866ac6b30ba 100644 +index cc6569613255..8a532d32efdd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -38,6 +38,12 @@ @@ -8305,7 +7830,7 @@ index 5a627e081797c..da866ac6b30ba 100644 #define IS_IPTS(pdev) ((pdev)->vendor == PCI_VENDOR_ID_INTEL && \ ((pdev)->device == 0x9d3e)) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) -@@ -295,12 +301,14 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled); +@@ -294,12 +300,14 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled); static int dmar_map_gfx = 1; static int dmar_map_ipts = 1; @@ -8320,7 +7845,7 @@ index 5a627e081797c..da866ac6b30ba 100644 #define IDENTMAP_IPTS 16 const struct iommu_ops intel_iommu_ops; -@@ -2547,6 +2555,9 @@ static int device_def_domain_type(struct device *dev) +@@ -2553,6 +2561,9 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; @@ -8330,7 +7855,7 @@ index 5a627e081797c..da866ac6b30ba 100644 if ((iommu_identity_mapping & IDENTMAP_IPTS) && IS_IPTS(pdev)) return IOMMU_DOMAIN_IDENTITY; } -@@ -2856,6 +2867,9 @@ static int __init init_dmars(void) +@@ -2862,6 +2873,9 @@ static int __init init_dmars(void) if (!dmar_map_gfx) iommu_identity_mapping |= IDENTMAP_GFX; @@ -8340,7 +7865,7 @@ index 5a627e081797c..da866ac6b30ba 100644 if (!dmar_map_ipts) iommu_identity_mapping |= IDENTMAP_IPTS; -@@ -4838,6 +4852,18 @@ static void quirk_iommu_igfx(struct pci_dev *dev) +@@ -4987,6 +5001,18 @@ static void quirk_iommu_igfx(struct pci_dev *dev) dmar_map_gfx = 0; } @@ -8359,7 +7884,7 @@ index 5a627e081797c..da866ac6b30ba 100644 static void quirk_iommu_ipts(struct pci_dev *dev) { if (!IS_IPTS(dev)) -@@ -4849,6 +4875,7 @@ static void quirk_iommu_ipts(struct pci_dev *dev) +@@ -4998,6 +5024,7 @@ static void quirk_iommu_ipts(struct pci_dev *dev) pci_info(dev, "Passthrough IOMMU for IPTS\n"); dmar_map_ipts = 0; } @@ -8367,7 +7892,7 @@ index 5a627e081797c..da866ac6b30ba 100644 /* G4x/GM45 integrated gfx dmar support is totally busted. 
*/ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); -@@ -4884,6 +4911,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); +@@ -5033,6 +5060,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); @@ -8378,9 +7903,9 @@ index 5a627e081797c..da866ac6b30ba 100644 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9D3E, quirk_iommu_ipts); -- -2.42.0 +2.43.0 -From 76fec27d978bf7708a60862d4aab2e1fe7ec3f27 Mon Sep 17 00:00:00 2001 +From 1b16e7cbcbf699e4d841424568e0de1cee048d93 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Sun, 10 Oct 2021 20:57:02 +0200 Subject: [PATCH] platform/x86: int3472: Enable I2c daisy chain @@ -8397,7 +7922,7 @@ Patchset: cameras 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c -index 1e107fd49f828..e3e1696e7f0ee 100644 +index 1e107fd49f82..e3e1696e7f0e 100644 --- a/drivers/platform/x86/intel/int3472/tps68470.c +++ b/drivers/platform/x86/intel/int3472/tps68470.c @@ -46,6 +46,13 @@ static int tps68470_chip_init(struct device *dev, struct regmap *regmap) @@ -8415,9 +7940,9 @@ index 1e107fd49f828..e3e1696e7f0ee 100644 return 0; -- -2.42.0 +2.43.0 -From 232a0f88ecc21141c6f0d94cc74eb63c7869c217 Mon Sep 17 00:00:00 2001 +From a856e6ec1aa1ce0e88abdd423a151f2bbddb8134 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Thu, 2 Mar 2023 12:59:39 +0000 Subject: [PATCH] platform/x86: int3472: Remap reset GPIO for INT347E @@ -8439,20 +7964,19 @@ Patchset: cameras 1 file changed, 14 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c -index e33c2d75975cf..c0c90ae66b705 100644 +index 07b302e09340..1d3097bc7e48 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c -@@ -57,6 +57,9 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 +@@ -83,12 +83,26 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 const char *func, u32 polarity) { - char *path = agpio->resource_source.string_ptr; + int ret; + const struct acpi_device_id ov7251_ids[] = { + { "INT347E" }, + }; - struct gpiod_lookup *table_entry; - struct acpi_device *adev; - acpi_handle handle; -@@ -67,6 +70,17 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 + + if (int3472->n_sensor_gpios >= INT3472_MAX_SENSOR_GPIOS) { + dev_warn(int3472->dev, "Too many GPIOs mapped\n"); return -EINVAL; } @@ -8467,13 +7991,13 @@ index e33c2d75975cf..c0c90ae66b705 100644 + polarity = GPIO_ACTIVE_HIGH; + } + - status = acpi_get_handle(NULL, path, &handle); - if (ACPI_FAILURE(status)) - return -EINVAL; + ret = skl_int3472_fill_gpiod_lookup(&int3472->gpios.table[int3472->n_sensor_gpios], + agpio, func, polarity); + if (ret) -- -2.42.0 +2.43.0 -From 0cfd5c05a675388bbb2edfa87423dc5ad931cc97 Mon Sep 17 00:00:00 2001 +From a7a10c4493fe0a381f12fd6a20a024e7797bd37c Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 21 Mar 2023 13:45:26 +0000 Subject: [PATCH] media: i2c: Clarify that gain is Analogue gain in OV7251 @@ -8488,7 +8012,7 @@ Patchset: cameras 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/ov7251.c 
b/drivers/media/i2c/ov7251.c -index 675fb37a6feae..43b30db08c9e4 100644 +index 6582cc0e2384..fd0796b6e07e 100644 --- a/drivers/media/i2c/ov7251.c +++ b/drivers/media/i2c/ov7251.c @@ -1051,7 +1051,7 @@ static int ov7251_s_ctrl(struct v4l2_ctrl *ctrl) @@ -8500,7 +8024,7 @@ index 675fb37a6feae..43b30db08c9e4 100644 ret = ov7251_set_gain(ov7251, ctrl->val); break; case V4L2_CID_TEST_PATTERN: -@@ -1551,7 +1551,7 @@ static int ov7251_init_ctrls(struct ov7251 *ov7251) +@@ -1553,7 +1553,7 @@ static int ov7251_init_ctrls(struct ov7251 *ov7251) ov7251->exposure = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops, V4L2_CID_EXPOSURE, 1, 32, 1, 32); ov7251->gain = v4l2_ctrl_new_std(&ov7251->ctrls, &ov7251_ctrl_ops, @@ -8510,9 +8034,9 @@ index 675fb37a6feae..43b30db08c9e4 100644 V4L2_CID_TEST_PATTERN, ARRAY_SIZE(ov7251_test_pattern_menu) - 1, -- -2.42.0 +2.43.0 -From 18fa273c21f1dd86160f18242a81947392272443 Mon Sep 17 00:00:00 2001 +From e96fa67c9172fac9aa6e68199cf7e29d074c21e6 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Wed, 22 Mar 2023 11:01:42 +0000 Subject: [PATCH] media: v4l2-core: Acquire privacy led in @@ -8531,7 +8055,7 @@ Patchset: cameras 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c -index 091e8cf4114ba..cca10f5355844 100644 +index 8cfd593d293d..c32f0d1b29d4 100644 --- a/drivers/media/v4l2-core/v4l2-async.c +++ b/drivers/media/v4l2-core/v4l2-async.c @@ -796,6 +796,10 @@ int v4l2_async_register_subdev(struct v4l2_subdev *sd) @@ -8546,7 +8070,7 @@ index 091e8cf4114ba..cca10f5355844 100644 * No reference taken. The reference is held by the device (struct * v4l2_subdev.dev), and async sub-device does not exist independently diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c -index 7f181fbbb1407..1c0347de4e216 100644 +index 7f181fbbb140..1c0347de4e21 100644 --- a/drivers/media/v4l2-core/v4l2-fwnode.c +++ b/drivers/media/v4l2-core/v4l2-fwnode.c @@ -1217,10 +1217,6 @@ int v4l2_async_register_subdev_sensor(struct v4l2_subdev *sd) @@ -8561,9 +8085,9 @@ index 7f181fbbb1407..1c0347de4e216 100644 if (ret < 0) goto out_cleanup; -- -2.42.0 +2.43.0 -From 07e01113f2641afab78b155d42e9d9d399a9e164 Mon Sep 17 00:00:00 2001 +From 68dac72bec1c99890d35d6bfd1b1f66e0cf8789c Mon Sep 17 00:00:00 2001 From: Kate Hsuan Date: Tue, 21 Mar 2023 23:37:16 +0800 Subject: [PATCH] platform: x86: int3472: Add MFD cell for tps68470 LED @@ -8579,7 +8103,7 @@ Patchset: cameras 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/tps68470.c b/drivers/platform/x86/intel/int3472/tps68470.c -index e3e1696e7f0ee..423dc555093f7 100644 +index e3e1696e7f0e..423dc555093f 100644 --- a/drivers/platform/x86/intel/int3472/tps68470.c +++ b/drivers/platform/x86/intel/int3472/tps68470.c @@ -17,7 +17,7 @@ @@ -8602,9 +8126,9 @@ index e3e1696e7f0ee..423dc555093f7 100644 for (i = 0; i < board_data->n_gpiod_lookups; i++) gpiod_add_lookup_table(board_data->tps68470_gpio_lookup_tables[i]); -- -2.42.0 +2.43.0 -From a704bf822539e09b00015110b48bc997692c92ce Mon Sep 17 00:00:00 2001 +From 3f446f24aecaba808693f0173e28972e651fa87d Mon Sep 17 00:00:00 2001 From: Kate Hsuan Date: Tue, 21 Mar 2023 23:37:17 +0800 Subject: [PATCH] include: mfd: tps68470: Add masks for LEDA and LEDB @@ -8622,7 +8146,7 @@ Patchset: cameras 1 file changed, 5 insertions(+) diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h -index 7807fa329db00..2d2abb25b944f 100644 +index 
7807fa329db0..2d2abb25b944 100644 --- a/include/linux/mfd/tps68470.h +++ b/include/linux/mfd/tps68470.h @@ -34,6 +34,7 @@ @@ -8643,9 +8167,9 @@ index 7807fa329db00..2d2abb25b944f 100644 + #endif /* __LINUX_MFD_TPS68470_H */ -- -2.42.0 +2.43.0 -From c8a6ce96be3a4dca7e9e99613b28494d10b4ade0 Mon Sep 17 00:00:00 2001 +From a0fe4ec438c5edb9f4360c8a2a5f5269d05c44ef Mon Sep 17 00:00:00 2001 From: Kate Hsuan Date: Tue, 21 Mar 2023 23:37:18 +0800 Subject: [PATCH] leds: tps68470: Add LED control for tps68470 @@ -8668,10 +8192,10 @@ Patchset: cameras create mode 100644 drivers/leds/leds-tps68470.c diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig -index b92208eccdea9..312c0c21cc5ef 100644 +index a3a9ac5b5338..0bc6845b5d29 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig -@@ -873,6 +873,18 @@ config LEDS_TPS6105X +@@ -875,6 +875,18 @@ config LEDS_TPS6105X It is a single boost converter primarily for white LEDs and audio amplifiers. @@ -8691,7 +8215,7 @@ index b92208eccdea9..312c0c21cc5ef 100644 tristate "LED support for SGI Octane machines" depends on LEDS_CLASS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile -index d7348e8bc019a..10caea4e7c614 100644 +index d7348e8bc019..10caea4e7c61 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o @@ -8704,7 +8228,7 @@ index d7348e8bc019a..10caea4e7c614 100644 obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o diff --git a/drivers/leds/leds-tps68470.c b/drivers/leds/leds-tps68470.c new file mode 100644 -index 0000000000000..35aeb5db89c8f +index 000000000000..35aeb5db89c8 --- /dev/null +++ b/drivers/leds/leds-tps68470.c @@ -0,0 +1,185 @@ @@ -8894,9 +8418,9 @@ index 0000000000000..35aeb5db89c8f +MODULE_DESCRIPTION("LED driver for TPS68470 PMIC"); +MODULE_LICENSE("GPL v2"); -- -2.42.0 +2.43.0 -From 82252c3764ecee6c09218077759072f15001f9ee Mon Sep 17 00:00:00 2001 +From 04069751b144350632ec45b5b25c2cc01d5f34ef Mon Sep 17 00:00:00 2001 From: Sachi King Date: Sat, 29 May 2021 17:47:38 +1000 Subject: [PATCH] ACPI: Add quirk for Surface Laptop 4 AMD missing irq 7 @@ -8919,7 +8443,7 @@ Patchset: amd-gpio 1 file changed, 17 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index c55c0ef47a187..f29740cf89ff6 100644 +index 85a3ce2a3666..2c0e04a3a697 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -22,6 +22,7 @@ @@ -8930,7 +8454,7 @@ index c55c0ef47a187..f29740cf89ff6 100644 #include #include -@@ -1255,6 +1256,17 @@ static void __init mp_config_acpi_legacy_irqs(void) +@@ -1251,6 +1252,17 @@ static void __init mp_config_acpi_legacy_irqs(void) } } @@ -8948,7 +8472,7 @@ index c55c0ef47a187..f29740cf89ff6 100644 /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error -@@ -1310,6 +1322,11 @@ static int __init acpi_parse_madt_ioapic_entries(void) +@@ -1306,6 +1318,11 @@ static int __init acpi_parse_madt_ioapic_entries(void) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, acpi_gbl_FADT.sci_interrupt); @@ -8961,9 +8485,9 @@ index c55c0ef47a187..f29740cf89ff6 100644 mp_config_acpi_legacy_irqs(); -- -2.42.0 +2.43.0 -From 52e3f50633128a93bf99ca5c97f98929da66a9ed Mon Sep 17 00:00:00 2001 +From 8e2f2b852776fca1dd0ab8728be2303051cb19e1 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 3 Jun 2021 14:04:26 +0200 Subject: [PATCH] ACPI: Add AMD 13" Surface Laptop 4 model to irq 7 override @@ -8978,10 +8502,10 @@ Patchset: amd-gpio 1 file changed, 8 insertions(+), 1 deletion(-) diff --git 
a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index f29740cf89ff6..247d2a8bcdf4b 100644 +index 2c0e04a3a697..b0e1dab3d2ec 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c -@@ -1258,12 +1258,19 @@ static void __init mp_config_acpi_legacy_irqs(void) +@@ -1254,12 +1254,19 @@ static void __init mp_config_acpi_legacy_irqs(void) static const struct dmi_system_id surface_quirk[] __initconst = { { @@ -9003,9 +8527,9 @@ index f29740cf89ff6..247d2a8bcdf4b 100644 }; -- -2.42.0 +2.43.0 -From 8cd23b1bb3a8b7a3ef7cec2c37e7e46e6397a858 Mon Sep 17 00:00:00 2001 +From 4e36132e272de3d84833b799be56c2b460db08b6 Mon Sep 17 00:00:00 2001 From: "Bart Groeneveld | GPX Solutions B.V" Date: Mon, 5 Dec 2022 16:08:46 +0100 Subject: [PATCH] acpi: allow usage of acpi_tad on HW-reduced platforms @@ -9028,7 +8552,7 @@ Patchset: rtc 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c -index 33c3b16af556b..900445d06623d 100644 +index 33c3b16af556..900445d06623 100644 --- a/drivers/acpi/acpi_tad.c +++ b/drivers/acpi/acpi_tad.c @@ -432,6 +432,14 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr, @@ -9113,5 +8637,5 @@ index 33c3b16af556b..900445d06623d 100644 ret = sysfs_create_group(&dev->kobj, &acpi_tad_dc_attr_group); if (ret) -- -2.42.0 +2.43.0 diff --git a/SOURCES/mod-internal.list b/SOURCES/mod-internal.list index 1d535d8..a63d55f 100644 --- a/SOURCES/mod-internal.list +++ b/SOURCES/mod-internal.list @@ -1,5 +1,7 @@ bitfield_kunit checksum_kunit +cfg80211-tests +clk-fractional-divider_test clk-gate_test clk_test cmdline_kunit @@ -22,10 +24,12 @@ drm_mm_test drm_modes_test drm_plane_helper_test drm_probe_helper_test +drm_exec_test drm_rect_test ext4-inode-test fat_test fortify_kunit +gso_test gss_krb5_test handshake-test hashtable_test @@ -42,6 +46,8 @@ lib_test list-test locktorture mac80211_hwsim +mac80211-tests +mean_and_variance_test memcpy_kunit mptcp_crypto_test mptcp_token_test @@ -49,6 +55,7 @@ mtty netdevsim overflow_kunit pktgen +property-entry-test rational-test rcuscale rcutorture @@ -59,6 +66,7 @@ rocker scftorture siphash_kunit slub_kunit +snd-hda-cirrus-scodec-test soc-topology-test soc-utils-test stackinit_kunit diff --git a/SOURCES/nouveau-gsp-default.patch b/SOURCES/nouveau-gsp-default.patch new file mode 100644 index 0000000..6e48562 --- /dev/null +++ b/SOURCES/nouveau-gsp-default.patch @@ -0,0 +1,23 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Jan200101 +Date: Thu, 8 Feb 2024 00:14:36 +0100 +Subject: [PATCH] enable Nouveau GSP by default on all platforms + +Signed-off-by: Jan200101 +--- + drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +index 9ee58e2a0eb2..e7864dedf01b 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +@@ -2296,7 +2296,7 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif) + struct nvkm_subdev *subdev = &gsp->subdev; + int ret; + +- if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable)) ++ if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", true)) + return -EINVAL; + + if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) || diff --git a/SOURCES/patch-6.6-redhat.patch b/SOURCES/patch-6.6-redhat.patch deleted file mode 100644 index 
7959094..0000000 --- a/SOURCES/patch-6.6-redhat.patch +++ /dev/null @@ -1,2292 +0,0 @@ - Makefile | 20 +- - arch/s390/include/asm/ipl.h | 1 + - arch/s390/kernel/ipl.c | 5 + - arch/s390/kernel/setup.c | 4 + - arch/x86/kernel/setup.c | 22 +- - drivers/acpi/apei/hest.c | 8 + - drivers/acpi/irq.c | 17 +- - drivers/acpi/scan.c | 9 + - drivers/ata/libahci.c | 18 ++ - drivers/char/ipmi/ipmi_dmi.c | 15 + - drivers/char/ipmi/ipmi_msghandler.c | 16 +- - drivers/firmware/efi/Makefile | 1 + - drivers/firmware/efi/efi.c | 124 +++++-- - drivers/firmware/efi/secureboot.c | 38 +++ - drivers/firmware/sysfb.c | 18 +- - drivers/hid/hid-rmi.c | 66 ---- - drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 ++ - drivers/input/rmi4/rmi_driver.c | 124 ++++--- - drivers/iommu/iommu.c | 22 ++ - drivers/net/wireless/ath/ath10k/wmi-tlv.c | 4 + - drivers/pci/quirks.c | 24 ++ - drivers/rtc/rtc-cmos.c | 18 +- - drivers/scsi/sd.c | 10 + - drivers/usb/core/hub.c | 7 + - include/linux/efi.h | 22 +- - include/linux/lsm_hook_defs.h | 2 + - include/linux/module.h | 1 + - include/linux/rh_kabi.h | 172 ++++++++++ - include/linux/rmi.h | 1 + - include/linux/security.h | 5 + - kernel/module/main.c | 2 + - kernel/module/signing.c | 9 +- - scripts/mod/modpost.c | 8 + - scripts/tags.sh | 2 + - security/integrity/platform_certs/load_uefi.c | 6 +- - security/lockdown/Kconfig | 13 + - security/lockdown/lockdown.c | 1 + - security/security.c | 12 + - sound/pci/hda/cs35l41_hda.c | 106 +++++- - sound/pci/hda/cs35l41_hda.h | 8 +- - sound/pci/hda/cs35l41_hda_property.c | 355 +++++++++++++++++++-- - sound/pci/hda/hda_component.h | 4 + - sound/pci/hda/patch_realtek.c | 34 +- - 43 files changed, 1132 insertions(+), 241 deletions(-) - -diff --git a/Makefile b/Makefile -index bad16eda67e2..cfd8719528dd 100644 ---- a/Makefile -+++ b/Makefile -@@ -22,6 +22,18 @@ $(if $(filter __%, $(MAKECMDGOALS)), \ - PHONY := __all - __all: - -+# Set RHEL variables -+# Note that this ifdef'ery is required to handle when building with -+# the O= mechanism (relocate the object file results) due to upstream -+# commit 67d7c302 which broke our RHEL include file -+ifneq ($(realpath source),) -+include $(realpath source)/Makefile.rhelver -+else -+ifneq ($(realpath Makefile.rhelver),) -+include Makefile.rhelver -+endif -+endif -+ - # We are using a recursive build, so we need to do a little thinking - # to get the ordering right. - # -@@ -1250,7 +1262,13 @@ define filechk_version.h - ((c) > 255 ? 
255 : (c)))'; \ - echo \#define LINUX_VERSION_MAJOR $(VERSION); \ - echo \#define LINUX_VERSION_PATCHLEVEL $(PATCHLEVEL); \ -- echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL) -+ echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL); \ -+ echo '#define RHEL_MAJOR $(RHEL_MAJOR)'; \ -+ echo '#define RHEL_MINOR $(RHEL_MINOR)'; \ -+ echo '#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))'; \ -+ echo '#define RHEL_RELEASE_CODE \ -+ $(shell expr $(RHEL_MAJOR) \* 256 + $(RHEL_MINOR))'; \ -+ echo '#define RHEL_RELEASE "$(RHEL_RELEASE)"' - endef - - $(version_h): PATCHLEVEL := $(or $(PATCHLEVEL), 0) -diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h -index b0d00032479d..afb9544fb007 100644 ---- a/arch/s390/include/asm/ipl.h -+++ b/arch/s390/include/asm/ipl.h -@@ -139,6 +139,7 @@ int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, - unsigned char flags, unsigned short cert); - int ipl_report_add_certificate(struct ipl_report *report, void *key, - unsigned long addr, unsigned long len); -+bool ipl_get_secureboot(void); - - /* - * DIAG 308 support -diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c -index 8d0b95c17312..118ae555a179 100644 ---- a/arch/s390/kernel/ipl.c -+++ b/arch/s390/kernel/ipl.c -@@ -2520,3 +2520,8 @@ int ipl_report_free(struct ipl_report *report) - } - - #endif -+ -+bool ipl_get_secureboot(void) -+{ -+ return !!ipl_secure_flag; -+} -diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c -index de6ad0fb2328..5cc2758be027 100644 ---- a/arch/s390/kernel/setup.c -+++ b/arch/s390/kernel/setup.c -@@ -49,6 +49,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -914,6 +915,9 @@ void __init setup_arch(char **cmdline_p) - - log_component_list(); - -+ if (ipl_get_secureboot()) -+ security_lock_kernel_down("Secure IPL mode", LOCKDOWN_INTEGRITY_MAX); -+ - /* Have one command line that is parsed and saved in /proc/cmdline */ - /* boot_command_line has been already set up in early.c */ - *cmdline_p = boot_command_line; -diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c -index b098b1fa2470..a159419e60df 100644 ---- a/arch/x86/kernel/setup.c -+++ b/arch/x86/kernel/setup.c -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1028,6 +1029,13 @@ void __init setup_arch(char **cmdline_p) - if (efi_enabled(EFI_BOOT)) - efi_init(); - -+ efi_set_secure_boot(boot_params.secure_boot); -+ -+#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT -+ if (efi_enabled(EFI_SECURE_BOOT)) -+ security_lock_kernel_down("EFI Secure Boot mode", LOCKDOWN_INTEGRITY_MAX); -+#endif -+ - reserve_ibft_region(); - dmi_setup(); - -@@ -1189,19 +1197,7 @@ void __init setup_arch(char **cmdline_p) - /* Allocate bigger log buffer */ - setup_log_buf(1); - -- if (efi_enabled(EFI_BOOT)) { -- switch (boot_params.secure_boot) { -- case efi_secureboot_mode_disabled: -- pr_info("Secure boot disabled\n"); -- break; -- case efi_secureboot_mode_enabled: -- pr_info("Secure boot enabled\n"); -- break; -- default: -- pr_info("Secure boot could not be determined\n"); -- break; -- } -- } -+ efi_set_secure_boot(boot_params.secure_boot); - - reserve_initrd(); - -diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c -index 6aef1ee5e1bd..8f146b1b4972 100644 ---- a/drivers/acpi/apei/hest.c -+++ b/drivers/acpi/apei/hest.c -@@ -96,6 +96,14 @@ static int apei_hest_parse(apei_hest_func_t func, void *data) - if (hest_disable || !hest_tab) - return -EINVAL; - -+#ifdef CONFIG_ARM64 -+ /* Ignore broken firmware 
*/ -+ if (!strncmp(hest_tab->header.oem_id, "HPE ", 6) && -+ !strncmp(hest_tab->header.oem_table_id, "ProLiant", 8) && -+ MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_APM) -+ return -EINVAL; -+#endif -+ - hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); - for (i = 0; i < hest_tab->error_source_count; i++) { - len = hest_esrc_len(hest_hdr); -diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c -index 1687483ff319..390b67f19181 100644 ---- a/drivers/acpi/irq.c -+++ b/drivers/acpi/irq.c -@@ -143,6 +143,7 @@ struct acpi_irq_parse_one_ctx { - unsigned int index; - unsigned long *res_flags; - struct irq_fwspec *fwspec; -+ bool skip_producer_check; - }; - - /** -@@ -216,7 +217,8 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, - return AE_CTRL_TERMINATE; - case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: - eirq = &ares->data.extended_irq; -- if (eirq->producer_consumer == ACPI_PRODUCER) -+ if (!ctx->skip_producer_check && -+ eirq->producer_consumer == ACPI_PRODUCER) - return AE_OK; - if (ctx->index >= eirq->interrupt_count) { - ctx->index -= eirq->interrupt_count; -@@ -252,8 +254,19 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, - static int acpi_irq_parse_one(acpi_handle handle, unsigned int index, - struct irq_fwspec *fwspec, unsigned long *flags) - { -- struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec }; -+ struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec, false }; - -+ /* -+ * Firmware on arm64-based HPE m400 platform incorrectly marks -+ * its UART interrupt as ACPI_PRODUCER rather than ACPI_CONSUMER. -+ * Don't do the producer/consumer check for that device. -+ */ -+ if (IS_ENABLED(CONFIG_ARM64)) { -+ struct acpi_device *adev = acpi_get_acpi_dev(handle); -+ -+ if (adev && !strcmp(acpi_device_hid(adev), "APMC0D08")) -+ ctx.skip_producer_check = true; -+ } - acpi_walk_resources(handle, METHOD_NAME__CRS, acpi_irq_parse_one_cb, &ctx); - return ctx.rc; - } -diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c -index 1d249d0f61ae..f064f4c6405a 100644 ---- a/drivers/acpi/scan.c -+++ b/drivers/acpi/scan.c -@@ -1757,6 +1757,15 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) - if (!acpi_match_device_ids(device, ignore_serial_bus_ids)) - return false; - -+ /* -+ * Firmware on some arm64 X-Gene platforms will make the UART -+ * device appear as both a UART and a slave of that UART. Just -+ * bail out here for X-Gene UARTs. 
-+ */ -+ if (IS_ENABLED(CONFIG_ARM64) && -+ !strcmp(acpi_device_hid(device), "APMC0D08")) -+ return false; -+ - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(device, &resource_list, - acpi_check_serial_bus_slave, -diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c -index f1263364fa97..24ac410f4366 100644 ---- a/drivers/ata/libahci.c -+++ b/drivers/ata/libahci.c -@@ -729,6 +729,24 @@ int ahci_stop_engine(struct ata_port *ap) - tmp &= ~PORT_CMD_START; - writel(tmp, port_mmio + PORT_CMD); - -+#ifdef CONFIG_ARM64 -+ /* Rev Ax of Cavium CN99XX needs a hack for port stop */ -+ if (dev_is_pci(ap->host->dev) && -+ to_pci_dev(ap->host->dev)->vendor == 0x14e4 && -+ to_pci_dev(ap->host->dev)->device == 0x9027 && -+ midr_is_cpu_model_range(read_cpuid_id(), -+ MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN), -+ MIDR_CPU_VAR_REV(0, 0), -+ MIDR_CPU_VAR_REV(0, MIDR_REVISION_MASK))) { -+ tmp = readl(hpriv->mmio + 0x8000); -+ udelay(100); -+ writel(tmp | (1 << 26), hpriv->mmio + 0x8000); -+ udelay(100); -+ writel(tmp & ~(1 << 26), hpriv->mmio + 0x8000); -+ dev_warn(ap->host->dev, "CN99XX SATA reset workaround applied\n"); -+ } -+#endif -+ - /* wait for engine to stop. This could be as long as 500 msec */ - tmp = ata_wait_register(ap, port_mmio + PORT_CMD, - PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); -diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c -index bbf7029e224b..cf7faa970dd6 100644 ---- a/drivers/char/ipmi/ipmi_dmi.c -+++ b/drivers/char/ipmi/ipmi_dmi.c -@@ -215,6 +215,21 @@ static int __init scan_for_dmi_ipmi(void) - { - const struct dmi_device *dev = NULL; - -+#ifdef CONFIG_ARM64 -+ /* RHEL-only -+ * If this is ARM-based HPE m400, return now, because that platform -+ * reports the host-side ipmi address as intel port-io space, which -+ * does not exist in the ARM architecture. -+ */ -+ const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); -+ -+ if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { -+ pr_debug("%s does not support host ipmi\n", dmistr); -+ return 0; -+ } -+ /* END RHEL-only */ -+#endif -+ - while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev))) - dmi_decode_ipmi((const struct dmi_header *) dev->device_data); - -diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c -index 186f1fee7534..93e3a76596ff 100644 ---- a/drivers/char/ipmi/ipmi_msghandler.c -+++ b/drivers/char/ipmi/ipmi_msghandler.c -@@ -35,6 +35,7 @@ - #include - #include - #include -+#include - #include - - #define IPMI_DRIVER_VERSION "39.2" -@@ -5516,8 +5517,21 @@ static int __init ipmi_init_msghandler_mod(void) - { - int rv; - -- pr_info("version " IPMI_DRIVER_VERSION "\n"); -+#ifdef CONFIG_ARM64 -+ /* RHEL-only -+ * If this is ARM-based HPE m400, return now, because that platform -+ * reports the host-side ipmi address as intel port-io space, which -+ * does not exist in the ARM architecture. 
-+ */ -+ const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); - -+ if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { -+ pr_debug("%s does not support host ipmi\n", dmistr); -+ return -ENOSYS; -+ } -+ /* END RHEL-only */ -+#endif -+ pr_info("version " IPMI_DRIVER_VERSION "\n"); - mutex_lock(&ipmi_interfaces_mutex); - rv = ipmi_register_driver(); - mutex_unlock(&ipmi_interfaces_mutex); -diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile -index e489fefd23da..f2dfae764fb5 100644 ---- a/drivers/firmware/efi/Makefile -+++ b/drivers/firmware/efi/Makefile -@@ -25,6 +25,7 @@ subdir-$(CONFIG_EFI_STUB) += libstub - obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o - obj-$(CONFIG_EFI_TEST) += test/ - obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o -+obj-$(CONFIG_EFI) += secureboot.o - obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o - obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o - obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o -diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c -index 1974f0ad32ba..6ba6391494ec 100644 ---- a/drivers/firmware/efi/efi.c -+++ b/drivers/firmware/efi/efi.c -@@ -32,6 +32,7 @@ - #include - #include - #include -+#include - - #include - -@@ -983,40 +984,101 @@ int efi_mem_type(unsigned long phys_addr) - } - #endif - -+struct efi_error_code { -+ efi_status_t status; -+ int errno; -+ const char *description; -+}; -+ -+static const struct efi_error_code efi_error_codes[] = { -+ { EFI_SUCCESS, 0, "Success"}, -+#if 0 -+ { EFI_LOAD_ERROR, -EPICK_AN_ERRNO, "Load Error"}, -+#endif -+ { EFI_INVALID_PARAMETER, -EINVAL, "Invalid Parameter"}, -+ { EFI_UNSUPPORTED, -ENOSYS, "Unsupported"}, -+ { EFI_BAD_BUFFER_SIZE, -ENOSPC, "Bad Buffer Size"}, -+ { EFI_BUFFER_TOO_SMALL, -ENOSPC, "Buffer Too Small"}, -+ { EFI_NOT_READY, -EAGAIN, "Not Ready"}, -+ { EFI_DEVICE_ERROR, -EIO, "Device Error"}, -+ { EFI_WRITE_PROTECTED, -EROFS, "Write Protected"}, -+ { EFI_OUT_OF_RESOURCES, -ENOMEM, "Out of Resources"}, -+#if 0 -+ { EFI_VOLUME_CORRUPTED, -EPICK_AN_ERRNO, "Volume Corrupt"}, -+ { EFI_VOLUME_FULL, -EPICK_AN_ERRNO, "Volume Full"}, -+ { EFI_NO_MEDIA, -EPICK_AN_ERRNO, "No Media"}, -+ { EFI_MEDIA_CHANGED, -EPICK_AN_ERRNO, "Media changed"}, -+#endif -+ { EFI_NOT_FOUND, -ENOENT, "Not Found"}, -+#if 0 -+ { EFI_ACCESS_DENIED, -EPICK_AN_ERRNO, "Access Denied"}, -+ { EFI_NO_RESPONSE, -EPICK_AN_ERRNO, "No Response"}, -+ { EFI_NO_MAPPING, -EPICK_AN_ERRNO, "No mapping"}, -+ { EFI_TIMEOUT, -EPICK_AN_ERRNO, "Time out"}, -+ { EFI_NOT_STARTED, -EPICK_AN_ERRNO, "Not started"}, -+ { EFI_ALREADY_STARTED, -EPICK_AN_ERRNO, "Already started"}, -+#endif -+ { EFI_ABORTED, -EINTR, "Aborted"}, -+#if 0 -+ { EFI_ICMP_ERROR, -EPICK_AN_ERRNO, "ICMP Error"}, -+ { EFI_TFTP_ERROR, -EPICK_AN_ERRNO, "TFTP Error"}, -+ { EFI_PROTOCOL_ERROR, -EPICK_AN_ERRNO, "Protocol Error"}, -+ { EFI_INCOMPATIBLE_VERSION, -EPICK_AN_ERRNO, "Incompatible Version"}, -+#endif -+ { EFI_SECURITY_VIOLATION, -EACCES, "Security Policy Violation"}, -+#if 0 -+ { EFI_CRC_ERROR, -EPICK_AN_ERRNO, "CRC Error"}, -+ { EFI_END_OF_MEDIA, -EPICK_AN_ERRNO, "End of Media"}, -+ { EFI_END_OF_FILE, -EPICK_AN_ERRNO, "End of File"}, -+ { EFI_INVALID_LANGUAGE, -EPICK_AN_ERRNO, "Invalid Languages"}, -+ { EFI_COMPROMISED_DATA, -EPICK_AN_ERRNO, "Compromised Data"}, -+ -+ // warnings -+ { EFI_WARN_UNKOWN_GLYPH, -EPICK_AN_ERRNO, "Warning Unknown Glyph"}, -+ { EFI_WARN_DELETE_FAILURE, -EPICK_AN_ERRNO, "Warning Delete Failure"}, -+ { EFI_WARN_WRITE_FAILURE, -EPICK_AN_ERRNO, "Warning Write Failure"}, -+ { 
EFI_WARN_BUFFER_TOO_SMALL, -EPICK_AN_ERRNO, "Warning Buffer Too Small"}, -+#endif -+}; -+ -+static int -+efi_status_cmp_bsearch(const void *key, const void *item) -+{ -+ u64 status = (u64)(uintptr_t)key; -+ struct efi_error_code *code = (struct efi_error_code *)item; -+ -+ if (status < code->status) -+ return -1; -+ if (status > code->status) -+ return 1; -+ return 0; -+} -+ - int efi_status_to_err(efi_status_t status) - { -- int err; -- -- switch (status) { -- case EFI_SUCCESS: -- err = 0; -- break; -- case EFI_INVALID_PARAMETER: -- err = -EINVAL; -- break; -- case EFI_OUT_OF_RESOURCES: -- err = -ENOSPC; -- break; -- case EFI_DEVICE_ERROR: -- err = -EIO; -- break; -- case EFI_WRITE_PROTECTED: -- err = -EROFS; -- break; -- case EFI_SECURITY_VIOLATION: -- err = -EACCES; -- break; -- case EFI_NOT_FOUND: -- err = -ENOENT; -- break; -- case EFI_ABORTED: -- err = -EINTR; -- break; -- default: -- err = -EINVAL; -- } -+ struct efi_error_code *found; -+ size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); - -- return err; -+ found = bsearch((void *)(uintptr_t)status, efi_error_codes, -+ sizeof(struct efi_error_code), num, -+ efi_status_cmp_bsearch); -+ if (!found) -+ return -EINVAL; -+ return found->errno; -+} -+ -+const char * -+efi_status_to_str(efi_status_t status) -+{ -+ struct efi_error_code *found; -+ size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); -+ -+ found = bsearch((void *)(uintptr_t)status, efi_error_codes, -+ sizeof(struct efi_error_code), num, -+ efi_status_cmp_bsearch); -+ if (!found) -+ return "Unknown error code"; -+ return found->description; - } - EXPORT_SYMBOL_GPL(efi_status_to_err); - -diff --git a/drivers/firmware/efi/secureboot.c b/drivers/firmware/efi/secureboot.c -new file mode 100644 -index 000000000000..de0a3714a5d4 ---- /dev/null -+++ b/drivers/firmware/efi/secureboot.c -@@ -0,0 +1,38 @@ -+/* Core kernel secure boot support. -+ * -+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. -+ * Written by David Howells (dhowells@redhat.com) -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public Licence -+ * as published by the Free Software Foundation; either version -+ * 2 of the Licence, or (at your option) any later version. -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+ -+/* -+ * Decide what to do when UEFI secure boot mode is enabled. 
-+ */ -+void __init efi_set_secure_boot(enum efi_secureboot_mode mode) -+{ -+ if (efi_enabled(EFI_BOOT)) { -+ switch (mode) { -+ case efi_secureboot_mode_disabled: -+ pr_info("Secure boot disabled\n"); -+ break; -+ case efi_secureboot_mode_enabled: -+ set_bit(EFI_SECURE_BOOT, &efi.flags); -+ pr_info("Secure boot enabled\n"); -+ break; -+ default: -+ pr_warn("Secure boot could not be determined (mode %u)\n", -+ mode); -+ break; -+ } -+ } -+} -diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c -index 82fcfd29bc4d..17b7e096b682 100644 ---- a/drivers/firmware/sysfb.c -+++ b/drivers/firmware/sysfb.c -@@ -34,6 +34,22 @@ - #include - #include - -+static int skip_simpledrm; -+ -+static int __init simpledrm_disable(char *opt) -+{ -+ if (!opt) -+ return -EINVAL; -+ -+ get_option(&opt, &skip_simpledrm); -+ -+ if (skip_simpledrm) -+ pr_info("The simpledrm driver will not be probed\n"); -+ -+ return 0; -+} -+early_param("nvidia-drm.modeset", simpledrm_disable); -+ - static struct platform_device *pd; - static DEFINE_MUTEX(disable_lock); - static bool disabled; -@@ -85,7 +101,7 @@ static __init int sysfb_init(void) - - /* try to create a simple-framebuffer device */ - compatible = sysfb_parse_mode(si, &mode); -- if (compatible) { -+ if (compatible && !skip_simpledrm) { - pd = sysfb_create_simplefb(si, &mode); - if (!IS_ERR(pd)) - goto unlock_mutex; -diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c -index 84e7ba5314d3..efc96776f761 100644 ---- a/drivers/hid/hid-rmi.c -+++ b/drivers/hid/hid-rmi.c -@@ -321,21 +321,12 @@ static int rmi_input_event(struct hid_device *hdev, u8 *data, int size) - { - struct rmi_data *hdata = hid_get_drvdata(hdev); - struct rmi_device *rmi_dev = hdata->xport.rmi_dev; -- unsigned long flags; - - if (!(test_bit(RMI_STARTED, &hdata->flags))) - return 0; - -- pm_wakeup_event(hdev->dev.parent, 0); -- -- local_irq_save(flags); -- - rmi_set_attn_data(rmi_dev, data[1], &data[2], size - 2); - -- generic_handle_irq(hdata->rmi_irq); -- -- local_irq_restore(flags); -- - return 1; - } - -@@ -591,56 +582,6 @@ static const struct rmi_transport_ops hid_rmi_ops = { - .reset = rmi_hid_reset, - }; - --static void rmi_irq_teardown(void *data) --{ -- struct rmi_data *hdata = data; -- struct irq_domain *domain = hdata->domain; -- -- if (!domain) -- return; -- -- irq_dispose_mapping(irq_find_mapping(domain, 0)); -- -- irq_domain_remove(domain); -- hdata->domain = NULL; -- hdata->rmi_irq = 0; --} -- --static int rmi_irq_map(struct irq_domain *h, unsigned int virq, -- irq_hw_number_t hw_irq_num) --{ -- irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); -- -- return 0; --} -- --static const struct irq_domain_ops rmi_irq_ops = { -- .map = rmi_irq_map, --}; -- --static int rmi_setup_irq_domain(struct hid_device *hdev) --{ -- struct rmi_data *hdata = hid_get_drvdata(hdev); -- int ret; -- -- hdata->domain = irq_domain_create_linear(hdev->dev.fwnode, 1, -- &rmi_irq_ops, hdata); -- if (!hdata->domain) -- return -ENOMEM; -- -- ret = devm_add_action_or_reset(&hdev->dev, &rmi_irq_teardown, hdata); -- if (ret) -- return ret; -- -- hdata->rmi_irq = irq_create_mapping(hdata->domain, 0); -- if (hdata->rmi_irq <= 0) { -- hid_err(hdev, "Can't allocate an IRQ\n"); -- return hdata->rmi_irq < 0 ? 
hdata->rmi_irq : -ENXIO; -- } -- -- return 0; --} -- - static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) - { - struct rmi_data *data = NULL; -@@ -713,18 +654,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) - - mutex_init(&data->page_mutex); - -- ret = rmi_setup_irq_domain(hdev); -- if (ret) { -- hid_err(hdev, "failed to allocate IRQ domain\n"); -- return ret; -- } -- - if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) - rmi_hid_pdata.gpio_data.disable = true; - - data->xport.dev = hdev->dev.parent; - data->xport.pdata = rmi_hid_pdata; -- data->xport.pdata.irq = data->rmi_irq; - data->xport.proto_name = "hid"; - data->xport.ops = &hid_rmi_ops; - -diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c -index 34aee59dd147..7c5a7f7c11bd 100644 ---- a/drivers/hwtracing/coresight/coresight-etm4x-core.c -+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2303,6 +2304,16 @@ static const struct amba_id etm4_ids[] = { - {}, - }; - -+static const struct dmi_system_id broken_coresight[] = { -+ { -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "HPE"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "Apollo 70"), -+ }, -+ }, -+ { } /* terminating entry */ -+}; -+ - MODULE_DEVICE_TABLE(amba, etm4_ids); - - static struct amba_driver etm4x_amba_driver = { -@@ -2372,6 +2383,11 @@ static int __init etm4x_init(void) - { - int ret; - -+ if (dmi_check_system(broken_coresight)) { -+ pr_info("ETM4 disabled due to firmware bug\n"); -+ return 0; -+ } -+ - ret = etm4_pm_setup(); - - /* etm4_pm_setup() does its own cleanup - exit on error */ -@@ -2398,6 +2414,9 @@ static int __init etm4x_init(void) - - static void __exit etm4x_exit(void) - { -+ if (dmi_check_system(broken_coresight)) -+ return; -+ - amba_driver_unregister(&etm4x_amba_driver); - platform_driver_unregister(&etm4_platform_driver); - etm4_pm_clear(); -diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c -index 258d5fe3d395..f7298e3dc8f3 100644 ---- a/drivers/input/rmi4/rmi_driver.c -+++ b/drivers/input/rmi4/rmi_driver.c -@@ -182,34 +182,47 @@ void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status, - attn_data.data = fifo_data; - - kfifo_put(&drvdata->attn_fifo, attn_data); -+ -+ schedule_work(&drvdata->attn_work); - } - EXPORT_SYMBOL_GPL(rmi_set_attn_data); - --static irqreturn_t rmi_irq_fn(int irq, void *dev_id) -+static void attn_callback(struct work_struct *work) - { -- struct rmi_device *rmi_dev = dev_id; -- struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); -+ struct rmi_driver_data *drvdata = container_of(work, -+ struct rmi_driver_data, -+ attn_work); - struct rmi4_attn_data attn_data = {0}; - int ret, count; - - count = kfifo_get(&drvdata->attn_fifo, &attn_data); -- if (count) { -- *(drvdata->irq_status) = attn_data.irq_status; -- drvdata->attn_data = attn_data; -- } -+ if (!count) -+ return; - -- ret = rmi_process_interrupt_requests(rmi_dev); -+ *(drvdata->irq_status) = attn_data.irq_status; -+ drvdata->attn_data = attn_data; -+ -+ ret = rmi_process_interrupt_requests(drvdata->rmi_dev); - if (ret) -- rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, -+ rmi_dbg(RMI_DEBUG_CORE, &drvdata->rmi_dev->dev, - "Failed to process interrupt request: %d\n", ret); - -- if (count) { -- kfree(attn_data.data); -- drvdata->attn_data.data = NULL; -- } -+ kfree(attn_data.data); -+ 
drvdata->attn_data.data = NULL; - - if (!kfifo_is_empty(&drvdata->attn_fifo)) -- return rmi_irq_fn(irq, dev_id); -+ schedule_work(&drvdata->attn_work); -+} -+ -+static irqreturn_t rmi_irq_fn(int irq, void *dev_id) -+{ -+ struct rmi_device *rmi_dev = dev_id; -+ int ret; -+ -+ ret = rmi_process_interrupt_requests(rmi_dev); -+ if (ret) -+ rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, -+ "Failed to process interrupt request: %d\n", ret); - - return IRQ_HANDLED; - } -@@ -217,7 +230,6 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id) - static int rmi_irq_init(struct rmi_device *rmi_dev) - { - struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); -- struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); - int irq_flags = irq_get_trigger_type(pdata->irq); - int ret; - -@@ -235,8 +247,6 @@ static int rmi_irq_init(struct rmi_device *rmi_dev) - return ret; - } - -- data->enabled = true; -- - return 0; - } - -@@ -886,23 +896,27 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) - if (data->enabled) - goto out; - -- enable_irq(irq); -- data->enabled = true; -- if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { -- retval = disable_irq_wake(irq); -- if (retval) -- dev_warn(&rmi_dev->dev, -- "Failed to disable irq for wake: %d\n", -- retval); -- } -+ if (irq) { -+ enable_irq(irq); -+ data->enabled = true; -+ if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { -+ retval = disable_irq_wake(irq); -+ if (retval) -+ dev_warn(&rmi_dev->dev, -+ "Failed to disable irq for wake: %d\n", -+ retval); -+ } - -- /* -- * Call rmi_process_interrupt_requests() after enabling irq, -- * otherwise we may lose interrupt on edge-triggered systems. -- */ -- irq_flags = irq_get_trigger_type(pdata->irq); -- if (irq_flags & IRQ_TYPE_EDGE_BOTH) -- rmi_process_interrupt_requests(rmi_dev); -+ /* -+ * Call rmi_process_interrupt_requests() after enabling irq, -+ * otherwise we may lose interrupt on edge-triggered systems. 
-+ */ -+ irq_flags = irq_get_trigger_type(pdata->irq); -+ if (irq_flags & IRQ_TYPE_EDGE_BOTH) -+ rmi_process_interrupt_requests(rmi_dev); -+ } else { -+ data->enabled = true; -+ } - - out: - mutex_unlock(&data->enabled_mutex); -@@ -922,20 +936,22 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake) - goto out; - - data->enabled = false; -- disable_irq(irq); -- if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { -- retval = enable_irq_wake(irq); -- if (retval) -- dev_warn(&rmi_dev->dev, -- "Failed to enable irq for wake: %d\n", -- retval); -- } -- -- /* make sure the fifo is clean */ -- while (!kfifo_is_empty(&data->attn_fifo)) { -- count = kfifo_get(&data->attn_fifo, &attn_data); -- if (count) -- kfree(attn_data.data); -+ if (irq) { -+ disable_irq(irq); -+ if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { -+ retval = enable_irq_wake(irq); -+ if (retval) -+ dev_warn(&rmi_dev->dev, -+ "Failed to enable irq for wake: %d\n", -+ retval); -+ } -+ } else { -+ /* make sure the fifo is clean */ -+ while (!kfifo_is_empty(&data->attn_fifo)) { -+ count = kfifo_get(&data->attn_fifo, &attn_data); -+ if (count) -+ kfree(attn_data.data); -+ } - } - - out: -@@ -981,6 +997,8 @@ static int rmi_driver_remove(struct device *dev) - irq_domain_remove(data->irqdomain); - data->irqdomain = NULL; - -+ cancel_work_sync(&data->attn_work); -+ - rmi_f34_remove_sysfs(rmi_dev); - rmi_free_function_list(rmi_dev); - -@@ -1219,9 +1237,15 @@ static int rmi_driver_probe(struct device *dev) - } - } - -- retval = rmi_irq_init(rmi_dev); -- if (retval < 0) -- goto err_destroy_functions; -+ if (pdata->irq) { -+ retval = rmi_irq_init(rmi_dev); -+ if (retval < 0) -+ goto err_destroy_functions; -+ } -+ -+ data->enabled = true; -+ -+ INIT_WORK(&data->attn_work, attn_callback); - - if (data->f01_container->dev.driver) { - /* Driver already bound, so enable ATTN now. 
*/ -diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c -index 3a67e636287a..eb5e796277d6 100644 ---- a/drivers/iommu/iommu.c -+++ b/drivers/iommu/iommu.c -@@ -8,6 +8,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -2931,6 +2932,27 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) - } - EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); - -+#ifdef CONFIG_ARM64 -+static int __init iommu_quirks(void) -+{ -+ const char *vendor, *name; -+ -+ vendor = dmi_get_system_info(DMI_SYS_VENDOR); -+ name = dmi_get_system_info(DMI_PRODUCT_NAME); -+ -+ if (vendor && -+ (strncmp(vendor, "GIGABYTE", 8) == 0 && name && -+ (strncmp(name, "R120", 4) == 0 || -+ strncmp(name, "R270", 4) == 0))) { -+ pr_warn("Gigabyte %s detected, force iommu passthrough mode", name); -+ iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; -+ } -+ -+ return 0; -+} -+arch_initcall(iommu_quirks); -+#endif -+ - /** - * iommu_setup_default_domain - Set the default_domain for the group - * @group: Group to change -diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c -index 6b6aa3c36744..0ce08e9a0a3d 100644 ---- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c -+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c -@@ -851,6 +851,10 @@ ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev(struct ath10k *ar, struct sk_buff *skb, - } - - ev = tb[WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT]; -+ if (!ev) { -+ kfree(tb); -+ return -EPROTO; -+ } - - arg->desc_id = ev->desc_id; - arg->status = ev->status; -diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index e0081914052f..ae2d04c2f2b3 100644 ---- a/drivers/pci/quirks.c -+++ b/drivers/pci/quirks.c -@@ -4410,6 +4410,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000, - DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084, - quirk_bridge_cavm_thrx2_pcie_root); - -+/* -+ * PCI BAR 5 is not setup correctly for the on-board AHCI controller -+ * on Broadcom's Vulcan processor. Added a quirk to fix BAR 5 by -+ * using BAR 4's resources which are populated correctly and NOT -+ * actually used by the AHCI controller. -+ */ -+static void quirk_fix_vulcan_ahci_bars(struct pci_dev *dev) -+{ -+ struct resource *r = &dev->resource[4]; -+ -+ if (!(r->flags & IORESOURCE_MEM) || (r->start == 0)) -+ return; -+ -+ /* Set BAR5 resource to BAR4 */ -+ dev->resource[5] = *r; -+ -+ /* Update BAR5 in pci config space */ -+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, r->start); -+ -+ /* Clear BAR4's resource */ -+ memset(r, 0, sizeof(*r)); -+} -+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9027, quirk_fix_vulcan_ahci_bars); -+ - /* - * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero) - * class code. Fix it. 
-diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c -index 228fb2d11c70..696cfa7025de 100644 ---- a/drivers/rtc/rtc-cmos.c -+++ b/drivers/rtc/rtc-cmos.c -@@ -818,18 +818,24 @@ static void rtc_wake_off(struct device *dev) - } - - #ifdef CONFIG_X86 --/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ - static void use_acpi_alarm_quirks(void) - { -- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) -+ switch (boot_cpu_data.x86_vendor) { -+ case X86_VENDOR_INTEL: -+ if (dmi_get_bios_year() < 2015) -+ return; -+ break; -+ case X86_VENDOR_AMD: -+ case X86_VENDOR_HYGON: -+ if (dmi_get_bios_year() < 2021) -+ return; -+ break; -+ default: - return; -- -+ } - if (!is_hpet_enabled()) - return; - -- if (dmi_get_bios_year() < 2015) -- return; -- - use_acpi_alarm = true; - } - #else -diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c -index c2e8d9e27749..c24dbb681664 100644 ---- a/drivers/scsi/sd.c -+++ b/drivers/scsi/sd.c -@@ -118,6 +118,14 @@ static const char *sd_cache_types[] = { - "write back, no read (daft)" - }; - -+static const char *sd_probe_types[] = { "async", "sync" }; -+ -+static char sd_probe_type[6] = "async"; -+module_param_string(probe, sd_probe_type, sizeof(sd_probe_type), -+ S_IRUGO|S_IWUSR); -+MODULE_PARM_DESC(probe, "async or sync. Setting to 'sync' disables asynchronous " -+ "device number assignments (sda, sdb, ...)."); -+ - static void sd_set_flush_flag(struct scsi_disk *sdkp) - { - bool wc = false, fua = false; -@@ -4045,6 +4053,8 @@ static int __init init_sd(void) - goto err_out_class; - } - -+ if (!strcmp(sd_probe_type, "sync")) -+ sd_template.gendrv.probe_type = PROBE_FORCE_SYNCHRONOUS; - err = scsi_register_driver(&sd_template.gendrv); - if (err) - goto err_out_driver; -diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c -index dfc30cebd4c4..ce1e2cf26478 100644 ---- a/drivers/usb/core/hub.c -+++ b/drivers/usb/core/hub.c -@@ -5759,6 +5759,13 @@ static void hub_event(struct work_struct *work) - (u16) hub->change_bits[0], - (u16) hub->event_bits[0]); - -+ /* Don't disconnect USB-SATA on TrimSlice */ -+ if (strcmp(dev_name(hdev->bus->controller), "tegra-ehci.0") == 0) { -+ if ((hdev->state == 7) && (hub->change_bits[0] == 0) && -+ (hub->event_bits[0] == 0x2)) -+ hub->event_bits[0] = 0; -+ } -+ - /* Lock the device, then check to see if we were - * disconnected while waiting for the lock to succeed. */ - usb_lock_device(hdev); -diff --git a/include/linux/efi.h b/include/linux/efi.h -index 80b21d1c6eaf..b66c0683f2fc 100644 ---- a/include/linux/efi.h -+++ b/include/linux/efi.h -@@ -44,6 +44,8 @@ struct screen_info; - #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) - #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) - -+#define EFI_IS_ERROR(x) ((x) & (1UL << (BITS_PER_LONG-1))) -+ - typedef unsigned long efi_status_t; - typedef u8 efi_bool_t; - typedef u16 efi_char16_t; /* UNICODE character */ -@@ -871,6 +873,14 @@ extern int __init efi_setup_pcdp_console(char *); - #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ - #define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ - #define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ -+#define EFI_SECURE_BOOT 13 /* Are we in Secure Boot mode? 
*/ -+ -+enum efi_secureboot_mode { -+ efi_secureboot_mode_unset, -+ efi_secureboot_mode_unknown, -+ efi_secureboot_mode_disabled, -+ efi_secureboot_mode_enabled, -+}; - - #ifdef CONFIG_EFI - /* -@@ -882,6 +892,8 @@ static inline bool efi_enabled(int feature) - } - extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); - -+extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode); -+ - bool __pure __efi_soft_reserve_enabled(void); - - static inline bool __pure efi_soft_reserve_enabled(void) -@@ -903,6 +915,8 @@ static inline bool efi_enabled(int feature) - static inline void - efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} - -+static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {} -+ - static inline bool efi_soft_reserve_enabled(void) - { - return false; -@@ -917,6 +931,7 @@ static inline void efi_find_mirror(void) {} - #endif - - extern int efi_status_to_err(efi_status_t status); -+extern const char *efi_status_to_str(efi_status_t status); - - /* - * Variable Attributes -@@ -1133,13 +1148,6 @@ static inline bool efi_runtime_disabled(void) { return true; } - extern void efi_call_virt_check_flags(unsigned long flags, const void *caller); - extern unsigned long efi_call_virt_save_flags(void); - --enum efi_secureboot_mode { -- efi_secureboot_mode_unset, -- efi_secureboot_mode_unknown, -- efi_secureboot_mode_disabled, -- efi_secureboot_mode_enabled, --}; -- - static inline - enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) - { -diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h -index 2b8d85aae083..6a560e1abb59 100644 ---- a/include/linux/lsm_hook_defs.h -+++ b/include/linux/lsm_hook_defs.h -@@ -405,6 +405,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) - #endif /* CONFIG_BPF_SYSCALL */ - - LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) -+LSM_HOOK(int, 0, lock_kernel_down, const char *where, enum lockdown_reason level) -+ - - #ifdef CONFIG_PERF_EVENTS - LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) -diff --git a/include/linux/module.h b/include/linux/module.h -index a98e188cf37b..2eef4246c2c9 100644 ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -418,6 +418,7 @@ struct module { - struct module_attribute *modinfo_attrs; - const char *version; - const char *srcversion; -+ const char *rhelversion; - struct kobject *holders_dir; - - /* Exported symbols */ -diff --git a/include/linux/rh_kabi.h b/include/linux/rh_kabi.h -new file mode 100644 -index 000000000000..e0d3353802bb ---- /dev/null -+++ b/include/linux/rh_kabi.h -@@ -0,0 +1,172 @@ -+/* -+ * rh_kabi.h - Red Hat kABI abstraction header -+ * -+ * Copyright (c) 2014 Don Zickus -+ * Copyright (c) 2015-2017 Jiri Benc -+ * Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa -+ * Copyright (c) 2016-2018 Prarit Bhargava -+ * Copyright (c) 2017 Paolo Abeni, Larry Woodman -+ * -+ * This file is released under the GPLv2. -+ * See the file COPYING for more details. -+ * -+ * These kabi macros hide the changes from the kabi checker and from the -+ * process that computes the exported symbols' checksums. -+ * They have 2 variants: one (defined under __GENKSYMS__) used when -+ * generating the checksums, and the other used when building the kernel's -+ * binaries. -+ * -+ * The use of these macros does not guarantee that the usage and modification -+ * of code is correct. 
As with all Red Hat only changes, an engineer must -+ * explain why the use of the macro is valid in the patch containing the -+ * changes. -+ * -+ */ -+ -+#ifndef _LINUX_RH_KABI_H -+#define _LINUX_RH_KABI_H -+ -+#include -+#include -+ -+/* -+ * RH_KABI_CONST -+ * Adds a new const modifier to a function parameter preserving the old -+ * checksum. -+ * -+ * RH_KABI_DEPRECATE -+ * Mark the element as deprecated and make it unusable by modules while -+ * preserving kABI checksums. -+ * -+ * RH_KABI_DEPRECATE_FN -+ * Mark the function pointer as deprecated and make it unusable by modules -+ * while preserving kABI checksums. -+ * -+ * RH_KABI_EXTEND -+ * Simple macro for adding a new element to a struct. -+ * -+ * Warning: only use if a hole exists for _all_ arches. Use pahole to verify. -+ * -+ * RH_KABI_FILL_HOLE -+ * Simple macro for filling a hole in a struct. -+ * -+ * RH_KABI_RENAME -+ * Simple macro for renaming an element without changing its type. This -+ * macro can be used in bitfields, for example. -+ * -+ * NOTE: does not include the final ';' -+ * -+ * RH_KABI_REPLACE -+ * Simple replacement of _orig with a union of _orig and _new. -+ * -+ * The RH_KABI_REPLACE* macros attempt to add the ability to use the '_new' -+ * element while preserving size alignment with the '_orig' element. -+ * -+ * The #ifdef __GENKSYMS__ preserves the kABI agreement, while the anonymous -+ * union structure preserves the size alignment (assuming the '_new' element -+ * is not bigger than the '_orig' element). -+ * -+ * RH_KABI_REPLACE_UNSAFE -+ * Unsafe version of RH_KABI_REPLACE. Only use for typedefs. -+ * -+ * RH_KABI_FORCE_CHANGE -+ * Force change of the symbol checksum. The argument of the macro is a -+ * version for cases we need to do this more than once. -+ * -+ * This macro does the opposite: it changes the symbol checksum without -+ * actually changing anything about the exported symbol. It is useful for -+ * symbols that are not whitelisted, we're changing them in an -+ * incompatible way and want to prevent 3rd party modules to silently -+ * corrupt memory. Instead, by changing the symbol checksum, such modules -+ * won't be loaded by the kernel. This macro should only be used as a -+ * last resort when all other KABI workarounds have failed. -+ * -+ * NOTE -+ * Don't use ';' after these macros as it messes up the kABI checker by -+ * changing what the resulting token string looks like. Instead let this -+ * macro add the ';' so it can be properly hidden from the kABI checker -+ * (mainly for RH_KABI_EXTEND, but applied to all macros for uniformity). -+ * -+ */ -+#ifdef __GENKSYMS__ -+ -+# define RH_KABI_CONST -+# define RH_KABI_EXTEND(_new) -+# define RH_KABI_FILL_HOLE(_new) -+# define RH_KABI_FORCE_CHANGE(ver) __attribute__((rh_kabi_change ## ver)) -+# define RH_KABI_RENAME(_orig, _new) _orig -+ -+# define _RH_KABI_DEPRECATE(_type, _orig) _type _orig -+# define _RH_KABI_DEPRECATE_FN(_type, _orig, _args...) _type (*_orig)(_args) -+# define _RH_KABI_REPLACE(_orig, _new) _orig -+# define _RH_KABI_REPLACE_UNSAFE(_orig, _new) _orig -+ -+#else -+ -+# define RH_KABI_ALIGN_WARNING ". Disable CONFIG_RH_KABI_SIZE_ALIGN_CHECKS if debugging." 
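-+/*
-+ * Illustrative usage sketch (annotation only, not part of the original
-+ * header; the struct and field names below are hypothetical).  A
-+ * structure shipped with spare slots can later gain a field without
-+ * changing its size or kABI checksum:
-+ *
-+ *	struct example_driver_data {
-+ *		unsigned long	flags;
-+ *		RH_KABI_RESERVE(1)
-+ *		RH_KABI_RESERVE(2)
-+ *	};
-+ *
-+ * A later update claims slot 1 for a new pointer (an unsigned long and
-+ * a pointer have the same size and alignment on the supported arches):
-+ *
-+ *	struct example_driver_data {
-+ *		unsigned long	flags;
-+ *		RH_KABI_USE(1, void *new_feature)
-+ *		RH_KABI_RESERVE(2)
-+ *	};
-+ *
-+ * Under __GENKSYMS__ the checksum still sees the original reserved
-+ * slot, while the real build gets the union emitted by _RH_KABI_REPLACE.
-+ */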
-+ -+# define RH_KABI_CONST const -+# define RH_KABI_EXTEND(_new) _new; -+# define RH_KABI_FILL_HOLE(_new) _new; -+# define RH_KABI_FORCE_CHANGE(ver) -+# define RH_KABI_RENAME(_orig, _new) _new -+ -+ -+#if IS_BUILTIN(CONFIG_RH_KABI_SIZE_ALIGN_CHECKS) -+# define __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new) \ -+ union { \ -+ _Static_assert(sizeof(struct{_new;}) <= sizeof(struct{_orig;}), \ -+ __FILE__ ":" __stringify(__LINE__) ": " __stringify(_new) " is larger than " __stringify(_orig) RH_KABI_ALIGN_WARNING); \ -+ _Static_assert(__alignof__(struct{_new;}) <= __alignof__(struct{_orig;}), \ -+ __FILE__ ":" __stringify(__LINE__) ": " __stringify(_orig) " is not aligned the same as " __stringify(_new) RH_KABI_ALIGN_WARNING); \ -+ } -+#else -+# define __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new) -+#endif -+ -+# define _RH_KABI_DEPRECATE(_type, _orig) _type rh_reserved_##_orig -+# define _RH_KABI_DEPRECATE_FN(_type, _orig, _args...) \ -+ _type (* rh_reserved_##_orig)(_args) -+# define _RH_KABI_REPLACE(_orig, _new) \ -+ union { \ -+ _new; \ -+ struct { \ -+ _orig; \ -+ } __UNIQUE_ID(rh_kabi_hide); \ -+ __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new); \ -+ } -+# define _RH_KABI_REPLACE_UNSAFE(_orig, _new) _new -+ -+#endif /* __GENKSYMS__ */ -+ -+/* semicolon added wrappers for the RH_KABI_REPLACE macros */ -+# define RH_KABI_DEPRECATE(_type, _orig) _RH_KABI_DEPRECATE(_type, _orig); -+# define RH_KABI_DEPRECATE_FN(_type, _orig, _args...) \ -+ _RH_KABI_DEPRECATE_FN(_type, _orig, _args); -+# define RH_KABI_REPLACE(_orig, _new) _RH_KABI_REPLACE(_orig, _new); -+# define RH_KABI_REPLACE_UNSAFE(_orig, _new) _RH_KABI_REPLACE_UNSAFE(_orig, _new); -+/* -+ * Macro for breaking up a random element into two smaller chunks using an -+ * anonymous struct inside an anonymous union. -+ */ -+# define RH_KABI_REPLACE2(orig, _new1, _new2) RH_KABI_REPLACE(orig, struct{ _new1; _new2;}) -+ -+# define RH_KABI_RESERVE(n) _RH_KABI_RESERVE(n); -+/* -+ * Simple wrappers to replace standard Red Hat reserved elements. -+ */ -+# define RH_KABI_USE(n, _new) RH_KABI_REPLACE(_RH_KABI_RESERVE(n), _new) -+/* -+ * Macros for breaking up a reserved element into two smaller chunks using -+ * an anonymous struct inside an anonymous union. -+ */ -+# define RH_KABI_USE2(n, _new1, _new2) RH_KABI_REPLACE(_RH_KABI_RESERVE(n), struct{ _new1; _new2; }) -+ -+/* -+ * We tried to standardize on Red Hat reserved names. These wrappers -+ * leverage those common names making it easier to read and find in the -+ * code. 
-+ */ -+# define _RH_KABI_RESERVE(n) unsigned long rh_reserved##n -+ -+#endif /* _LINUX_RH_KABI_H */ -diff --git a/include/linux/rmi.h b/include/linux/rmi.h -index ab7eea01ab42..fff7c5f737fc 100644 ---- a/include/linux/rmi.h -+++ b/include/linux/rmi.h -@@ -364,6 +364,7 @@ struct rmi_driver_data { - - struct rmi4_attn_data attn_data; - DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16); -+ struct work_struct attn_work; - }; - - int rmi_register_transport_device(struct rmi_transport_dev *xport); -diff --git a/include/linux/security.h b/include/linux/security.h -index 5f16eecde00b..974be25cfa70 100644 ---- a/include/linux/security.h -+++ b/include/linux/security.h -@@ -484,6 +484,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); - int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); - int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); - int security_locked_down(enum lockdown_reason what); -+int security_lock_kernel_down(const char *where, enum lockdown_reason level); - #else /* CONFIG_SECURITY */ - - static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) -@@ -1395,6 +1396,10 @@ static inline int security_locked_down(enum lockdown_reason what) - { - return 0; - } -+static inline int security_lock_kernel_down(const char *where, enum lockdown_reason level) -+{ -+ return 0; -+} - #endif /* CONFIG_SECURITY */ - - #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) -diff --git a/kernel/module/main.c b/kernel/module/main.c -index 98fedfdb8db5..a21921f880e1 100644 ---- a/kernel/module/main.c -+++ b/kernel/module/main.c -@@ -528,6 +528,7 @@ static struct module_attribute modinfo_##field = { \ - - MODINFO_ATTR(version); - MODINFO_ATTR(srcversion); -+MODINFO_ATTR(rhelversion); - - static struct { - char name[MODULE_NAME_LEN + 1]; -@@ -980,6 +981,7 @@ struct module_attribute *modinfo_attrs[] = { - &module_uevent, - &modinfo_version, - &modinfo_srcversion, -+ &modinfo_rhelversion, - &modinfo_initstate, - &modinfo_coresize, - #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC -diff --git a/kernel/module/signing.c b/kernel/module/signing.c -index a2ff4242e623..f0d2be1ee4f1 100644 ---- a/kernel/module/signing.c -+++ b/kernel/module/signing.c -@@ -61,10 +61,17 @@ int mod_verify_sig(const void *mod, struct load_info *info) - modlen -= sig_len + sizeof(ms); - info->len = modlen; - -- return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, -+ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, - VERIFY_USE_SECONDARY_KEYRING, - VERIFYING_MODULE_SIGNATURE, - NULL, NULL); -+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { -+ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, -+ VERIFY_USE_PLATFORM_KEYRING, -+ VERIFYING_MODULE_SIGNATURE, -+ NULL, NULL); -+ } -+ return ret; - } - - int module_sig_check(struct load_info *info, int flags) -diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c -index ac4ef3e206bb..80ede130812c 100644 ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -23,6 +23,7 @@ - #include "modpost.h" - #include "../../include/linux/license.h" - #include "../../include/linux/module_symbol.h" -+#include "../../include/generated/uapi/linux/version.h" - - static bool module_enabled; - /* Are we using CONFIG_MODVERSIONS? 
*/ -@@ -2090,6 +2091,12 @@ static void write_buf(struct buffer *b, const char *fname) - } - } - -+static void add_rhelversion(struct buffer *b, struct module *mod) -+{ -+ buf_printf(b, "MODULE_INFO(rhelversion, \"%d.%d\");\n", RHEL_MAJOR, -+ RHEL_MINOR); -+} -+ - static void write_if_changed(struct buffer *b, const char *fname) - { - char *tmp; -@@ -2150,6 +2157,7 @@ static void write_mod_c_file(struct module *mod) - add_depends(&buf, mod); - add_moddevtable(&buf, mod); - add_srcversion(&buf, mod); -+ add_rhelversion(&buf, mod); - - ret = snprintf(fname, sizeof(fname), "%s.mod.c", mod->name); - if (ret >= sizeof(fname)) { -diff --git a/scripts/tags.sh b/scripts/tags.sh -index a70d43723146..56d06b04f752 100755 ---- a/scripts/tags.sh -+++ b/scripts/tags.sh -@@ -16,6 +16,8 @@ fi - ignore="$(echo "$RCS_FIND_IGNORE" | sed 's|\\||g' )" - # tags and cscope files should also ignore MODVERSION *.mod.c files - ignore="$ignore ( -name *.mod.c ) -prune -o" -+# RHEL tags and cscope should also ignore redhat/rpm -+ignore="$ignore ( -path redhat/rpm ) -prune -o" - - # ignore arbitrary directories - if [ -n "${IGNORE_DIRS}" ]; then -diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c -index d1fdd113450a..182e8090cfe8 100644 ---- a/security/integrity/platform_certs/load_uefi.c -+++ b/security/integrity/platform_certs/load_uefi.c -@@ -74,7 +74,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, - return NULL; - - if (*status != EFI_BUFFER_TOO_SMALL) { -- pr_err("Couldn't get size: 0x%lx\n", *status); -+ pr_err("Couldn't get size: %s (0x%lx)\n", -+ efi_status_to_str(*status), *status); - return NULL; - } - -@@ -85,7 +86,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, - *status = efi.get_variable(name, guid, NULL, &lsize, db); - if (*status != EFI_SUCCESS) { - kfree(db); -- pr_err("Error reading db var: 0x%lx\n", *status); -+ pr_err("Error reading db var: %s (0x%lx)\n", -+ efi_status_to_str(*status), *status); - return NULL; - } - -diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig -index e84ddf484010..d0501353a4b9 100644 ---- a/security/lockdown/Kconfig -+++ b/security/lockdown/Kconfig -@@ -16,6 +16,19 @@ config SECURITY_LOCKDOWN_LSM_EARLY - subsystem is fully initialised. If enabled, lockdown will - unconditionally be called before any other LSMs. - -+config LOCK_DOWN_IN_EFI_SECURE_BOOT -+ bool "Lock down the kernel in EFI Secure Boot mode" -+ default n -+ depends on EFI && SECURITY_LOCKDOWN_LSM_EARLY -+ help -+ UEFI Secure Boot provides a mechanism for ensuring that the firmware -+ will only load signed bootloaders and kernels. Secure boot mode may -+ be determined from EFI variables provided by the system firmware if -+ not indicated by the boot parameters. -+ -+ Enabling this option results in kernel lockdown being triggered if -+ EFI Secure Boot is set. 
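For orientation, the pieces this option ties together elsewhere in this patch set (the EFI_SECURE_BOOT flag, the arch setup call, and the lockdown LSM hook) reduce to roughly the following condensed sketch; it is an illustration of the flow, not the literal diff:

#include <linux/efi.h>
#include <linux/security.h>

static void __init example_lockdown_from_firmware(void)
{
	/* efi_set_secure_boot() records the firmware state in efi.flags */
	if (IS_ENABLED(CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT) &&
	    efi_enabled(EFI_SECURE_BOOT))
		/* Dispatches to the lockdown LSM via the new hook */
		security_lock_kernel_down("EFI Secure Boot mode",
					  LOCKDOWN_INTEGRITY_MAX);
}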
-+ - choice - prompt "Kernel default lockdown mode" - default LOCK_DOWN_KERNEL_FORCE_NONE -diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c -index 68d19632aeb7..ef348935b6ff 100644 ---- a/security/lockdown/lockdown.c -+++ b/security/lockdown/lockdown.c -@@ -73,6 +73,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what) - - static struct security_hook_list lockdown_hooks[] __ro_after_init = { - LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), -+ LSM_HOOK_INIT(lock_kernel_down, lock_kernel_down), - }; - - static int __init lockdown_lsm_init(void) -diff --git a/security/security.c b/security/security.c -index 23b129d482a7..55d0fe0d121b 100644 ---- a/security/security.c -+++ b/security/security.c -@@ -5230,6 +5230,18 @@ int security_locked_down(enum lockdown_reason what) - } - EXPORT_SYMBOL(security_locked_down); - -+/** -+ * security_lock_kernel_down() - Put the kernel into lock-down mode. -+ * -+ * @where: Where the lock-down is originating from (e.g. command line option) -+ * @level: The lock-down level (can only increase) -+ */ -+int security_lock_kernel_down(const char *where, enum lockdown_reason level) -+{ -+ return call_int_hook(lock_kernel_down, 0, where, level); -+} -+EXPORT_SYMBOL(security_lock_kernel_down); -+ - #ifdef CONFIG_PERF_EVENTS - /** - * security_perf_event_open() - Check if a perf event open is allowed -diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c -index 3c157b006a5a..257f71e9ec07 100644 ---- a/sound/pci/hda/cs35l41_hda.c -+++ b/sound/pci/hda/cs35l41_hda.c -@@ -33,6 +33,9 @@ - #define CAL_AMBIENT_DSP_CTL_NAME "CAL_AMBIENT" - #define CAL_DSP_CTL_TYPE 5 - #define CAL_DSP_CTL_ALG 205 -+#define CS35L41_UUID "50d90cdc-3de4-4f18-b528-c7fe3b71f40d" -+#define CS35L41_DSM_GET_MUTE 5 -+#define CS35L41_NOTIFY_EVENT 0x91 - - static bool firmware_autostart = 1; - module_param(firmware_autostart, bool, 0444); -@@ -563,6 +566,31 @@ static void cs35l41_hda_play_start(struct device *dev) - - } - -+static void cs35l41_mute(struct device *dev, bool mute) -+{ -+ struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); -+ struct regmap *reg = cs35l41->regmap; -+ -+ dev_dbg(dev, "Mute(%d:%d) Playback Started: %d\n", mute, cs35l41->mute_override, -+ cs35l41->playback_started); -+ -+ if (cs35l41->playback_started) { -+ if (mute || cs35l41->mute_override) { -+ dev_dbg(dev, "Muting\n"); -+ regmap_multi_reg_write(reg, cs35l41_hda_mute, ARRAY_SIZE(cs35l41_hda_mute)); -+ } else { -+ dev_dbg(dev, "Unmuting\n"); -+ if (cs35l41->firmware_running) { -+ regmap_multi_reg_write(reg, cs35l41_hda_unmute_dsp, -+ ARRAY_SIZE(cs35l41_hda_unmute_dsp)); -+ } else { -+ regmap_multi_reg_write(reg, cs35l41_hda_unmute, -+ ARRAY_SIZE(cs35l41_hda_unmute)); -+ } -+ } -+ } -+} -+ - static void cs35l41_hda_play_done(struct device *dev) - { - struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); -@@ -572,13 +600,7 @@ static void cs35l41_hda_play_done(struct device *dev) - - cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 1, - cs35l41->firmware_running); -- if (cs35l41->firmware_running) { -- regmap_multi_reg_write(reg, cs35l41_hda_unmute_dsp, -- ARRAY_SIZE(cs35l41_hda_unmute_dsp)); -- } else { -- regmap_multi_reg_write(reg, cs35l41_hda_unmute, -- ARRAY_SIZE(cs35l41_hda_unmute)); -- } -+ cs35l41_mute(dev, false); - } - - static void cs35l41_hda_pause_start(struct device *dev) -@@ -588,7 +610,7 @@ static void cs35l41_hda_pause_start(struct device *dev) - - dev_dbg(dev, "Pause (Start)\n"); - -- regmap_multi_reg_write(reg, cs35l41_hda_mute, 
ARRAY_SIZE(cs35l41_hda_mute)); -+ cs35l41_mute(dev, true); - cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 0, - cs35l41->firmware_running); - } -@@ -1116,6 +1138,53 @@ static int cs35l41_create_controls(struct cs35l41_hda *cs35l41) - return 0; - } - -+static bool cs35l41_dsm_supported(acpi_handle handle, unsigned int commands) -+{ -+ guid_t guid; -+ -+ guid_parse(CS35L41_UUID, &guid); -+ -+ return acpi_check_dsm(handle, &guid, 0, BIT(commands)); -+} -+ -+static int cs35l41_get_acpi_mute_state(struct cs35l41_hda *cs35l41, acpi_handle handle) -+{ -+ guid_t guid; -+ union acpi_object *ret; -+ int mute = -ENODEV; -+ -+ guid_parse(CS35L41_UUID, &guid); -+ -+ if (cs35l41_dsm_supported(handle, CS35L41_DSM_GET_MUTE)) { -+ ret = acpi_evaluate_dsm(handle, &guid, 0, CS35L41_DSM_GET_MUTE, NULL); -+ mute = *ret->buffer.pointer; -+ dev_dbg(cs35l41->dev, "CS35L41_DSM_GET_MUTE: %d\n", mute); -+ } -+ -+ dev_dbg(cs35l41->dev, "%s: %d\n", __func__, mute); -+ -+ return mute; -+} -+ -+static void cs35l41_acpi_device_notify(acpi_handle handle, u32 event, struct device *dev) -+{ -+ struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); -+ int mute; -+ -+ if (event != CS35L41_NOTIFY_EVENT) -+ return; -+ -+ mute = cs35l41_get_acpi_mute_state(cs35l41, handle); -+ if (mute < 0) { -+ dev_warn(cs35l41->dev, "Unable to retrieve mute state: %d\n", mute); -+ return; -+ } -+ -+ dev_dbg(cs35l41->dev, "Requesting mute value: %d\n", mute); -+ cs35l41->mute_override = (mute > 0); -+ cs35l41_mute(cs35l41->dev, cs35l41->mute_override); -+} -+ - static int cs35l41_hda_bind(struct device *dev, struct device *master, void *master_data) - { - struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); -@@ -1157,6 +1226,14 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas - comps->playback_hook = cs35l41_hda_playback_hook; - comps->pre_playback_hook = cs35l41_hda_pre_playback_hook; - comps->post_playback_hook = cs35l41_hda_post_playback_hook; -+ comps->acpi_notify = cs35l41_acpi_device_notify; -+ comps->adev = cs35l41->dacpi; -+ -+ comps->acpi_notifications_supported = cs35l41_dsm_supported(acpi_device_handle(comps->adev), -+ CS35L41_DSM_GET_MUTE); -+ -+ cs35l41->mute_override = cs35l41_get_acpi_mute_state(cs35l41, -+ acpi_device_handle(cs35l41->dacpi)) > 0; - - mutex_unlock(&cs35l41->fw_mutex); - -@@ -1430,8 +1507,8 @@ static int cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41, const char *hid, i - return -ENODEV; - } - -+ cs35l41->dacpi = adev; - physdev = get_device(acpi_get_first_physical_node(adev)); -- acpi_dev_put(adev); - - sub = acpi_get_subsystem_id(ACPI_HANDLE(physdev)); - if (IS_ERR(sub)) -@@ -1541,6 +1618,7 @@ static int cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41, const char *hid, i - hw_cfg->valid = false; - hw_cfg->gpio1.valid = false; - hw_cfg->gpio2.valid = false; -+ acpi_dev_put(cs35l41->dacpi); - put_physdev: - put_device(physdev); - -@@ -1644,10 +1722,7 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i - if (ret) - goto err; - -- ret = regmap_multi_reg_write(cs35l41->regmap, cs35l41_hda_mute, -- ARRAY_SIZE(cs35l41_hda_mute)); -- if (ret) -- goto err; -+ cs35l41_mute(cs35l41->dev, true); - - INIT_WORK(&cs35l41->fw_load_work, cs35l41_fw_load_work); - mutex_init(&cs35l41->fw_mutex); -@@ -1684,6 +1759,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i - if (cs35l41_safe_reset(cs35l41->regmap, cs35l41->hw_cfg.bst_type)) - gpiod_set_value_cansleep(cs35l41->reset_gpio, 0); - gpiod_put(cs35l41->reset_gpio); -+ 
gpiod_put(cs35l41->cs_gpio); -+ acpi_dev_put(cs35l41->dacpi); - kfree(cs35l41->acpi_subsystem_id); - - return ret; -@@ -1703,11 +1780,14 @@ void cs35l41_hda_remove(struct device *dev) - - component_del(cs35l41->dev, &cs35l41_hda_comp_ops); - -+ acpi_dev_put(cs35l41->dacpi); -+ - pm_runtime_put_noidle(cs35l41->dev); - - if (cs35l41_safe_reset(cs35l41->regmap, cs35l41->hw_cfg.bst_type)) - gpiod_set_value_cansleep(cs35l41->reset_gpio, 0); - gpiod_put(cs35l41->reset_gpio); -+ gpiod_put(cs35l41->cs_gpio); - kfree(cs35l41->acpi_subsystem_id); - } - EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41); -diff --git a/sound/pci/hda/cs35l41_hda.h b/sound/pci/hda/cs35l41_hda.h -index b93bf762976e..3d925d677213 100644 ---- a/sound/pci/hda/cs35l41_hda.h -+++ b/sound/pci/hda/cs35l41_hda.h -@@ -10,6 +10,7 @@ - #ifndef __CS35L41_HDA_H__ - #define __CS35L41_HDA_H__ - -+#include - #include - #include - #include -@@ -34,8 +35,8 @@ struct cs35l41_amp_efi_data { - } __packed; - - enum cs35l41_hda_spk_pos { -- CS35l41_LEFT, -- CS35l41_RIGHT, -+ CS35L41_LEFT, -+ CS35L41_RIGHT, - }; - - enum cs35l41_hda_gpio_function { -@@ -49,6 +50,7 @@ struct cs35l41_hda { - struct device *dev; - struct regmap *regmap; - struct gpio_desc *reset_gpio; -+ struct gpio_desc *cs_gpio; - struct cs35l41_hw_cfg hw_cfg; - struct hda_codec *codec; - -@@ -70,6 +72,8 @@ struct cs35l41_hda { - bool halo_initialized; - bool playback_started; - struct cs_dsp cs_dsp; -+ struct acpi_device *dacpi; -+ bool mute_override; - }; - - enum halo_state { -diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c -index b62a4e6968e2..c9eb70290973 100644 ---- a/sound/pci/hda/cs35l41_hda_property.c -+++ b/sound/pci/hda/cs35l41_hda_property.c -@@ -6,9 +6,300 @@ - // - // Author: Stefan Binding - -+#include - #include - #include - #include "cs35l41_hda_property.h" -+#include -+ -+#define MAX_AMPS 4 -+ -+struct cs35l41_config { -+ const char *ssid; -+ enum { -+ SPI, -+ I2C -+ } bus; -+ int num_amps; -+ enum { -+ INTERNAL, -+ EXTERNAL -+ } boost_type; -+ u8 channel[MAX_AMPS]; -+ int reset_gpio_index; /* -1 if no reset gpio */ -+ int spkid_gpio_index; /* -1 if no spkid gpio */ -+ int cs_gpio_index; /* -1 if no cs gpio, or cs-gpios already exists, max num amps == 2 */ -+ int boost_ind_nanohenry; /* Required if boost_type == Internal */ -+ int boost_peak_milliamp; /* Required if boost_type == Internal */ -+ int boost_cap_microfarad; /* Required if boost_type == Internal */ -+}; -+ -+static const struct cs35l41_config cs35l41_config_table[] = { -+/* -+ * Device 103C89C6 does have _DSD, however it is setup to use the wrong boost type. -+ * We can override the _DSD to correct the boost type here. -+ * Since this laptop has valid ACPI, we do not need to handle cs-gpios, since that already exists -+ * in the ACPI. The Reset GPIO is also valid, so we can use the Reset defined in _DSD. 
-+ */ -+ { "103C89C6", SPI, 2, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, -1, -1, -1, 1000, 4500, 24 }, -+ { "104312AF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431433", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431463", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431473", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, -+ { "10431483", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, -+ { "10431493", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "104314D3", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "104314E3", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431503", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431533", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431573", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431663", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, -+ { "104316D3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, -+ { "104316F3", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, -+ { "104317F3", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431863", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "104318D3", I2C, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, -+ { "10431C9F", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431CAF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431CCF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431CDF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431CEF", SPI, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, -+ { "10431D1F", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431DA2", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, -+ { "10431E02", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, -+ { "10431EE2", I2C, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 }, -+ { "10431F12", I2C, 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, -+ { "10431F1F", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 }, -+ { "10431F62", SPI, 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, -+ {} -+}; -+ -+static int cs35l41_add_gpios(struct cs35l41_hda *cs35l41, struct device *physdev, int reset_gpio, -+ int spkid_gpio, int cs_gpio_index, int num_amps) -+{ -+ struct acpi_gpio_mapping *gpio_mapping; -+ struct acpi_gpio_params *reset_gpio_params; -+ struct acpi_gpio_params *spkid_gpio_params; -+ struct acpi_gpio_params *cs_gpio_params; -+ unsigned int num_entries = 0; -+ unsigned int reset_index, spkid_index, csgpio_index; -+ int i; -+ -+ /* -+ * GPIO Mapping only needs to be done once, since it would be available for subsequent amps -+ 
*/ -+ if (cs35l41->dacpi->driver_gpios) -+ return 0; -+ -+ if (reset_gpio >= 0) { -+ reset_index = num_entries; -+ num_entries++; -+ } -+ -+ if (spkid_gpio >= 0) { -+ spkid_index = num_entries; -+ num_entries++; -+ } -+ -+ if ((cs_gpio_index >= 0) && (num_amps == 2)) { -+ csgpio_index = num_entries; -+ num_entries++; -+ } -+ -+ if (!num_entries) -+ return 0; -+ -+ /* must include termination entry */ -+ num_entries++; -+ -+ gpio_mapping = devm_kcalloc(physdev, num_entries, sizeof(struct acpi_gpio_mapping), -+ GFP_KERNEL); -+ -+ if (!gpio_mapping) -+ goto err; -+ -+ if (reset_gpio >= 0) { -+ gpio_mapping[reset_index].name = "reset-gpios"; -+ reset_gpio_params = devm_kcalloc(physdev, num_amps, sizeof(struct acpi_gpio_params), -+ GFP_KERNEL); -+ if (!reset_gpio_params) -+ goto err; -+ -+ for (i = 0; i < num_amps; i++) -+ reset_gpio_params[i].crs_entry_index = reset_gpio; -+ -+ gpio_mapping[reset_index].data = reset_gpio_params; -+ gpio_mapping[reset_index].size = num_amps; -+ } -+ -+ if (spkid_gpio >= 0) { -+ gpio_mapping[spkid_index].name = "spk-id-gpios"; -+ spkid_gpio_params = devm_kcalloc(physdev, num_amps, sizeof(struct acpi_gpio_params), -+ GFP_KERNEL); -+ if (!spkid_gpio_params) -+ goto err; -+ -+ for (i = 0; i < num_amps; i++) -+ spkid_gpio_params[i].crs_entry_index = spkid_gpio; -+ -+ gpio_mapping[spkid_index].data = spkid_gpio_params; -+ gpio_mapping[spkid_index].size = num_amps; -+ } -+ -+ if ((cs_gpio_index >= 0) && (num_amps == 2)) { -+ gpio_mapping[csgpio_index].name = "cs-gpios"; -+ /* only one GPIO CS is supported without using _DSD, obtained using index 0 */ -+ cs_gpio_params = devm_kzalloc(physdev, sizeof(struct acpi_gpio_params), GFP_KERNEL); -+ if (!cs_gpio_params) -+ goto err; -+ -+ cs_gpio_params->crs_entry_index = cs_gpio_index; -+ -+ gpio_mapping[csgpio_index].data = cs_gpio_params; -+ gpio_mapping[csgpio_index].size = 1; -+ } -+ -+ return devm_acpi_dev_add_driver_gpios(physdev, gpio_mapping); -+err: -+ devm_kfree(physdev, gpio_mapping); -+ devm_kfree(physdev, reset_gpio_params); -+ devm_kfree(physdev, spkid_gpio_params); -+ devm_kfree(physdev, cs_gpio_params); -+ return -ENOMEM; -+} -+ -+static int generic_dsd_config(struct cs35l41_hda *cs35l41, struct device *physdev, int id, -+ const char *hid) -+{ -+ struct cs35l41_hw_cfg *hw_cfg = &cs35l41->hw_cfg; -+ const struct cs35l41_config *cfg; -+ struct gpio_desc *cs_gpiod; -+ struct spi_device *spi; -+ bool dsd_found; -+ int ret; -+ -+ for (cfg = cs35l41_config_table; cfg->ssid; cfg++) { -+ if (!strcasecmp(cfg->ssid, cs35l41->acpi_subsystem_id)) -+ break; -+ } -+ -+ if (!cfg->ssid) -+ return -ENOENT; -+ -+ if (!cs35l41->dacpi || cs35l41->dacpi != ACPI_COMPANION(physdev)) { -+ dev_err(cs35l41->dev, "ACPI Device does not match, cannot override _DSD.\n"); -+ return -ENODEV; -+ } -+ -+ dev_info(cs35l41->dev, "Adding DSD properties for %s\n", cs35l41->acpi_subsystem_id); -+ -+ dsd_found = acpi_dev_has_props(cs35l41->dacpi); -+ -+ if (!dsd_found) { -+ ret = cs35l41_add_gpios(cs35l41, physdev, cfg->reset_gpio_index, -+ cfg->spkid_gpio_index, cfg->cs_gpio_index, -+ cfg->num_amps); -+ if (ret) { -+ dev_err(cs35l41->dev, "Error adding GPIO mapping: %d\n", ret); -+ return ret; -+ } -+ } else if (cfg->reset_gpio_index >= 0 || cfg->spkid_gpio_index >= 0) { -+ dev_warn(cs35l41->dev, "Cannot add Reset/Speaker ID/SPI CS GPIO Mapping, " -+ "_DSD already exists.\n"); -+ } -+ -+ if (cfg->bus == SPI) { -+ cs35l41->index = id; -+ /* -+ * Manually set the Chip Select for the second amp in the node. 
-+ * This is only supported for systems with 2 amps, since we cannot expand the -+ * default number of chip selects without using cs-gpios -+ * The CS GPIO must be set high prior to communicating with the first amp (which -+ * uses a native chip select), to ensure the second amp does not clash with the -+ * first. -+ */ -+ if (cfg->cs_gpio_index >= 0) { -+ spi = to_spi_device(cs35l41->dev); -+ -+ if (cfg->num_amps != 2) { -+ dev_warn(cs35l41->dev, -+ "Cannot update SPI CS, Number of Amps (%d) != 2\n", -+ cfg->num_amps); -+ } else if (dsd_found) { -+ dev_warn(cs35l41->dev, -+ "Cannot update SPI CS, _DSD already exists.\n"); -+ } else { -+ /* -+ * This is obtained using driver_gpios, since only one GPIO for CS -+ * exists, this can be obtained using index 0. -+ */ -+ cs_gpiod = gpiod_get_index(physdev, "cs", 0, GPIOD_OUT_LOW); -+ if (IS_ERR(cs_gpiod)) { -+ dev_err(cs35l41->dev, -+ "Unable to get Chip Select GPIO descriptor\n"); -+ return PTR_ERR(cs_gpiod); -+ } -+ if (id == 1) { -+ spi_set_csgpiod(spi, 0, cs_gpiod); -+ cs35l41->cs_gpio = cs_gpiod; -+ } else { -+ gpiod_set_value_cansleep(cs_gpiod, true); -+ gpiod_put(cs_gpiod); -+ } -+ spi_setup(spi); -+ } -+ } -+ } else { -+ if (cfg->num_amps > 2) -+ /* -+ * i2c addresses for 3/4 amps are used in order: 0x40, 0x41, 0x42, 0x43, -+ * subtracting 0x40 would give zero-based index -+ */ -+ cs35l41->index = id - 0x40; -+ else -+ /* i2c addr 0x40 for first amp (always), 0x41/0x42 for 2nd amp */ -+ cs35l41->index = id == 0x40 ? 0 : 1; -+ } -+ -+ if (cfg->num_amps == 3) -+ /* 3 amps means a center channel, so no duplicate channels */ -+ cs35l41->channel_index = 0; -+ else -+ /* -+ * if 4 amps, there are duplicate channels, so they need different indexes -+ * if 2 amps, no duplicate channels, channel_index would be 0 -+ */ -+ cs35l41->channel_index = cs35l41->index / 2; -+ -+ cs35l41->reset_gpio = fwnode_gpiod_get_index(acpi_fwnode_handle(cs35l41->dacpi), "reset", -+ cs35l41->index, GPIOD_OUT_LOW, -+ "cs35l41-reset"); -+ cs35l41->speaker_id = cs35l41_get_speaker_id(physdev, cs35l41->index, cfg->num_amps, -1); -+ -+ hw_cfg->spk_pos = cfg->channel[cs35l41->index]; -+ -+ if (cfg->boost_type == INTERNAL) { -+ hw_cfg->bst_type = CS35L41_INT_BOOST; -+ hw_cfg->bst_ind = cfg->boost_ind_nanohenry; -+ hw_cfg->bst_ipk = cfg->boost_peak_milliamp; -+ hw_cfg->bst_cap = cfg->boost_cap_microfarad; -+ hw_cfg->gpio1.func = CS35L41_NOT_USED; -+ hw_cfg->gpio1.valid = true; -+ } else { -+ hw_cfg->bst_type = CS35L41_EXT_BOOST; -+ hw_cfg->bst_ind = -1; -+ hw_cfg->bst_ipk = -1; -+ hw_cfg->bst_cap = -1; -+ hw_cfg->gpio1.func = CS35l41_VSPK_SWITCH; -+ hw_cfg->gpio1.valid = true; -+ } -+ -+ hw_cfg->gpio2.func = CS35L41_INTERRUPT; -+ hw_cfg->gpio2.valid = true; -+ hw_cfg->valid = true; -+ -+ return 0; -+} - - /* - * Device CLSA010(0/1) doesn't have _DSD so a gpiod_get by the label reset won't work. -@@ -43,37 +334,6 @@ static int lenovo_legion_no_acpi(struct cs35l41_hda *cs35l41, struct device *phy - return 0; - } - --/* -- * Device 103C89C6 does have _DSD, however it is setup to use the wrong boost type. -- * We can override the _DSD to correct the boost type here. -- * Since this laptop has valid ACPI, we do not need to handle cs-gpios, since that already exists -- * in the ACPI. 
-- */ --static int hp_vision_acpi_fix(struct cs35l41_hda *cs35l41, struct device *physdev, int id, -- const char *hid) --{ -- struct cs35l41_hw_cfg *hw_cfg = &cs35l41->hw_cfg; -- -- dev_info(cs35l41->dev, "Adding DSD properties for %s\n", cs35l41->acpi_subsystem_id); -- -- cs35l41->index = id; -- cs35l41->channel_index = 0; -- cs35l41->reset_gpio = gpiod_get_index(physdev, NULL, 1, GPIOD_OUT_HIGH); -- cs35l41->speaker_id = -ENOENT; -- hw_cfg->spk_pos = cs35l41->index ? 1 : 0; // right:left -- hw_cfg->gpio1.func = CS35L41_NOT_USED; -- hw_cfg->gpio1.valid = true; -- hw_cfg->gpio2.func = CS35L41_INTERRUPT; -- hw_cfg->gpio2.valid = true; -- hw_cfg->bst_type = CS35L41_INT_BOOST; -- hw_cfg->bst_ind = 1000; -- hw_cfg->bst_ipk = 4500; -- hw_cfg->bst_cap = 24; -- hw_cfg->valid = true; -- -- return 0; --} -- - struct cs35l41_prop_model { - const char *hid; - const char *ssid; -@@ -84,7 +344,36 @@ struct cs35l41_prop_model { - static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { - { "CLSA0100", NULL, lenovo_legion_no_acpi }, - { "CLSA0101", NULL, lenovo_legion_no_acpi }, -- { "CSC3551", "103C89C6", hp_vision_acpi_fix }, -+ { "CSC3551", "103C89C6", generic_dsd_config }, -+ { "CSC3551", "104312AF", generic_dsd_config }, -+ { "CSC3551", "10431433", generic_dsd_config }, -+ { "CSC3551", "10431463", generic_dsd_config }, -+ { "CSC3551", "10431473", generic_dsd_config }, -+ { "CSC3551", "10431483", generic_dsd_config }, -+ { "CSC3551", "10431493", generic_dsd_config }, -+ { "CSC3551", "104314D3", generic_dsd_config }, -+ { "CSC3551", "104314E3", generic_dsd_config }, -+ { "CSC3551", "10431503", generic_dsd_config }, -+ { "CSC3551", "10431533", generic_dsd_config }, -+ { "CSC3551", "10431573", generic_dsd_config }, -+ { "CSC3551", "10431663", generic_dsd_config }, -+ { "CSC3551", "104316D3", generic_dsd_config }, -+ { "CSC3551", "104316F3", generic_dsd_config }, -+ { "CSC3551", "104317F3", generic_dsd_config }, -+ { "CSC3551", "10431863", generic_dsd_config }, -+ { "CSC3551", "104318D3", generic_dsd_config }, -+ { "CSC3551", "10431C9F", generic_dsd_config }, -+ { "CSC3551", "10431CAF", generic_dsd_config }, -+ { "CSC3551", "10431CCF", generic_dsd_config }, -+ { "CSC3551", "10431CDF", generic_dsd_config }, -+ { "CSC3551", "10431CEF", generic_dsd_config }, -+ { "CSC3551", "10431D1F", generic_dsd_config }, -+ { "CSC3551", "10431DA2", generic_dsd_config }, -+ { "CSC3551", "10431E02", generic_dsd_config }, -+ { "CSC3551", "10431EE2", generic_dsd_config }, -+ { "CSC3551", "10431F12", generic_dsd_config }, -+ { "CSC3551", "10431F1F", generic_dsd_config }, -+ { "CSC3551", "10431F62", generic_dsd_config }, - {} - }; - -@@ -97,7 +386,7 @@ int cs35l41_add_dsd_properties(struct cs35l41_hda *cs35l41, struct device *physd - if (!strcmp(model->hid, hid) && - (!model->ssid || - (cs35l41->acpi_subsystem_id && -- !strcmp(model->ssid, cs35l41->acpi_subsystem_id)))) -+ !strcasecmp(model->ssid, cs35l41->acpi_subsystem_id)))) - return model->add_prop(cs35l41, physdev, id, hid); - } - -diff --git a/sound/pci/hda/hda_component.h b/sound/pci/hda/hda_component.h -index f170aec967c1..bbd6f0ed16c1 100644 ---- a/sound/pci/hda/hda_component.h -+++ b/sound/pci/hda/hda_component.h -@@ -6,6 +6,7 @@ - * Cirrus Logic International Semiconductor Ltd. 
- */ - -+#include - #include - - #define HDA_MAX_COMPONENTS 4 -@@ -15,6 +16,9 @@ struct hda_component { - struct device *dev; - char name[HDA_MAX_NAME_SIZE]; - struct hda_codec *codec; -+ struct acpi_device *adev; -+ bool acpi_notifications_supported; -+ void (*acpi_notify)(acpi_handle handle, u32 event, struct device *dev); - void (*pre_playback_hook)(struct device *dev, int action); - void (*playback_hook)(struct device *dev, int action); - void (*post_playback_hook)(struct device *dev, int action); -diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index 375569d0864b..cfe4a3b588dd 100644 ---- a/sound/pci/hda/patch_realtek.c -+++ b/sound/pci/hda/patch_realtek.c -@@ -9873,22 +9873,28 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { - SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), - SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), -- SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650P", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), -- SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), -- SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604V", ALC285_FIXUP_ASUS_HEADSET_MIC), -- SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603V", ALC285_FIXUP_ASUS_HEADSET_MIC), -- SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601V", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X/GA402N", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604VI/VC/VE/VG/VJ/VQ/VU/VV/VY/VZ", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603VQ/VU/VV/VJ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601VV/VU/VJ/VQ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x14d3, "ASUS G614JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), -+ SND_PCI_QUIRK(0x1043, 0x14e3, "ASUS G513PI/PU/PV", ALC287_FIXUP_CS35L41_I2C_2), -+ SND_PCI_QUIRK(0x1043, 0x1503, "ASUS G733PY/PZ/PZV/PYV", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), -- SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA", ALC287_FIXUP_CS35L41_I2C_2), -- SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301V", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA/XJ/XQ/XU/XV/XI", ALC287_FIXUP_CS35L41_I2C_2), -+ SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301VV/VQ/VU/VJ/VA/VC/VE/VVC/VQC/VUC/VJC/VEC/VCC", ALC285_FIXUP_ASUS_HEADSET_MIC), - SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), -- SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), - SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), -+ SND_PCI_QUIRK(0x1043, 0x16d3, "ASUS UX5304VA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), -+ SND_PCI_QUIRK(0x1043, 0x16f3, "ASUS UX7602VI/BZ", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), - SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), -- SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally RC71L_RC71L", ALC294_FIXUP_ASUS_ALLY), -+ SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally 
NR2301L/X", ALC294_FIXUP_ASUS_ALLY), -+ SND_PCI_QUIRK(0x1043, 0x1863, "ASUS UX6404VI/VV", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), - SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), - SND_PCI_QUIRK(0x1043, 0x18d3, "ASUS UM3504DA", ALC294_FIXUP_CS35L41_I2C_2), -@@ -9913,10 +9919,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { - SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), -- SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC), -- SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), -+ SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), -- SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS ROG Strix G17 2023 (G713PV)", ALC287_FIXUP_CS35L41_I2C_2), -+ SND_PCI_QUIRK(0x1043, 0x1ccf, "ASUS G814JU/JV/JI", ALC245_FIXUP_CS35L41_SPI_2), -+ SND_PCI_QUIRK(0x1043, 0x1cdf, "ASUS G814JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), -+ SND_PCI_QUIRK(0x1043, 0x1cef, "ASUS G834JY/JZ/JI/JG", ALC285_FIXUP_ASUS_HEADSET_MIC), -+ SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS G713PI/PU/PV/PVN", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), - SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2), -@@ -9932,6 +9941,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { - SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), -+ SND_PCI_QUIRK(0x1043, 0x1f1f, "ASUS H7604JI/JV/J3D", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), diff --git a/SOURCES/patch-6.7-redhat.patch b/SOURCES/patch-6.7-redhat.patch new file mode 100644 index 0000000..dc163a4 --- /dev/null +++ b/SOURCES/patch-6.7-redhat.patch @@ -0,0 +1,1553 @@ + Makefile | 20 ++- + arch/s390/include/asm/ipl.h | 1 + + arch/s390/kernel/ipl.c | 5 + + arch/s390/kernel/setup.c | 4 + + arch/x86/kernel/setup.c | 22 ++- + drivers/acpi/apei/hest.c | 8 + + drivers/acpi/irq.c | 17 +- + drivers/acpi/scan.c | 9 ++ + drivers/ata/libahci.c | 18 +++ + drivers/char/ipmi/ipmi_dmi.c | 15 ++ + drivers/char/ipmi/ipmi_msghandler.c | 16 +- + drivers/firmware/efi/Makefile | 1 + + drivers/firmware/efi/efi.c | 124 +++++++++++---- + drivers/firmware/efi/secureboot.c | 38 +++++ + drivers/firmware/sysfb.c | 18 ++- + drivers/hid/hid-rmi.c | 66 -------- + drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 +++ + drivers/input/rmi4/rmi_driver.c | 124 +++++++++------ + drivers/iommu/iommu.c | 22 +++ + drivers/net/wireless/ath/ath10k/wmi-tlv.c | 4 + + drivers/pci/quirks.c | 24 +++ + drivers/scsi/sd.c | 10 ++ + drivers/usb/core/hub.c | 7 + + include/linux/efi.h | 22 ++- + include/linux/lsm_hook_defs.h | 2 + + 
include/linux/module.h | 1 + + include/linux/rh_kabi.h | 172 +++++++++++++++++++++ + include/linux/rmi.h | 1 + + include/linux/security.h | 5 + + kernel/module/main.c | 2 + + kernel/module/signing.c | 9 +- + scripts/mod/modpost.c | 8 + + scripts/tags.sh | 2 + + security/integrity/platform_certs/load_uefi.c | 6 +- + security/lockdown/Kconfig | 13 ++ + security/lockdown/lockdown.c | 1 + + security/security.c | 12 ++ + tools/power/cpupower/Makefile | 2 +- + 38 files changed, 674 insertions(+), 176 deletions(-) + +diff --git a/Makefile b/Makefile +index 96a08c9f0faa..e627bde4e578 100644 +--- a/Makefile ++++ b/Makefile +@@ -22,6 +22,18 @@ $(if $(filter __%, $(MAKECMDGOALS)), \ + PHONY := __all + __all: + ++# Set RHEL variables ++# Note that this ifdef'ery is required to handle when building with ++# the O= mechanism (relocate the object file results) due to upstream ++# commit 67d7c302 which broke our RHEL include file ++ifneq ($(realpath source),) ++include $(realpath source)/Makefile.rhelver ++else ++ifneq ($(realpath Makefile.rhelver),) ++include Makefile.rhelver ++endif ++endif ++ + # We are using a recursive build, so we need to do a little thinking + # to get the ordering right. + # +@@ -1239,7 +1251,13 @@ define filechk_version.h + ((c) > 255 ? 255 : (c)))'; \ + echo \#define LINUX_VERSION_MAJOR $(VERSION); \ + echo \#define LINUX_VERSION_PATCHLEVEL $(PATCHLEVEL); \ +- echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL) ++ echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL); \ ++ echo '#define RHEL_MAJOR $(RHEL_MAJOR)'; \ ++ echo '#define RHEL_MINOR $(RHEL_MINOR)'; \ ++ echo '#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))'; \ ++ echo '#define RHEL_RELEASE_CODE \ ++ $(shell expr $(RHEL_MAJOR) \* 256 + $(RHEL_MINOR))'; \ ++ echo '#define RHEL_RELEASE "$(RHEL_RELEASE)"' + endef + + $(version_h): PATCHLEVEL := $(or $(PATCHLEVEL), 0) +diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h +index b0d00032479d..afb9544fb007 100644 +--- a/arch/s390/include/asm/ipl.h ++++ b/arch/s390/include/asm/ipl.h +@@ -139,6 +139,7 @@ int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, + unsigned char flags, unsigned short cert); + int ipl_report_add_certificate(struct ipl_report *report, void *key, + unsigned long addr, unsigned long len); ++bool ipl_get_secureboot(void); + + /* + * DIAG 308 support +diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c +index ba75f6bee774..7d37ce1772f1 100644 +--- a/arch/s390/kernel/ipl.c ++++ b/arch/s390/kernel/ipl.c +@@ -2520,3 +2520,8 @@ int ipl_report_free(struct ipl_report *report) + } + + #endif ++ ++bool ipl_get_secureboot(void) ++{ ++ return !!ipl_secure_flag; ++} +diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c +index 5701356f4f33..cf3593dc271e 100644 +--- a/arch/s390/kernel/setup.c ++++ b/arch/s390/kernel/setup.c +@@ -49,6 +49,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -907,6 +908,9 @@ void __init setup_arch(char **cmdline_p) + + log_component_list(); + ++ if (ipl_get_secureboot()) ++ security_lock_kernel_down("Secure IPL mode", LOCKDOWN_INTEGRITY_MAX); ++ + /* Have one command line that is parsed and saved in /proc/cmdline */ + /* boot_command_line has been already set up in early.c */ + *cmdline_p = boot_command_line; +diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c +index 1526747bedf2..c482cbf0ccaf 100644 +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + 
#include + #include +@@ -903,6 +904,13 @@ void __init setup_arch(char **cmdline_p) + if (efi_enabled(EFI_BOOT)) + efi_init(); + ++ efi_set_secure_boot(boot_params.secure_boot); ++ ++#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT ++ if (efi_enabled(EFI_SECURE_BOOT)) ++ security_lock_kernel_down("EFI Secure Boot mode", LOCKDOWN_INTEGRITY_MAX); ++#endif ++ + reserve_ibft_region(); + dmi_setup(); + +@@ -1064,19 +1072,7 @@ void __init setup_arch(char **cmdline_p) + /* Allocate bigger log buffer */ + setup_log_buf(1); + +- if (efi_enabled(EFI_BOOT)) { +- switch (boot_params.secure_boot) { +- case efi_secureboot_mode_disabled: +- pr_info("Secure boot disabled\n"); +- break; +- case efi_secureboot_mode_enabled: +- pr_info("Secure boot enabled\n"); +- break; +- default: +- pr_info("Secure boot could not be determined\n"); +- break; +- } +- } ++ efi_set_secure_boot(boot_params.secure_boot); + + reserve_initrd(); + +diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c +index 6aef1ee5e1bd..8f146b1b4972 100644 +--- a/drivers/acpi/apei/hest.c ++++ b/drivers/acpi/apei/hest.c +@@ -96,6 +96,14 @@ static int apei_hest_parse(apei_hest_func_t func, void *data) + if (hest_disable || !hest_tab) + return -EINVAL; + ++#ifdef CONFIG_ARM64 ++ /* Ignore broken firmware */ ++ if (!strncmp(hest_tab->header.oem_id, "HPE ", 6) && ++ !strncmp(hest_tab->header.oem_table_id, "ProLiant", 8) && ++ MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_APM) ++ return -EINVAL; ++#endif ++ + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); + for (i = 0; i < hest_tab->error_source_count; i++) { + len = hest_esrc_len(hest_hdr); +diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c +index 1687483ff319..390b67f19181 100644 +--- a/drivers/acpi/irq.c ++++ b/drivers/acpi/irq.c +@@ -143,6 +143,7 @@ struct acpi_irq_parse_one_ctx { + unsigned int index; + unsigned long *res_flags; + struct irq_fwspec *fwspec; ++ bool skip_producer_check; + }; + + /** +@@ -216,7 +217,8 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, + return AE_CTRL_TERMINATE; + case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: + eirq = &ares->data.extended_irq; +- if (eirq->producer_consumer == ACPI_PRODUCER) ++ if (!ctx->skip_producer_check && ++ eirq->producer_consumer == ACPI_PRODUCER) + return AE_OK; + if (ctx->index >= eirq->interrupt_count) { + ctx->index -= eirq->interrupt_count; +@@ -252,8 +254,19 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, + static int acpi_irq_parse_one(acpi_handle handle, unsigned int index, + struct irq_fwspec *fwspec, unsigned long *flags) + { +- struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec }; ++ struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec, false }; + ++ /* ++ * Firmware on arm64-based HPE m400 platform incorrectly marks ++ * its UART interrupt as ACPI_PRODUCER rather than ACPI_CONSUMER. ++ * Don't do the producer/consumer check for that device. 
++ */ ++ if (IS_ENABLED(CONFIG_ARM64)) { ++ struct acpi_device *adev = acpi_get_acpi_dev(handle); ++ ++ if (adev && !strcmp(acpi_device_hid(adev), "APMC0D08")) ++ ctx.skip_producer_check = true; ++ } + acpi_walk_resources(handle, METHOD_NAME__CRS, acpi_irq_parse_one_cb, &ctx); + return ctx.rc; + } +diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c +index 02bb2cce423f..4a6287d7a22d 100644 +--- a/drivers/acpi/scan.c ++++ b/drivers/acpi/scan.c +@@ -1757,6 +1757,15 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) + if (!acpi_match_device_ids(device, ignore_serial_bus_ids)) + return false; + ++ /* ++ * Firmware on some arm64 X-Gene platforms will make the UART ++ * device appear as both a UART and a slave of that UART. Just ++ * bail out here for X-Gene UARTs. ++ */ ++ if (IS_ENABLED(CONFIG_ARM64) && ++ !strcmp(acpi_device_hid(device), "APMC0D08")) ++ return false; ++ + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(device, &resource_list, + acpi_check_serial_bus_slave, +diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c +index 1a63200ea437..a911e976a596 100644 +--- a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -729,6 +729,24 @@ int ahci_stop_engine(struct ata_port *ap) + tmp &= ~PORT_CMD_START; + writel(tmp, port_mmio + PORT_CMD); + ++#ifdef CONFIG_ARM64 ++ /* Rev Ax of Cavium CN99XX needs a hack for port stop */ ++ if (dev_is_pci(ap->host->dev) && ++ to_pci_dev(ap->host->dev)->vendor == 0x14e4 && ++ to_pci_dev(ap->host->dev)->device == 0x9027 && ++ midr_is_cpu_model_range(read_cpuid_id(), ++ MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN), ++ MIDR_CPU_VAR_REV(0, 0), ++ MIDR_CPU_VAR_REV(0, MIDR_REVISION_MASK))) { ++ tmp = readl(hpriv->mmio + 0x8000); ++ udelay(100); ++ writel(tmp | (1 << 26), hpriv->mmio + 0x8000); ++ udelay(100); ++ writel(tmp & ~(1 << 26), hpriv->mmio + 0x8000); ++ dev_warn(ap->host->dev, "CN99XX SATA reset workaround applied\n"); ++ } ++#endif ++ + /* wait for engine to stop. This could be as long as 500 msec */ + tmp = ata_wait_register(ap, port_mmio + PORT_CMD, + PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); +diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c +index bbf7029e224b..cf7faa970dd6 100644 +--- a/drivers/char/ipmi/ipmi_dmi.c ++++ b/drivers/char/ipmi/ipmi_dmi.c +@@ -215,6 +215,21 @@ static int __init scan_for_dmi_ipmi(void) + { + const struct dmi_device *dev = NULL; + ++#ifdef CONFIG_ARM64 ++ /* RHEL-only ++ * If this is ARM-based HPE m400, return now, because that platform ++ * reports the host-side ipmi address as intel port-io space, which ++ * does not exist in the ARM architecture. 
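The RHEL-only hunks here and in ipmi_dmi.c key the m400 check off dmi_get_system_info() plus strcmp(). Where several platforms need the same treatment, the stock DMI match-table API expresses it declaratively; the following is a hedged alternative sketch, with hypothetical table and helper names:

#include <linux/dmi.h>
#include <linux/init.h>

static const struct dmi_system_id example_no_host_ipmi[] __initconst = {
	{
		.ident = "HPE ProLiant m400",
		.matches = {
			DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant m400 Server"),
		},
	},
	{ }	/* terminating entry */
};

/* dmi_check_system() returns the number of entries that matched */
static bool __init example_skip_host_ipmi(void)
{
	return dmi_check_system(example_no_host_ipmi) > 0;
}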
++ */ ++ const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); ++ ++ if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { ++ pr_debug("%s does not support host ipmi\n", dmistr); ++ return 0; ++ } ++ /* END RHEL-only */ ++#endif ++ + while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev))) + dmi_decode_ipmi((const struct dmi_header *) dev->device_data); + +diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c +index d6f14279684d..9bc53b1970ab 100644 +--- a/drivers/char/ipmi/ipmi_msghandler.c ++++ b/drivers/char/ipmi/ipmi_msghandler.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + + #define IPMI_DRIVER_VERSION "39.2" +@@ -5511,8 +5512,21 @@ static int __init ipmi_init_msghandler_mod(void) + { + int rv; + +- pr_info("version " IPMI_DRIVER_VERSION "\n"); ++#ifdef CONFIG_ARM64 ++ /* RHEL-only ++ * If this is ARM-based HPE m400, return now, because that platform ++ * reports the host-side ipmi address as intel port-io space, which ++ * does not exist in the ARM architecture. ++ */ ++ const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); + ++ if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { ++ pr_debug("%s does not support host ipmi\n", dmistr); ++ return -ENOSYS; ++ } ++ /* END RHEL-only */ ++#endif ++ pr_info("version " IPMI_DRIVER_VERSION "\n"); + mutex_lock(&ipmi_interfaces_mutex); + rv = ipmi_register_driver(); + mutex_unlock(&ipmi_interfaces_mutex); +diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile +index e489fefd23da..f2dfae764fb5 100644 +--- a/drivers/firmware/efi/Makefile ++++ b/drivers/firmware/efi/Makefile +@@ -25,6 +25,7 @@ subdir-$(CONFIG_EFI_STUB) += libstub + obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o + obj-$(CONFIG_EFI_TEST) += test/ + obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o ++obj-$(CONFIG_EFI) += secureboot.o + obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o + obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o + obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o +diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c +index 9d3910d1abe1..f73b3ba6ef0b 100644 +--- a/drivers/firmware/efi/efi.c ++++ b/drivers/firmware/efi/efi.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + + #include + +@@ -972,40 +973,101 @@ int efi_mem_type(unsigned long phys_addr) + return -EINVAL; + } + ++struct efi_error_code { ++ efi_status_t status; ++ int errno; ++ const char *description; ++}; ++ ++static const struct efi_error_code efi_error_codes[] = { ++ { EFI_SUCCESS, 0, "Success"}, ++#if 0 ++ { EFI_LOAD_ERROR, -EPICK_AN_ERRNO, "Load Error"}, ++#endif ++ { EFI_INVALID_PARAMETER, -EINVAL, "Invalid Parameter"}, ++ { EFI_UNSUPPORTED, -ENOSYS, "Unsupported"}, ++ { EFI_BAD_BUFFER_SIZE, -ENOSPC, "Bad Buffer Size"}, ++ { EFI_BUFFER_TOO_SMALL, -ENOSPC, "Buffer Too Small"}, ++ { EFI_NOT_READY, -EAGAIN, "Not Ready"}, ++ { EFI_DEVICE_ERROR, -EIO, "Device Error"}, ++ { EFI_WRITE_PROTECTED, -EROFS, "Write Protected"}, ++ { EFI_OUT_OF_RESOURCES, -ENOMEM, "Out of Resources"}, ++#if 0 ++ { EFI_VOLUME_CORRUPTED, -EPICK_AN_ERRNO, "Volume Corrupt"}, ++ { EFI_VOLUME_FULL, -EPICK_AN_ERRNO, "Volume Full"}, ++ { EFI_NO_MEDIA, -EPICK_AN_ERRNO, "No Media"}, ++ { EFI_MEDIA_CHANGED, -EPICK_AN_ERRNO, "Media changed"}, ++#endif ++ { EFI_NOT_FOUND, -ENOENT, "Not Found"}, ++#if 0 ++ { EFI_ACCESS_DENIED, -EPICK_AN_ERRNO, "Access Denied"}, ++ { EFI_NO_RESPONSE, -EPICK_AN_ERRNO, "No Response"}, ++ { EFI_NO_MAPPING, -EPICK_AN_ERRNO, "No mapping"}, ++ { 
EFI_TIMEOUT, -EPICK_AN_ERRNO, "Time out"}, ++ { EFI_NOT_STARTED, -EPICK_AN_ERRNO, "Not started"}, ++ { EFI_ALREADY_STARTED, -EPICK_AN_ERRNO, "Already started"}, ++#endif ++ { EFI_ABORTED, -EINTR, "Aborted"}, ++#if 0 ++ { EFI_ICMP_ERROR, -EPICK_AN_ERRNO, "ICMP Error"}, ++ { EFI_TFTP_ERROR, -EPICK_AN_ERRNO, "TFTP Error"}, ++ { EFI_PROTOCOL_ERROR, -EPICK_AN_ERRNO, "Protocol Error"}, ++ { EFI_INCOMPATIBLE_VERSION, -EPICK_AN_ERRNO, "Incompatible Version"}, ++#endif ++ { EFI_SECURITY_VIOLATION, -EACCES, "Security Policy Violation"}, ++#if 0 ++ { EFI_CRC_ERROR, -EPICK_AN_ERRNO, "CRC Error"}, ++ { EFI_END_OF_MEDIA, -EPICK_AN_ERRNO, "End of Media"}, ++ { EFI_END_OF_FILE, -EPICK_AN_ERRNO, "End of File"}, ++ { EFI_INVALID_LANGUAGE, -EPICK_AN_ERRNO, "Invalid Languages"}, ++ { EFI_COMPROMISED_DATA, -EPICK_AN_ERRNO, "Compromised Data"}, ++ ++ // warnings ++ { EFI_WARN_UNKOWN_GLYPH, -EPICK_AN_ERRNO, "Warning Unknown Glyph"}, ++ { EFI_WARN_DELETE_FAILURE, -EPICK_AN_ERRNO, "Warning Delete Failure"}, ++ { EFI_WARN_WRITE_FAILURE, -EPICK_AN_ERRNO, "Warning Write Failure"}, ++ { EFI_WARN_BUFFER_TOO_SMALL, -EPICK_AN_ERRNO, "Warning Buffer Too Small"}, ++#endif ++}; ++ ++static int ++efi_status_cmp_bsearch(const void *key, const void *item) ++{ ++ u64 status = (u64)(uintptr_t)key; ++ struct efi_error_code *code = (struct efi_error_code *)item; ++ ++ if (status < code->status) ++ return -1; ++ if (status > code->status) ++ return 1; ++ return 0; ++} ++ + int efi_status_to_err(efi_status_t status) + { +- int err; +- +- switch (status) { +- case EFI_SUCCESS: +- err = 0; +- break; +- case EFI_INVALID_PARAMETER: +- err = -EINVAL; +- break; +- case EFI_OUT_OF_RESOURCES: +- err = -ENOSPC; +- break; +- case EFI_DEVICE_ERROR: +- err = -EIO; +- break; +- case EFI_WRITE_PROTECTED: +- err = -EROFS; +- break; +- case EFI_SECURITY_VIOLATION: +- err = -EACCES; +- break; +- case EFI_NOT_FOUND: +- err = -ENOENT; +- break; +- case EFI_ABORTED: +- err = -EINTR; +- break; +- default: +- err = -EINVAL; +- } ++ struct efi_error_code *found; ++ size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); + +- return err; ++ found = bsearch((void *)(uintptr_t)status, efi_error_codes, ++ sizeof(struct efi_error_code), num, ++ efi_status_cmp_bsearch); ++ if (!found) ++ return -EINVAL; ++ return found->errno; ++} ++ ++const char * ++efi_status_to_str(efi_status_t status) ++{ ++ struct efi_error_code *found; ++ size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); ++ ++ found = bsearch((void *)(uintptr_t)status, efi_error_codes, ++ sizeof(struct efi_error_code), num, ++ efi_status_cmp_bsearch); ++ if (!found) ++ return "Unknown error code"; ++ return found->description; + } + EXPORT_SYMBOL_GPL(efi_status_to_err); + +diff --git a/drivers/firmware/efi/secureboot.c b/drivers/firmware/efi/secureboot.c +new file mode 100644 +index 000000000000..de0a3714a5d4 +--- /dev/null ++++ b/drivers/firmware/efi/secureboot.c +@@ -0,0 +1,38 @@ ++/* Core kernel secure boot support. ++ * ++ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++ ++/* ++ * Decide what to do when UEFI secure boot mode is enabled. 
++ */ ++void __init efi_set_secure_boot(enum efi_secureboot_mode mode) ++{ ++ if (efi_enabled(EFI_BOOT)) { ++ switch (mode) { ++ case efi_secureboot_mode_disabled: ++ pr_info("Secure boot disabled\n"); ++ break; ++ case efi_secureboot_mode_enabled: ++ set_bit(EFI_SECURE_BOOT, &efi.flags); ++ pr_info("Secure boot enabled\n"); ++ break; ++ default: ++ pr_warn("Secure boot could not be determined (mode %u)\n", ++ mode); ++ break; ++ } ++ } ++} +diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c +index 3c197db42c9d..16e4a2e90fae 100644 +--- a/drivers/firmware/sysfb.c ++++ b/drivers/firmware/sysfb.c +@@ -34,6 +34,22 @@ + #include + #include + ++static int skip_simpledrm; ++ ++static int __init simpledrm_disable(char *opt) ++{ ++ if (!opt) ++ return -EINVAL; ++ ++ get_option(&opt, &skip_simpledrm); ++ ++ if (skip_simpledrm) ++ pr_info("The simpledrm driver will not be probed\n"); ++ ++ return 0; ++} ++early_param("nvidia-drm.modeset", simpledrm_disable); ++ + static struct platform_device *pd; + static DEFINE_MUTEX(disable_lock); + static bool disabled; +@@ -85,7 +101,7 @@ static __init int sysfb_init(void) + + /* try to create a simple-framebuffer device */ + compatible = sysfb_parse_mode(si, &mode); +- if (compatible) { ++ if (compatible && !skip_simpledrm) { + pd = sysfb_create_simplefb(si, &mode); + if (!IS_ERR(pd)) + goto unlock_mutex; +diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c +index d4af17fdba46..154f0403cbf4 100644 +--- a/drivers/hid/hid-rmi.c ++++ b/drivers/hid/hid-rmi.c +@@ -321,21 +321,12 @@ static int rmi_input_event(struct hid_device *hdev, u8 *data, int size) + { + struct rmi_data *hdata = hid_get_drvdata(hdev); + struct rmi_device *rmi_dev = hdata->xport.rmi_dev; +- unsigned long flags; + + if (!(test_bit(RMI_STARTED, &hdata->flags))) + return 0; + +- pm_wakeup_event(hdev->dev.parent, 0); +- +- local_irq_save(flags); +- + rmi_set_attn_data(rmi_dev, data[1], &data[2], size - 2); + +- generic_handle_irq(hdata->rmi_irq); +- +- local_irq_restore(flags); +- + return 1; + } + +@@ -589,56 +580,6 @@ static const struct rmi_transport_ops hid_rmi_ops = { + .reset = rmi_hid_reset, + }; + +-static void rmi_irq_teardown(void *data) +-{ +- struct rmi_data *hdata = data; +- struct irq_domain *domain = hdata->domain; +- +- if (!domain) +- return; +- +- irq_dispose_mapping(irq_find_mapping(domain, 0)); +- +- irq_domain_remove(domain); +- hdata->domain = NULL; +- hdata->rmi_irq = 0; +-} +- +-static int rmi_irq_map(struct irq_domain *h, unsigned int virq, +- irq_hw_number_t hw_irq_num) +-{ +- irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); +- +- return 0; +-} +- +-static const struct irq_domain_ops rmi_irq_ops = { +- .map = rmi_irq_map, +-}; +- +-static int rmi_setup_irq_domain(struct hid_device *hdev) +-{ +- struct rmi_data *hdata = hid_get_drvdata(hdev); +- int ret; +- +- hdata->domain = irq_domain_create_linear(hdev->dev.fwnode, 1, +- &rmi_irq_ops, hdata); +- if (!hdata->domain) +- return -ENOMEM; +- +- ret = devm_add_action_or_reset(&hdev->dev, &rmi_irq_teardown, hdata); +- if (ret) +- return ret; +- +- hdata->rmi_irq = irq_create_mapping(hdata->domain, 0); +- if (hdata->rmi_irq <= 0) { +- hid_err(hdev, "Can't allocate an IRQ\n"); +- return hdata->rmi_irq < 0 ? 
hdata->rmi_irq : -ENXIO; +- } +- +- return 0; +-} +- + static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) + { + struct rmi_data *data = NULL; +@@ -711,18 +652,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) + + mutex_init(&data->page_mutex); + +- ret = rmi_setup_irq_domain(hdev); +- if (ret) { +- hid_err(hdev, "failed to allocate IRQ domain\n"); +- return ret; +- } +- + if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) + rmi_hid_pdata.gpio_data.disable = true; + + data->xport.dev = hdev->dev.parent; + data->xport.pdata = rmi_hid_pdata; +- data->xport.pdata.irq = data->rmi_irq; + data->xport.proto_name = "hid"; + data->xport.ops = &hid_rmi_ops; + +diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c +index 34aee59dd147..7c5a7f7c11bd 100644 +--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c ++++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2303,6 +2304,16 @@ static const struct amba_id etm4_ids[] = { + {}, + }; + ++static const struct dmi_system_id broken_coresight[] = { ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "HPE"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Apollo 70"), ++ }, ++ }, ++ { } /* terminating entry */ ++}; ++ + MODULE_DEVICE_TABLE(amba, etm4_ids); + + static struct amba_driver etm4x_amba_driver = { +@@ -2372,6 +2383,11 @@ static int __init etm4x_init(void) + { + int ret; + ++ if (dmi_check_system(broken_coresight)) { ++ pr_info("ETM4 disabled due to firmware bug\n"); ++ return 0; ++ } ++ + ret = etm4_pm_setup(); + + /* etm4_pm_setup() does its own cleanup - exit on error */ +@@ -2398,6 +2414,9 @@ static int __init etm4x_init(void) + + static void __exit etm4x_exit(void) + { ++ if (dmi_check_system(broken_coresight)) ++ return; ++ + amba_driver_unregister(&etm4x_amba_driver); + platform_driver_unregister(&etm4_platform_driver); + etm4_pm_clear(); +diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c +index 258d5fe3d395..f7298e3dc8f3 100644 +--- a/drivers/input/rmi4/rmi_driver.c ++++ b/drivers/input/rmi4/rmi_driver.c +@@ -182,34 +182,47 @@ void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status, + attn_data.data = fifo_data; + + kfifo_put(&drvdata->attn_fifo, attn_data); ++ ++ schedule_work(&drvdata->attn_work); + } + EXPORT_SYMBOL_GPL(rmi_set_attn_data); + +-static irqreturn_t rmi_irq_fn(int irq, void *dev_id) ++static void attn_callback(struct work_struct *work) + { +- struct rmi_device *rmi_dev = dev_id; +- struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); ++ struct rmi_driver_data *drvdata = container_of(work, ++ struct rmi_driver_data, ++ attn_work); + struct rmi4_attn_data attn_data = {0}; + int ret, count; + + count = kfifo_get(&drvdata->attn_fifo, &attn_data); +- if (count) { +- *(drvdata->irq_status) = attn_data.irq_status; +- drvdata->attn_data = attn_data; +- } ++ if (!count) ++ return; + +- ret = rmi_process_interrupt_requests(rmi_dev); ++ *(drvdata->irq_status) = attn_data.irq_status; ++ drvdata->attn_data = attn_data; ++ ++ ret = rmi_process_interrupt_requests(drvdata->rmi_dev); + if (ret) +- rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, ++ rmi_dbg(RMI_DEBUG_CORE, &drvdata->rmi_dev->dev, + "Failed to process interrupt request: %d\n", ret); + +- if (count) { +- kfree(attn_data.data); +- drvdata->attn_data.data = NULL; +- } ++ kfree(attn_data.data); ++ 
drvdata->attn_data.data = NULL; + + if (!kfifo_is_empty(&drvdata->attn_fifo)) +- return rmi_irq_fn(irq, dev_id); ++ schedule_work(&drvdata->attn_work); ++} ++ ++static irqreturn_t rmi_irq_fn(int irq, void *dev_id) ++{ ++ struct rmi_device *rmi_dev = dev_id; ++ int ret; ++ ++ ret = rmi_process_interrupt_requests(rmi_dev); ++ if (ret) ++ rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, ++ "Failed to process interrupt request: %d\n", ret); + + return IRQ_HANDLED; + } +@@ -217,7 +230,6 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id) + static int rmi_irq_init(struct rmi_device *rmi_dev) + { + struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); +- struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); + int irq_flags = irq_get_trigger_type(pdata->irq); + int ret; + +@@ -235,8 +247,6 @@ static int rmi_irq_init(struct rmi_device *rmi_dev) + return ret; + } + +- data->enabled = true; +- + return 0; + } + +@@ -886,23 +896,27 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) + if (data->enabled) + goto out; + +- enable_irq(irq); +- data->enabled = true; +- if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { +- retval = disable_irq_wake(irq); +- if (retval) +- dev_warn(&rmi_dev->dev, +- "Failed to disable irq for wake: %d\n", +- retval); +- } ++ if (irq) { ++ enable_irq(irq); ++ data->enabled = true; ++ if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { ++ retval = disable_irq_wake(irq); ++ if (retval) ++ dev_warn(&rmi_dev->dev, ++ "Failed to disable irq for wake: %d\n", ++ retval); ++ } + +- /* +- * Call rmi_process_interrupt_requests() after enabling irq, +- * otherwise we may lose interrupt on edge-triggered systems. +- */ +- irq_flags = irq_get_trigger_type(pdata->irq); +- if (irq_flags & IRQ_TYPE_EDGE_BOTH) +- rmi_process_interrupt_requests(rmi_dev); ++ /* ++ * Call rmi_process_interrupt_requests() after enabling irq, ++ * otherwise we may lose interrupt on edge-triggered systems. 
++ */ ++ irq_flags = irq_get_trigger_type(pdata->irq); ++ if (irq_flags & IRQ_TYPE_EDGE_BOTH) ++ rmi_process_interrupt_requests(rmi_dev); ++ } else { ++ data->enabled = true; ++ } + + out: + mutex_unlock(&data->enabled_mutex); +@@ -922,20 +936,22 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake) + goto out; + + data->enabled = false; +- disable_irq(irq); +- if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { +- retval = enable_irq_wake(irq); +- if (retval) +- dev_warn(&rmi_dev->dev, +- "Failed to enable irq for wake: %d\n", +- retval); +- } +- +- /* make sure the fifo is clean */ +- while (!kfifo_is_empty(&data->attn_fifo)) { +- count = kfifo_get(&data->attn_fifo, &attn_data); +- if (count) +- kfree(attn_data.data); ++ if (irq) { ++ disable_irq(irq); ++ if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { ++ retval = enable_irq_wake(irq); ++ if (retval) ++ dev_warn(&rmi_dev->dev, ++ "Failed to enable irq for wake: %d\n", ++ retval); ++ } ++ } else { ++ /* make sure the fifo is clean */ ++ while (!kfifo_is_empty(&data->attn_fifo)) { ++ count = kfifo_get(&data->attn_fifo, &attn_data); ++ if (count) ++ kfree(attn_data.data); ++ } + } + + out: +@@ -981,6 +997,8 @@ static int rmi_driver_remove(struct device *dev) + irq_domain_remove(data->irqdomain); + data->irqdomain = NULL; + ++ cancel_work_sync(&data->attn_work); ++ + rmi_f34_remove_sysfs(rmi_dev); + rmi_free_function_list(rmi_dev); + +@@ -1219,9 +1237,15 @@ static int rmi_driver_probe(struct device *dev) + } + } + +- retval = rmi_irq_init(rmi_dev); +- if (retval < 0) +- goto err_destroy_functions; ++ if (pdata->irq) { ++ retval = rmi_irq_init(rmi_dev); ++ if (retval < 0) ++ goto err_destroy_functions; ++ } ++ ++ data->enabled = true; ++ ++ INIT_WORK(&data->attn_work, attn_callback); + + if (data->f01_container->dev.driver) { + /* Driver already bound, so enable ATTN now. 
*/ +diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c +index 33e2a9b5d339..6ae1abc3f11c 100644 +--- a/drivers/iommu/iommu.c ++++ b/drivers/iommu/iommu.c +@@ -8,6 +8,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -3031,6 +3032,27 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) + } + EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); + ++#ifdef CONFIG_ARM64 ++static int __init iommu_quirks(void) ++{ ++ const char *vendor, *name; ++ ++ vendor = dmi_get_system_info(DMI_SYS_VENDOR); ++ name = dmi_get_system_info(DMI_PRODUCT_NAME); ++ ++ if (vendor && ++ (strncmp(vendor, "GIGABYTE", 8) == 0 && name && ++ (strncmp(name, "R120", 4) == 0 || ++ strncmp(name, "R270", 4) == 0))) { ++ pr_warn("Gigabyte %s detected, force iommu passthrough mode", name); ++ iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; ++ } ++ ++ return 0; ++} ++arch_initcall(iommu_quirks); ++#endif ++ + /** + * iommu_setup_default_domain - Set the default_domain for the group + * @group: Group to change +diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c +index 6b6aa3c36744..0ce08e9a0a3d 100644 +--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c ++++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c +@@ -851,6 +851,10 @@ ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev(struct ath10k *ar, struct sk_buff *skb, + } + + ev = tb[WMI_TLV_TAG_STRUCT_MGMT_TX_COMPL_EVENT]; ++ if (!ev) { ++ kfree(tb); ++ return -EPROTO; ++ } + + arg->desc_id = ev->desc_id; + arg->status = ev->status; +diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c +index d55a3ffae4b8..a8c1d69567cd 100644 +--- a/drivers/pci/quirks.c ++++ b/drivers/pci/quirks.c +@@ -4410,6 +4410,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000, + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084, + quirk_bridge_cavm_thrx2_pcie_root); + ++/* ++ * PCI BAR 5 is not setup correctly for the on-board AHCI controller ++ * on Broadcom's Vulcan processor. Added a quirk to fix BAR 5 by ++ * using BAR 4's resources which are populated correctly and NOT ++ * actually used by the AHCI controller. ++ */ ++static void quirk_fix_vulcan_ahci_bars(struct pci_dev *dev) ++{ ++ struct resource *r = &dev->resource[4]; ++ ++ if (!(r->flags & IORESOURCE_MEM) || (r->start == 0)) ++ return; ++ ++ /* Set BAR5 resource to BAR4 */ ++ dev->resource[5] = *r; ++ ++ /* Update BAR5 in pci config space */ ++ pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, r->start); ++ ++ /* Clear BAR4's resource */ ++ memset(r, 0, sizeof(*r)); ++} ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9027, quirk_fix_vulcan_ahci_bars); ++ + /* + * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero) + * class code. Fix it. +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index 542a4bbb21bc..62161ceed2e2 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -118,6 +118,14 @@ static const char *sd_cache_types[] = { + "write back, no read (daft)" + }; + ++static const char *sd_probe_types[] = { "async", "sync" }; ++ ++static char sd_probe_type[6] = "async"; ++module_param_string(probe, sd_probe_type, sizeof(sd_probe_type), ++ S_IRUGO|S_IWUSR); ++MODULE_PARM_DESC(probe, "async or sync. 
Setting to 'sync' disables asynchronous " ++ "device number assignments (sda, sdb, ...)."); ++ + static void sd_set_flush_flag(struct scsi_disk *sdkp) + { + bool wc = false, fua = false; +@@ -4048,6 +4056,8 @@ static int __init init_sd(void) + goto err_out_class; + } + ++ if (!strcmp(sd_probe_type, "sync")) ++ sd_template.gendrv.probe_type = PROBE_FORCE_SYNCHRONOUS; + err = scsi_register_driver(&sd_template.gendrv); + if (err) + goto err_out_driver; +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index 87480a6e6d93..0f0c5550bea6 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -5763,6 +5763,13 @@ static void hub_event(struct work_struct *work) + (u16) hub->change_bits[0], + (u16) hub->event_bits[0]); + ++ /* Don't disconnect USB-SATA on TrimSlice */ ++ if (strcmp(dev_name(hdev->bus->controller), "tegra-ehci.0") == 0) { ++ if ((hdev->state == 7) && (hub->change_bits[0] == 0) && ++ (hub->event_bits[0] == 0x2)) ++ hub->event_bits[0] = 0; ++ } ++ + /* Lock the device, then check to see if we were + * disconnected while waiting for the lock to succeed. */ + usb_lock_device(hdev); +diff --git a/include/linux/efi.h b/include/linux/efi.h +index 9cc5bf32f6f2..7462fb1fc99e 100644 +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -44,6 +44,8 @@ struct screen_info; + #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) + #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) + ++#define EFI_IS_ERROR(x) ((x) & (1UL << (BITS_PER_LONG-1))) ++ + typedef unsigned long efi_status_t; + typedef u8 efi_bool_t; + typedef u16 efi_char16_t; /* UNICODE character */ +@@ -864,6 +866,14 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) + #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ + #define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ + #define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ ++#define EFI_SECURE_BOOT 13 /* Are we in Secure Boot mode? 
*/ ++ ++enum efi_secureboot_mode { ++ efi_secureboot_mode_unset, ++ efi_secureboot_mode_unknown, ++ efi_secureboot_mode_disabled, ++ efi_secureboot_mode_enabled, ++}; + + #ifdef CONFIG_EFI + /* +@@ -875,6 +885,8 @@ static inline bool efi_enabled(int feature) + } + extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); + ++extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode); ++ + bool __pure __efi_soft_reserve_enabled(void); + + static inline bool __pure efi_soft_reserve_enabled(void) +@@ -896,6 +908,8 @@ static inline bool efi_enabled(int feature) + static inline void + efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} + ++static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {} ++ + static inline bool efi_soft_reserve_enabled(void) + { + return false; +@@ -910,6 +924,7 @@ static inline void efi_find_mirror(void) {} + #endif + + extern int efi_status_to_err(efi_status_t status); ++extern const char *efi_status_to_str(efi_status_t status); + + /* + * Variable Attributes +@@ -1126,13 +1141,6 @@ static inline bool efi_runtime_disabled(void) { return true; } + extern void efi_call_virt_check_flags(unsigned long flags, const void *caller); + extern unsigned long efi_call_virt_save_flags(void); + +-enum efi_secureboot_mode { +- efi_secureboot_mode_unset, +- efi_secureboot_mode_unknown, +- efi_secureboot_mode_disabled, +- efi_secureboot_mode_enabled, +-}; +- + static inline + enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) + { +diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h +index 472cb16458b0..7c0f6dd800cb 100644 +--- a/include/linux/lsm_hook_defs.h ++++ b/include/linux/lsm_hook_defs.h +@@ -407,6 +407,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) + #endif /* CONFIG_BPF_SYSCALL */ + + LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) ++LSM_HOOK(int, 0, lock_kernel_down, const char *where, enum lockdown_reason level) ++ + + #ifdef CONFIG_PERF_EVENTS + LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) +diff --git a/include/linux/module.h b/include/linux/module.h +index a98e188cf37b..2eef4246c2c9 100644 +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -418,6 +418,7 @@ struct module { + struct module_attribute *modinfo_attrs; + const char *version; + const char *srcversion; ++ const char *rhelversion; + struct kobject *holders_dir; + + /* Exported symbols */ +diff --git a/include/linux/rh_kabi.h b/include/linux/rh_kabi.h +new file mode 100644 +index 000000000000..e0d3353802bb +--- /dev/null ++++ b/include/linux/rh_kabi.h +@@ -0,0 +1,172 @@ ++/* ++ * rh_kabi.h - Red Hat kABI abstraction header ++ * ++ * Copyright (c) 2014 Don Zickus ++ * Copyright (c) 2015-2017 Jiri Benc ++ * Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa ++ * Copyright (c) 2016-2018 Prarit Bhargava ++ * Copyright (c) 2017 Paolo Abeni, Larry Woodman ++ * ++ * This file is released under the GPLv2. ++ * See the file COPYING for more details. ++ * ++ * These kabi macros hide the changes from the kabi checker and from the ++ * process that computes the exported symbols' checksums. ++ * They have 2 variants: one (defined under __GENKSYMS__) used when ++ * generating the checksums, and the other used when building the kernel's ++ * binaries. ++ * ++ * The use of these macros does not guarantee that the usage and modification ++ * of code is correct. 
As with all Red Hat only changes, an engineer must ++ * explain why the use of the macro is valid in the patch containing the ++ * changes. ++ * ++ */ ++ ++#ifndef _LINUX_RH_KABI_H ++#define _LINUX_RH_KABI_H ++ ++#include ++#include ++ ++/* ++ * RH_KABI_CONST ++ * Adds a new const modifier to a function parameter preserving the old ++ * checksum. ++ * ++ * RH_KABI_DEPRECATE ++ * Mark the element as deprecated and make it unusable by modules while ++ * preserving kABI checksums. ++ * ++ * RH_KABI_DEPRECATE_FN ++ * Mark the function pointer as deprecated and make it unusable by modules ++ * while preserving kABI checksums. ++ * ++ * RH_KABI_EXTEND ++ * Simple macro for adding a new element to a struct. ++ * ++ * Warning: only use if a hole exists for _all_ arches. Use pahole to verify. ++ * ++ * RH_KABI_FILL_HOLE ++ * Simple macro for filling a hole in a struct. ++ * ++ * RH_KABI_RENAME ++ * Simple macro for renaming an element without changing its type. This ++ * macro can be used in bitfields, for example. ++ * ++ * NOTE: does not include the final ';' ++ * ++ * RH_KABI_REPLACE ++ * Simple replacement of _orig with a union of _orig and _new. ++ * ++ * The RH_KABI_REPLACE* macros attempt to add the ability to use the '_new' ++ * element while preserving size alignment with the '_orig' element. ++ * ++ * The #ifdef __GENKSYMS__ preserves the kABI agreement, while the anonymous ++ * union structure preserves the size alignment (assuming the '_new' element ++ * is not bigger than the '_orig' element). ++ * ++ * RH_KABI_REPLACE_UNSAFE ++ * Unsafe version of RH_KABI_REPLACE. Only use for typedefs. ++ * ++ * RH_KABI_FORCE_CHANGE ++ * Force change of the symbol checksum. The argument of the macro is a ++ * version for cases we need to do this more than once. ++ * ++ * This macro does the opposite: it changes the symbol checksum without ++ * actually changing anything about the exported symbol. It is useful for ++ * symbols that are not whitelisted, we're changing them in an ++ * incompatible way and want to prevent 3rd party modules to silently ++ * corrupt memory. Instead, by changing the symbol checksum, such modules ++ * won't be loaded by the kernel. This macro should only be used as a ++ * last resort when all other KABI workarounds have failed. ++ * ++ * NOTE ++ * Don't use ';' after these macros as it messes up the kABI checker by ++ * changing what the resulting token string looks like. Instead let this ++ * macro add the ';' so it can be properly hidden from the kABI checker ++ * (mainly for RH_KABI_EXTEND, but applied to all macros for uniformity). ++ * ++ */ ++#ifdef __GENKSYMS__ ++ ++# define RH_KABI_CONST ++# define RH_KABI_EXTEND(_new) ++# define RH_KABI_FILL_HOLE(_new) ++# define RH_KABI_FORCE_CHANGE(ver) __attribute__((rh_kabi_change ## ver)) ++# define RH_KABI_RENAME(_orig, _new) _orig ++ ++# define _RH_KABI_DEPRECATE(_type, _orig) _type _orig ++# define _RH_KABI_DEPRECATE_FN(_type, _orig, _args...) _type (*_orig)(_args) ++# define _RH_KABI_REPLACE(_orig, _new) _orig ++# define _RH_KABI_REPLACE_UNSAFE(_orig, _new) _orig ++ ++#else ++ ++# define RH_KABI_ALIGN_WARNING ". Disable CONFIG_RH_KABI_SIZE_ALIGN_CHECKS if debugging." 
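++/*
++ * Illustrative example (an assumption, not from the original header):
++ * given a struct that set aside reserved slots in an earlier release,
++ *
++ *	struct foo {
++ *		int a;
++ *		RH_KABI_USE(1, int new_member)
++ *		RH_KABI_RESERVE(2)
++ *	};
++ *
++ * modules built against the old layout still see two unsigned longs and
++ * unchanged checksums, while new code can use 'new_member' without
++ * growing the struct.
++ */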
++ ++# define RH_KABI_CONST const ++# define RH_KABI_EXTEND(_new) _new; ++# define RH_KABI_FILL_HOLE(_new) _new; ++# define RH_KABI_FORCE_CHANGE(ver) ++# define RH_KABI_RENAME(_orig, _new) _new ++ ++ ++#if IS_BUILTIN(CONFIG_RH_KABI_SIZE_ALIGN_CHECKS) ++# define __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new) \ ++ union { \ ++ _Static_assert(sizeof(struct{_new;}) <= sizeof(struct{_orig;}), \ ++ __FILE__ ":" __stringify(__LINE__) ": " __stringify(_new) " is larger than " __stringify(_orig) RH_KABI_ALIGN_WARNING); \ ++ _Static_assert(__alignof__(struct{_new;}) <= __alignof__(struct{_orig;}), \ ++ __FILE__ ":" __stringify(__LINE__) ": " __stringify(_orig) " is not aligned the same as " __stringify(_new) RH_KABI_ALIGN_WARNING); \ ++ } ++#else ++# define __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new) ++#endif ++ ++# define _RH_KABI_DEPRECATE(_type, _orig) _type rh_reserved_##_orig ++# define _RH_KABI_DEPRECATE_FN(_type, _orig, _args...) \ ++ _type (* rh_reserved_##_orig)(_args) ++# define _RH_KABI_REPLACE(_orig, _new) \ ++ union { \ ++ _new; \ ++ struct { \ ++ _orig; \ ++ } __UNIQUE_ID(rh_kabi_hide); \ ++ __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new); \ ++ } ++# define _RH_KABI_REPLACE_UNSAFE(_orig, _new) _new ++ ++#endif /* __GENKSYMS__ */ ++ ++/* semicolon added wrappers for the RH_KABI_REPLACE macros */ ++# define RH_KABI_DEPRECATE(_type, _orig) _RH_KABI_DEPRECATE(_type, _orig); ++# define RH_KABI_DEPRECATE_FN(_type, _orig, _args...) \ ++ _RH_KABI_DEPRECATE_FN(_type, _orig, _args); ++# define RH_KABI_REPLACE(_orig, _new) _RH_KABI_REPLACE(_orig, _new); ++# define RH_KABI_REPLACE_UNSAFE(_orig, _new) _RH_KABI_REPLACE_UNSAFE(_orig, _new); ++/* ++ * Macro for breaking up a random element into two smaller chunks using an ++ * anonymous struct inside an anonymous union. ++ */ ++# define RH_KABI_REPLACE2(orig, _new1, _new2) RH_KABI_REPLACE(orig, struct{ _new1; _new2;}) ++ ++# define RH_KABI_RESERVE(n) _RH_KABI_RESERVE(n); ++/* ++ * Simple wrappers to replace standard Red Hat reserved elements. ++ */ ++# define RH_KABI_USE(n, _new) RH_KABI_REPLACE(_RH_KABI_RESERVE(n), _new) ++/* ++ * Macros for breaking up a reserved element into two smaller chunks using ++ * an anonymous struct inside an anonymous union. ++ */ ++# define RH_KABI_USE2(n, _new1, _new2) RH_KABI_REPLACE(_RH_KABI_RESERVE(n), struct{ _new1; _new2; }) ++ ++/* ++ * We tried to standardize on Red Hat reserved names. These wrappers ++ * leverage those common names making it easier to read and find in the ++ * code. 
++ */ ++# define _RH_KABI_RESERVE(n) unsigned long rh_reserved##n ++ ++#endif /* _LINUX_RH_KABI_H */ +diff --git a/include/linux/rmi.h b/include/linux/rmi.h +index ab7eea01ab42..fff7c5f737fc 100644 +--- a/include/linux/rmi.h ++++ b/include/linux/rmi.h +@@ -364,6 +364,7 @@ struct rmi_driver_data { + + struct rmi4_attn_data attn_data; + DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16); ++ struct work_struct attn_work; + }; + + int rmi_register_transport_device(struct rmi_transport_dev *xport); +diff --git a/include/linux/security.h b/include/linux/security.h +index 9d3138c6364c..7089843865bf 100644 +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -486,6 +486,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); + int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); + int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); + int security_locked_down(enum lockdown_reason what); ++int security_lock_kernel_down(const char *where, enum lockdown_reason level); + #else /* CONFIG_SECURITY */ + + static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) +@@ -1404,6 +1405,10 @@ static inline int security_locked_down(enum lockdown_reason what) + { + return 0; + } ++static inline int security_lock_kernel_down(const char *where, enum lockdown_reason level) ++{ ++ return 0; ++} + #endif /* CONFIG_SECURITY */ + + #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) +diff --git a/kernel/module/main.c b/kernel/module/main.c +index 98fedfdb8db5..a21921f880e1 100644 +--- a/kernel/module/main.c ++++ b/kernel/module/main.c +@@ -528,6 +528,7 @@ static struct module_attribute modinfo_##field = { \ + + MODINFO_ATTR(version); + MODINFO_ATTR(srcversion); ++MODINFO_ATTR(rhelversion); + + static struct { + char name[MODULE_NAME_LEN + 1]; +@@ -980,6 +981,7 @@ struct module_attribute *modinfo_attrs[] = { + &module_uevent, + &modinfo_version, + &modinfo_srcversion, ++ &modinfo_rhelversion, + &modinfo_initstate, + &modinfo_coresize, + #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC +diff --git a/kernel/module/signing.c b/kernel/module/signing.c +index a2ff4242e623..f0d2be1ee4f1 100644 +--- a/kernel/module/signing.c ++++ b/kernel/module/signing.c +@@ -61,10 +61,17 @@ int mod_verify_sig(const void *mod, struct load_info *info) + modlen -= sig_len + sizeof(ms); + info->len = modlen; + +- return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, ++ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); ++ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { ++ ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, ++ VERIFY_USE_PLATFORM_KEYRING, ++ VERIFYING_MODULE_SIGNATURE, ++ NULL, NULL); ++ } ++ return ret; + } + + int module_sig_check(struct load_info *info, int flags) +diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c +index cb6406f485a9..71e1f15d9dce 100644 +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -22,6 +22,7 @@ + #include + #include "modpost.h" + #include "../../include/linux/license.h" ++#include "../../include/generated/uapi/linux/version.h" + + static bool module_enabled; + /* Are we using CONFIG_MODVERSIONS? 
*/ +@@ -1987,6 +1988,12 @@ static void write_buf(struct buffer *b, const char *fname) + } + } + ++static void add_rhelversion(struct buffer *b, struct module *mod) ++{ ++ buf_printf(b, "MODULE_INFO(rhelversion, \"%d.%d\");\n", RHEL_MAJOR, ++ RHEL_MINOR); ++} ++ + static void write_if_changed(struct buffer *b, const char *fname) + { + char *tmp; +@@ -2047,6 +2054,7 @@ static void write_mod_c_file(struct module *mod) + add_depends(&buf, mod); + add_moddevtable(&buf, mod); + add_srcversion(&buf, mod); ++ add_rhelversion(&buf, mod); + + ret = snprintf(fname, sizeof(fname), "%s.mod.c", mod->name); + if (ret >= sizeof(fname)) { +diff --git a/scripts/tags.sh b/scripts/tags.sh +index a70d43723146..56d06b04f752 100755 +--- a/scripts/tags.sh ++++ b/scripts/tags.sh +@@ -16,6 +16,8 @@ fi + ignore="$(echo "$RCS_FIND_IGNORE" | sed 's|\\||g' )" + # tags and cscope files should also ignore MODVERSION *.mod.c files + ignore="$ignore ( -name *.mod.c ) -prune -o" ++# RHEL tags and cscope should also ignore redhat/rpm ++ignore="$ignore ( -path redhat/rpm ) -prune -o" + + # ignore arbitrary directories + if [ -n "${IGNORE_DIRS}" ]; then +diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c +index d1fdd113450a..182e8090cfe8 100644 +--- a/security/integrity/platform_certs/load_uefi.c ++++ b/security/integrity/platform_certs/load_uefi.c +@@ -74,7 +74,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, + return NULL; + + if (*status != EFI_BUFFER_TOO_SMALL) { +- pr_err("Couldn't get size: 0x%lx\n", *status); ++ pr_err("Couldn't get size: %s (0x%lx)\n", ++ efi_status_to_str(*status), *status); + return NULL; + } + +@@ -85,7 +86,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, + *status = efi.get_variable(name, guid, NULL, &lsize, db); + if (*status != EFI_SUCCESS) { + kfree(db); +- pr_err("Error reading db var: 0x%lx\n", *status); ++ pr_err("Error reading db var: %s (0x%lx)\n", ++ efi_status_to_str(*status), *status); + return NULL; + } + +diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig +index e84ddf484010..d0501353a4b9 100644 +--- a/security/lockdown/Kconfig ++++ b/security/lockdown/Kconfig +@@ -16,6 +16,19 @@ config SECURITY_LOCKDOWN_LSM_EARLY + subsystem is fully initialised. If enabled, lockdown will + unconditionally be called before any other LSMs. + ++config LOCK_DOWN_IN_EFI_SECURE_BOOT ++ bool "Lock down the kernel in EFI Secure Boot mode" ++ default n ++ depends on EFI && SECURITY_LOCKDOWN_LSM_EARLY ++ help ++ UEFI Secure Boot provides a mechanism for ensuring that the firmware ++ will only load signed bootloaders and kernels. Secure boot mode may ++ be determined from EFI variables provided by the system firmware if ++ not indicated by the boot parameters. ++ ++ Enabling this option results in kernel lockdown being triggered if ++ EFI Secure Boot is set. 
++ + choice + prompt "Kernel default lockdown mode" + default LOCK_DOWN_KERNEL_FORCE_NONE +diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c +index 68d19632aeb7..ef348935b6ff 100644 +--- a/security/lockdown/lockdown.c ++++ b/security/lockdown/lockdown.c +@@ -73,6 +73,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what) + + static struct security_hook_list lockdown_hooks[] __ro_after_init = { + LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), ++ LSM_HOOK_INIT(lock_kernel_down, lock_kernel_down), + }; + + static int __init lockdown_lsm_init(void) +diff --git a/security/security.c b/security/security.c +index 266cec94369b..c572a4da96b2 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -5248,6 +5248,18 @@ int security_locked_down(enum lockdown_reason what) + } + EXPORT_SYMBOL(security_locked_down); + ++/** ++ * security_lock_kernel_down() - Put the kernel into lock-down mode. ++ * ++ * @where: Where the lock-down is originating from (e.g. command line option) ++ * @level: The lock-down level (can only increase) ++ */ ++int security_lock_kernel_down(const char *where, enum lockdown_reason level) ++{ ++ return call_int_hook(lock_kernel_down, 0, where, level); ++} ++EXPORT_SYMBOL(security_lock_kernel_down); ++ + #ifdef CONFIG_PERF_EVENTS + /** + * security_perf_event_open() - Check if a perf event open is allowed +diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile +index b53753dee02f..90701fc65aa2 100644 +--- a/tools/power/cpupower/Makefile ++++ b/tools/power/cpupower/Makefile +@@ -53,7 +53,7 @@ DESTDIR ?= + + VERSION:= $(shell ./utils/version-gen.sh) + LIB_MAJ= 0.0.1 +-LIB_MIN= 1 ++LIB_MIN= 0 + + PACKAGE = cpupower + PACKAGE_BUGREPORT = linux-pm@vger.kernel.org diff --git a/SOURCES/rog-ally-audio-fix.patch b/SOURCES/rog-ally-audio-fix.patch index 79ae8e8..07fb3bf 100644 --- a/SOURCES/rog-ally-audio-fix.patch +++ b/SOURCES/rog-ally-audio-fix.patch @@ -54,7 +54,7 @@ index 2b8f8fd52..f4933be4c 100644 struct cs35l41_prop_model { const char *hid; const char *ssid; -@@ -360,7 +360,7 @@ +@@ -413,7 +413,7 @@ { "CSC3551", "10431663", generic_dsd_config }, { "CSC3551", "104316D3", generic_dsd_config }, { "CSC3551", "104316F3", generic_dsd_config }, diff --git a/SOURCES/rog-ally-gyro-fix.patch b/SOURCES/rog-ally-gyro-fix.patch new file mode 100644 index 0000000..fc02fe0 --- /dev/null +++ b/SOURCES/rog-ally-gyro-fix.patch @@ -0,0 +1,2974 @@ +Add devicetree description document for Bosch BMI323, a 6-Axis IMU. + +Signed-off-by: Jagath Jog J +Reviewed-by: Krzysztof Kozlowski +--- + .../bindings/iio/imu/bosch,bmi323.yaml | 77 +++++++++++++++++++ + 1 file changed, 77 insertions(+) + create mode 100644 Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml + +diff --git a/Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml b/Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml +new file mode 100644 +index 000000000000..64ef26e19669 +--- /dev/null ++++ b/Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml +@@ -0,0 +1,77 @@ ++# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/iio/imu/bosch,bmi323.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: Bosch BMI323 6-Axis IMU ++ ++maintainers: ++ - Jagath Jog J ++ ++description: ++ BMI323 is a 6-axis inertial measurement unit that supports acceleration and ++ gyroscopic measurements with hardware fifo buffering. 
Sensor also provides
++ event information such as motion, steps, orientation, and single and
++ double tap detection.
++
++properties:
++ compatible:
++ const: bosch,bmi323
++
++ reg:
++ maxItems: 1
++
++ vdd-supply: true
++ vddio-supply: true
++
++ interrupts:
++ minItems: 1
++ maxItems: 2
++
++ interrupt-names:
++ minItems: 1
++ maxItems: 2
++ items:
++ enum:
++ - INT1
++ - INT2
++
++ drive-open-drain:
++ description:
++ set if the specified interrupt pin should be configured as
++ open drain. If not set, defaults to push-pull.
++
++ mount-matrix:
++ description:
++ an optional 3x3 mounting rotation matrix.
++
++required:
++ - compatible
++ - reg
++ - vdd-supply
++ - vddio-supply
++
++allOf:
++ - $ref: /schemas/spi/spi-peripheral-props.yaml#
++
++unevaluatedProperties: false
++
++examples:
++ - |
++ // Example for I2C
++ #include
++ i2c {
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ imu@68 {
++ compatible = "bosch,bmi323";
++ reg = <0x68>;
++ vddio-supply = <&vddio>;
++ vdd-supply = <&vdd>;
++ interrupt-parent = <&gpio1>;
++ interrupts = <29 IRQ_TYPE_EDGE_RISING>;
++ interrupt-names = "INT1";
++ };
++ };
+From: Jagath Jog J
+To: jic23@kernel.org, andriy.shevchenko@linux.intel.com,
+ lars@metafoo.de, robh+dt@kernel.org,
+ krzysztof.kozlowski+dt@linaro.org
+Cc: linux-iio@vger.kernel.org, devicetree@vger.kernel.org,
+ linux-kernel@vger.kernel.org
+Subject: [RFC 2/2] iio: imu: Add driver for BMI323 IMU
+Date: Mon, 18 Sep 2023 13:33:14 +0530
+Message-ID: <20230918080314.11959-3-jagathjog1996@gmail.com>
+In-Reply-To: <20230918080314.11959-1-jagathjog1996@gmail.com>
+
+The Bosch BMI323 is a 6-axis low-power IMU that provides measurements for
+acceleration, angular rate, and temperature. This sensor includes
+motion-triggered interrupt features, such as a step counter, tap detection,
+and activity/inactivity interrupt capabilities.
+
+The driver supports various functionalities, including data ready, FIFO
+data handling, and events such as tap detection, step counting, and
+activity interrupts.
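+
+For a quick userspace sanity check, one accelerometer sample can be read
+through the standard IIO sysfs ABI. This is a minimal illustrative sketch
+only; the iio:device0 index is an assumption and depends on probe order:
+
+    #include <stdio.h>
+
+    /* Read a single numeric sysfs attribute of iio:device0. */
+    static double read_attr(const char *attr)
+    {
+        char path[128];
+        double val = 0.0;
+        FILE *f;
+
+        snprintf(path, sizeof(path),
+                 "/sys/bus/iio/devices/iio:device0/%s", attr);
+        f = fopen(path, "r");
+        if (!f)
+            return 0.0;
+        fscanf(f, "%lf", &val);
+        fclose(f);
+        return val;
+    }
+
+    int main(void)
+    {
+        /* raw counts * scale = m/s^2, per the IIO sysfs ABI */
+        double scale = read_attr("in_accel_scale");
+
+        printf("x: %f\n", read_attr("in_accel_x_raw") * scale);
+        printf("y: %f\n", read_attr("in_accel_y_raw") * scale);
+        printf("z: %f\n", read_attr("in_accel_z_raw") * scale);
+        return 0;
+    }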
+
+Signed-off-by: Jagath Jog J
+---
+ MAINTAINERS | 7 +
+ drivers/iio/imu/Kconfig | 1 +
+ drivers/iio/imu/Makefile | 1 +
+ drivers/iio/imu/bmi323/Kconfig | 33 +
+ drivers/iio/imu/bmi323/Makefile | 7 +
+ drivers/iio/imu/bmi323/bmi323.h | 209 +++
+ drivers/iio/imu/bmi323/bmi323_core.c | 2139 +++++++++++++++++++++++
+ drivers/iio/imu/bmi323/bmi323_i2c.c | 121 ++
+ drivers/iio/imu/bmi323/bmi323_spi.c | 92 +
+ 10 files changed, 2628 insertions(+)
+ create mode 100644 drivers/iio/imu/bmi323/Kconfig
+ create mode 100644 drivers/iio/imu/bmi323/Makefile
+ create mode 100644 drivers/iio/imu/bmi323/bmi323.h
+ create mode 100644 drivers/iio/imu/bmi323/bmi323_core.c
+ create mode 100644 drivers/iio/imu/bmi323/bmi323_i2c.c
+ create mode 100644 drivers/iio/imu/bmi323/bmi323_spi.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 4e07c032d06a..47ca415212a7 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -3636,6 +3636,13 @@
+ F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
+ F: drivers/iio/accel/bma400*
+
++BOSCH SENSORTEC BMI323 IMU IIO DRIVER
++M: Jagath Jog J
++L: linux-iio@vger.kernel.org
++S: Maintained
++F: Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
++F: drivers/iio/imu/bmi323/
++
+ BPF JIT for ARM
+ M: Russell King
+ M: Puranjay Mohan
+diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
+index c2f97629e9cd..6c9a85294bc1 100644
+--- a/drivers/iio/imu/Kconfig
++++ b/drivers/iio/imu/Kconfig
+@@ -54,6 +54,7 @@ config ADIS16480
+
+ source "drivers/iio/imu/bmi160/Kconfig"
+ source "drivers/iio/imu/bno055/Kconfig"
++source "drivers/iio/imu/bmi323/Kconfig"
+
+ config FXOS8700
+ tristate
+diff --git a/drivers/iio/imu/Makefile b/drivers/iio/imu/Makefile
+index 6eb612034722..627406476357 100644
+--- a/drivers/iio/imu/Makefile
++++ b/drivers/iio/imu/Makefile
+@@ -16,6 +16,7 @@ obj-$(CONFIG_IIO_ADIS_LIB) += adis_lib.o
+
+ obj-y += bmi160/
+ obj-y += bno055/
++obj-y += bmi323/
+
+ obj-$(CONFIG_FXOS8700) += fxos8700_core.o
+ obj-$(CONFIG_FXOS8700_I2C) += fxos8700_i2c.o
+diff --git a/drivers/iio/imu/bmi323/Kconfig b/drivers/iio/imu/bmi323/Kconfig
+new file mode 100644
+index 000000000000..ab37b285393c
+--- /dev/null
++++ b/drivers/iio/imu/bmi323/Kconfig
+@@ -0,0 +1,33 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# BMI323 IMU driver
++#
++
++config BMI323
++ tristate
++ select IIO_BUFFER
++ select IIO_TRIGGERED_BUFFER
++
++config BMI323_I2C
++ tristate "Bosch BMI323 I2C driver"
++ depends on I2C
++ select BMI323
++ select REGMAP_I2C
++ help
++ Enable support for the Bosch BMI323 6-Axis IMU connected to I2C
++ interface.
++
++ This driver can also be built as a module. If so, the module will be
++ called bmi323_i2c.
++
++config BMI323_SPI
++ tristate "Bosch BMI323 SPI driver"
++ depends on SPI
++ select BMI323
++ select REGMAP_SPI
++ help
++ Enable support for the Bosch BMI323 6-Axis IMU connected to SPI
++ interface.
++
++ This driver can also be built as a module. If so, the module will be
++ called bmi323_spi.
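+
+For orientation, the three modules follow the usual IIO core/bus-glue
+split: the bus module only creates a regmap and delegates everything else
+to the shared core. The sketch below is illustrative and is not the
+actual bmi323_i2c.c from this series (the real glue also has to discard
+the dummy bytes the sensor prepends to I2C reads, see BMI323_I2C_DUMMY in
+bmi323.h below); bmi323_core_probe() and bmi323_regmap_config are the
+symbols bmi323.h really declares:
+
+    // SPDX-License-Identifier: GPL-2.0
+    #include <linux/err.h>
+    #include <linux/i2c.h>
+    #include <linux/module.h>
+    #include <linux/regmap.h>
+
+    #include "bmi323.h"
+
+    static int bmi323_i2c_probe(struct i2c_client *i2c)
+    {
+        struct regmap *regmap;
+
+        /* All register access goes through regmap. */
+        regmap = devm_regmap_init_i2c(i2c, &bmi323_regmap_config);
+        if (IS_ERR(regmap))
+            return PTR_ERR(regmap);
+
+        /* Bus-agnostic sensor setup lives entirely in the core. */
+        return bmi323_core_probe(&i2c->dev);
+    }
+
+    static const struct i2c_device_id bmi323_i2c_ids[] = {
+        { "bmi323" },
+        { }
+    };
+    MODULE_DEVICE_TABLE(i2c, bmi323_i2c_ids);
+
+    static struct i2c_driver bmi323_i2c_driver = {
+        .driver = { .name = "bmi323" },
+        .probe = bmi323_i2c_probe,
+        .id_table = bmi323_i2c_ids,
+    };
+    module_i2c_driver(bmi323_i2c_driver);
+    MODULE_LICENSE("GPL");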
+diff --git a/drivers/iio/imu/bmi323/Makefile b/drivers/iio/imu/bmi323/Makefile +new file mode 100644 +index 000000000000..a6a6dc0207c9 +--- /dev/null ++++ b/drivers/iio/imu/bmi323/Makefile +@@ -0,0 +1,7 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Makefile for Bosch BMI323 IMU ++# ++obj-$(CONFIG_BMI323) += bmi323_core.o ++obj-$(CONFIG_BMI323_I2C) += bmi323_i2c.o ++obj-$(CONFIG_BMI323_SPI) += bmi323_spi.o +diff --git a/drivers/iio/imu/bmi323/bmi323.h b/drivers/iio/imu/bmi323/bmi323.h +new file mode 100644 +index 000000000000..dff126d41658 +--- /dev/null ++++ b/drivers/iio/imu/bmi323/bmi323.h +@@ -0,0 +1,209 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * IIO driver for Bosch BMI323 6-Axis IMU ++ * ++ * Copyright (C) 2023, Jagath Jog J ++ */ ++ ++#ifndef _BMI323_H_ ++#define _BMI323_H_ ++ ++#include ++#include ++#include ++ ++#define BMI323_I2C_DUMMY 2 ++#define BMI323_SPI_DUMMY 1 ++ ++/* Register map */ ++ ++#define BMI323_CHIP_ID_REG 0x00 ++#define BMI323_CHIP_ID_VAL 0x0043 ++#define BMI323_CHIP_ID_MSK GENMASK(7, 0) ++#define BMI323_ERR_REG 0x01 ++#define BMI323_STATUS_REG 0x02 ++#define BMI323_STATUS_POR_MSK BIT(0) ++ ++/* Accelero/Gyro/Temp data registers */ ++#define BMI323_ACCEL_X_REG 0x03 ++#define BMI323_GYRO_X_REG 0x06 ++#define BMI323_TEMP_REG 0x09 ++#define BMI323_ALL_CHAN_MSK GENMASK(5, 0) ++ ++/* Status registers */ ++#define BMI323_STATUS_INT1_REG 0x0D ++#define BMI323_STATUS_INT2_REG 0x0E ++#define BMI323_STATUS_NOMOTION_MSK BIT(0) ++#define BMI323_STATUS_MOTION_MSK BIT(1) ++#define BMI323_STATUS_STP_WTR_MSK BIT(5) ++#define BMI323_STATUS_TAP_MSK BIT(8) ++#define BMI323_STATUS_ERROR_MSK BIT(10) ++#define BMI323_STATUS_TMP_DRDY_MSK BIT(11) ++#define BMI323_STATUS_GYR_DRDY_MSK BIT(12) ++#define BMI323_STATUS_ACC_DRDY_MSK BIT(13) ++#define BMI323_STATUS_ACC_GYR_DRDY_MSK GENMASK(13, 12) ++#define BMI323_STATUS_FIFO_WTRMRK_MSK BIT(14) ++#define BMI323_STATUS_FIFO_FULL_MSK BIT(15) ++ ++/* Feature registers */ ++#define BMI323_FEAT_IO0_REG 0x10 ++#define BMI323_FEAT_IO0_XYZ_NOMOTION_MSK GENMASK(2, 0) ++#define BMI323_FEAT_IO0_XYZ_MOTION_MSK GENMASK(5, 3) ++#define BMI323_FEAT_XYZ_MSK GENMASK(2, 0) ++#define BMI323_FEAT_IO0_STP_CNT_MSK BIT(9) ++#define BMI323_FEAT_IO0_S_TAP_MSK BIT(12) ++#define BMI323_FEAT_IO0_D_TAP_MSK BIT(13) ++#define BMI323_FEAT_IO1_REG 0x11 ++#define BMI323_FEAT_IO1_ERR_MSK GENMASK(3, 0) ++#define BMI323_FEAT_IO2_REG 0x12 ++#define BMI323_FEAT_IO_STATUS_REG 0x14 ++#define BMI323_FEAT_IO_STATUS_MSK BIT(0) ++#define BMI323_FEAT_ENG_POLL 2000 ++#define BMI323_FEAT_ENG_TIMEOUT 10000 ++ ++/* FIFO registers */ ++#define BMI323_FIFO_FILL_LEVEL_REG 0x15 ++#define BMI323_FIFO_DATA_REG 0x16 ++ ++/* Accelero/Gyro config registers */ ++#define BMI323_ACC_CONF_REG 0x20 ++#define BMI323_GYRO_CONF_REG 0x21 ++#define BMI323_ACC_GYRO_CONF_MODE_MSK GENMASK(14, 12) ++#define BMI323_ACC_GYRO_CONF_ODR_MSK GENMASK(3, 0) ++#define BMI323_ACC_GYRO_CONF_SCL_MSK GENMASK(6, 4) ++#define BMI323_ACC_GYRO_CONF_BW_MSK BIT(7) ++#define BMI323_ACC_GYRO_CONF_AVG_MSK GENMASK(10, 8) ++ ++/* FIFO registers */ ++#define BMI323_FIFO_WTRMRK_REG 0x35 ++#define BMI323_FIFO_CONF_REG 0x36 ++#define BMI323_FIFO_CONF_STP_FUL_MSK BIT(0) ++#define BMI323_FIFO_CONF_ACC_GYR_EN_MSK GENMASK(10, 9) ++#define BMI323_FIFO_ACC_GYR_MSK GENMASK(1, 0) ++#define BMI323_FIFO_CTRL_REG 0x37 ++#define BMI323_FIFO_FLUSH_MSK BIT(0) ++ ++/* Interrupt pin config registers */ ++#define BMI323_IO_INT_CTR_REG 0x38 ++#define BMI323_IO_INT1_LVL_MSK BIT(0) ++#define BMI323_IO_INT1_OD_MSK BIT(1) ++#define 
BMI323_IO_INT1_OP_EN_MSK BIT(2) ++#define BMI323_IO_INT1_LVL_OD_OP_MSK GENMASK(2, 0) ++#define BMI323_IO_INT2_LVL_MSK BIT(8) ++#define BMI323_IO_INT2_OD_MSK BIT(9) ++#define BMI323_IO_INT2_OP_EN_MSK BIT(10) ++#define BMI323_IO_INT2_LVL_OD_OP_MSK GENMASK(10, 8) ++#define BMI323_IO_INT_CONF_REG 0x39 ++#define BMI323_IO_INT_LTCH_MSK BIT(0) ++#define BMI323_INT_MAP1_REG 0x3A ++#define BMI323_INT_MAP2_REG 0x3B ++#define BMI323_NOMOTION_MSK GENMASK(1, 0) ++#define BMI323_MOTION_MSK GENMASK(3, 2) ++#define BMI323_STEP_CNT_MSK GENMASK(11, 10) ++#define BMI323_TAP_MSK GENMASK(1, 0) ++#define BMI323_TMP_DRDY_MSK GENMASK(7, 6) ++#define BMI323_GYR_DRDY_MSK GENMASK(9, 8) ++#define BMI323_ACC_DRDY_MSK GENMASK(11, 10) ++#define BMI323_FIFO_WTRMRK_MSK GENMASK(13, 12) ++#define BMI323_FIFO_FULL_MSK GENMASK(15, 14) ++ ++/* Feature registers */ ++#define BMI323_FEAT_CTRL_REG 0x40 ++#define BMI323_FEAT_ENG_EN_MSK BIT(0) ++#define BMI323_FEAT_DATA_ADDR 0x41 ++#define BMI323_FEAT_DATA_TX 0x42 ++#define BMI323_FEAT_DATA_STATUS 0x43 ++#define BMI323_FEAT_DATA_TX_RDY_MSK BIT(1) ++#define BMI323_FEAT_EVNT_EXT_REG 0x47 ++#define BMI323_FEAT_EVNT_EXT_S_MSK BIT(3) ++#define BMI323_FEAT_EVNT_EXT_D_MSK BIT(4) ++ ++#define BMI323_CMD_REG 0x7E ++#define BMI323_RST_VAL 0xDEAF ++#define BMI323_CFG_RES_REG 0x7F ++ ++/* Extended registers */ ++#define BMI323_GEN_SET1_REG 0x02 ++#define BMI323_GEN_SET1_MODE_MSK BIT(0) ++#define BMI323_GEN_HOLD_DUR_MSK GENMASK(4, 1) ++ ++/* Any Motion/No Motion config registers */ ++#define BMI323_ANYMO1_REG 0x05 ++#define BMI323_NOMO1_REG 0x08 ++#define BMI323_MO2_OFFSET 0x01 ++#define BMI323_MO3_OFFSET 0x02 ++#define BMI323_MO1_REF_UP_MSK BIT(12) ++#define BMI323_MO1_SLOPE_TH_MSK GENMASK(11, 0) ++#define BMI323_MO2_HYSTR_MSK GENMASK(9, 0) ++#define BMI323_MO3_DURA_MSK GENMASK(12, 0) ++ ++/* Step counter config registers */ ++#define BMI323_STEP_SC1_REG 0x10 ++#define BMI323_STEP_SC1_WTRMRK_MSK GENMASK(9, 0) ++#define BMI323_STEP_SC1_RST_CNT_MSK BIT(10) ++#define BMI323_STEP_SC1_REG 0x10 ++#define BMI323_STEP_LEN 2 ++ ++/* Tap gesture config registers */ ++#define BMI323_TAP1_REG 0x1E ++#define BMI323_TAP1_AXIS_SEL_MSK GENMASK(1, 0) ++#define BMI323_AXIS_XYZ_MSK GENMASK(1, 0) ++#define BMI323_TAP1_TIMOUT_MSK BIT(2) ++#define BMI323_TAP1_MAX_PEAKS_MSK GENMASK(5, 3) ++#define BMI323_TAP1_MODE_MSK GENMASK(7, 6) ++#define BMI323_TAP2_REG 0x1F ++#define BMI323_TAP2_THRES_MSK GENMASK(9, 0) ++#define BMI323_TAP2_MAX_DUR_MSK GENMASK(15, 10) ++#define BMI323_TAP3_REG 0x20 ++#define BMI323_TAP3_QUIET_TIM_MSK GENMASK(15, 12) ++#define BMI323_TAP3_QT_BW_TAP_MSK GENMASK(11, 8) ++#define BMI323_TAP3_QT_AFT_GES_MSK GENMASK(15, 12) ++ ++#define BMI323_MOTION_THRES_SCALE 512 ++#define BMI323_MOTION_HYSTR_SCALE 512 ++#define BMI323_MOTION_DURAT_SCALE 50 ++#define BMI323_TAP_THRES_SCALE 512 ++#define BMI323_DUR_BW_TAP_SCALE 200 ++#define BMI323_QUITE_TIM_GES_SCALE 25 ++#define BMI323_MAX_GES_DUR_SCALE 25 ++ ++/* ++ * The formula to calculate temperature in C. ++ * See datasheet section 6.1.1, Register Map Overview ++ * ++ * T_C = (temp_raw / 512) + 23 ++ */ ++#define BMI323_TEMP_OFFSET 11776 ++#define BMI323_TEMP_SCALE 1953125 ++ ++/* ++ * The BMI323 features a FIFO with a capacity of 2048 bytes. Each frame ++ * consists of accelerometer (X, Y, Z) data and gyroscope (X, Y, Z) data, ++ * totaling 6 words or 12 bytes. The FIFO buffer can hold a total of ++ * 170 frames. 
++ * ++ * If a watermark interrupt is configured for 170 frames, the interrupt will ++ * trigger when the FIFO reaches 169 frames, so limit the maximum watermark ++ * level to 169 frames. In terms of data, 169 frames would equal 1014 bytes, ++ * which is approximately 2 frames before the FIFO reaches its full capacity. ++ * See datasheet section 5.7.3 FIFO Buffer Interrupts ++ */ ++#define BMI323_BYTES_PER_SAMPLE 2 ++#define BMI323_FIFO_LENGTH_IN_BYTES 2048 ++#define BMI323_FIFO_FRAME_LENGTH 6 ++#define BMI323_FIFO_FULL_IN_FRAMES \ ++ ((BMI323_FIFO_LENGTH_IN_BYTES / \ ++ (BMI323_BYTES_PER_SAMPLE * BMI323_FIFO_FRAME_LENGTH)) - 1) ++#define BMI323_FIFO_FULL_IN_WORDS \ ++ (BMI323_FIFO_FULL_IN_FRAMES * BMI323_FIFO_FRAME_LENGTH) ++ ++#define BMI323_INT_MICRO_TO_RAW(val, val2, scale) ((val) * (scale) + \ ++ ((val2) * (scale)) / MEGA) ++ ++#define BMI323_RAW_TO_MICRO(raw, scale) ((((raw) % (scale)) * MEGA) / scale) ++ ++struct device; ++int bmi323_core_probe(struct device *dev); ++extern const struct regmap_config bmi323_regmap_config; ++ ++#endif +diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c +new file mode 100644 +index 000000000000..0bd5dedd9a63 +--- /dev/null ++++ b/drivers/iio/imu/bmi323/bmi323_core.c +@@ -0,0 +1,2139 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * IIO core driver for Bosch BMI323 6-Axis IMU. ++ * ++ * Copyright (C) 2023, Jagath Jog J ++ * ++ * Datasheet: https://www.bosch-sensortec.com/media/boschsensortec/downloads/datasheets/bst-bmi323-ds000.pdf ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "bmi323.h" ++ ++enum bmi323_sensor_type { ++ BMI323_ACCEL, ++ BMI323_GYRO, ++ BMI323_SENSORS_CNT, ++}; ++ ++enum bmi323_opr_mode { ++ ACC_GYRO_MODE_DISABLE = 0x00, ++ GYRO_DRIVE_MODE_ENABLED = 0x01, ++ ACC_GYRO_MODE_DUTYCYCLE = 0x03, ++ ACC_GYRO_MODE_CONTINOUS = 0x04, ++ ACC_GYRO_MODE_HIGH_PERF = 0x07, ++}; ++ ++enum bmi323_state { ++ BMI323_IDLE, ++ BMI323_BUFFER_DRDY_TRIGGERED, ++ BMI323_BUFFER_FIFO, ++}; ++ ++enum bmi323_irq_pin { ++ BMI323_IRQ_DISABLED, ++ BMI323_IRQ_INT1, ++ BMI323_IRQ_INT2, ++}; ++ ++enum bmi323_3db_bw { ++ BMI323_BW_ODR_BY_2, ++ BMI323_BW_ODR_BY_4, ++}; ++ ++enum bmi323_scan { ++ BMI323_ACCEL_X, ++ BMI323_ACCEL_Y, ++ BMI323_ACCEL_Z, ++ BMI323_GYRO_X, ++ BMI323_GYRO_Y, ++ BMI323_GYRO_Z, ++ BMI323_CHAN_MAX ++}; ++ ++struct bmi323_hw { ++ u8 data; ++ u8 config; ++ const int (*scale_table)[2]; ++ int scale_table_len; ++}; ++ ++/* ++ * The accelerometer supports +-2G/4G/8G/16G ranges, and the resolution of ++ * each sample is 16 bits, signed. 
++ * At +-8G the scale can calculated by ++ * ((8 + 8) * 9.80665 / (2^16 - 1)) * 10^6 = 2394.23819 scale in micro ++ * ++ */ ++static const int bmi323_accel_scale[][2] = { ++ { 0, 598 }, ++ { 0, 1197 }, ++ { 0, 2394 }, ++ { 0, 4788 }, ++}; ++ ++static const int bmi323_gyro_scale[][2] = { ++ { 0, 66 }, ++ { 0, 133 }, ++ { 0, 266 }, ++ { 0, 532 }, ++ { 0, 1065 }, ++}; ++ ++static const int bmi323_accel_gyro_avrg[] = {0, 2, 4, 8, 16, 32, 64}; ++ ++static const struct bmi323_hw bmi323_hw[2] = { ++ [BMI323_ACCEL] = { ++ .data = BMI323_ACCEL_X_REG, ++ .config = BMI323_ACC_CONF_REG, ++ .scale_table = bmi323_accel_scale, ++ .scale_table_len = ARRAY_SIZE(bmi323_accel_scale), ++ }, ++ [BMI323_GYRO] = { ++ .data = BMI323_GYRO_X_REG, ++ .config = BMI323_GYRO_CONF_REG, ++ .scale_table = bmi323_gyro_scale, ++ .scale_table_len = ARRAY_SIZE(bmi323_gyro_scale), ++ }, ++}; ++ ++struct bmi323_data { ++ struct device *dev; ++ struct regmap *regmap; ++ struct iio_mount_matrix orientation; ++ enum bmi323_irq_pin irq_pin; ++ struct iio_trigger *trig; ++ bool drdy_trigger_enabled; ++ enum bmi323_state state; ++ s64 fifo_tstamp, old_fifo_tstamp; ++ u32 odrns[BMI323_SENSORS_CNT]; ++ u32 odrhz[BMI323_SENSORS_CNT]; ++ unsigned int feature_events; ++ ++ /* ++ * Lock to protect the members of device's private data from concurrent ++ * access and also to serialize the access of extended registers. ++ * See bmi323_write_ext_reg(..) for more info. ++ */ ++ struct mutex mutex; ++ int watermark; ++ __le16 fifo_buff[BMI323_FIFO_FULL_IN_WORDS] __aligned(IIO_DMA_MINALIGN); ++ struct { ++ __le16 channels[BMI323_CHAN_MAX]; ++ s64 ts __aligned(8); ++ } buffer; ++ __le16 steps_count[BMI323_STEP_LEN]; ++}; ++ ++static const struct iio_mount_matrix * ++bmi323_get_mount_matrix(const struct iio_dev *idev, ++ const struct iio_chan_spec *chan) ++{ ++ struct bmi323_data *data = iio_priv(idev); ++ ++ return &data->orientation; ++} ++ ++static const struct iio_chan_spec_ext_info bmi323_ext_info[] = { ++ IIO_MOUNT_MATRIX(IIO_SHARED_BY_TYPE, bmi323_get_mount_matrix), ++ { } ++}; ++ ++static const struct iio_event_spec bmi323_step_wtrmrk_event = { ++ .type = IIO_EV_TYPE_CHANGE, ++ .dir = IIO_EV_DIR_NONE, ++ .mask_shared_by_type = BIT(IIO_EV_INFO_ENABLE) | ++ BIT(IIO_EV_INFO_VALUE), ++}; ++ ++static const struct iio_event_spec bmi323_accel_event[] = { ++ { ++ .type = IIO_EV_TYPE_MAG, ++ .dir = IIO_EV_DIR_FALLING, ++ .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE) | ++ BIT(IIO_EV_INFO_PERIOD) | ++ BIT(IIO_EV_INFO_HYSTERESIS) | ++ BIT(IIO_EV_INFO_ENABLE), ++ }, ++ { ++ .type = IIO_EV_TYPE_MAG, ++ .dir = IIO_EV_DIR_RISING, ++ .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE) | ++ BIT(IIO_EV_INFO_PERIOD) | ++ BIT(IIO_EV_INFO_HYSTERESIS) | ++ BIT(IIO_EV_INFO_ENABLE), ++ }, ++ { ++ .type = IIO_EV_TYPE_GESTURE, ++ .dir = IIO_EV_DIR_SINGLETAP, ++ .mask_shared_by_type = BIT(IIO_EV_INFO_ENABLE) | ++ BIT(IIO_EV_INFO_VALUE) | ++ BIT(IIO_EV_INFO_RESET_TIMEOUT), ++ }, ++ { ++ .type = IIO_EV_TYPE_GESTURE, ++ .dir = IIO_EV_DIR_DOUBLETAP, ++ .mask_shared_by_type = BIT(IIO_EV_INFO_ENABLE) | ++ BIT(IIO_EV_INFO_VALUE) | ++ BIT(IIO_EV_INFO_RESET_TIMEOUT) | ++ BIT(IIO_EV_INFO_TAP2_MIN_DELAY), ++ }, ++}; ++ ++#define BMI323_ACCEL_CHANNEL(_type, _axis, _index) { \ ++ .type = _type, \ ++ .modified = 1, \ ++ .channel2 = IIO_MOD_##_axis, \ ++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ ++ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ ++ BIT(IIO_CHAN_INFO_SCALE) | \ ++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ ++ .info_mask_shared_by_type_available = \ ++ 
BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ ++ BIT(IIO_CHAN_INFO_SCALE) | \ ++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ ++ .scan_index = _index, \ ++ .scan_type = { \ ++ .sign = 's', \ ++ .realbits = 16, \ ++ .storagebits = 16, \ ++ .endianness = IIO_LE, \ ++ }, \ ++ .ext_info = bmi323_ext_info, \ ++ .event_spec = bmi323_accel_event, \ ++ .num_event_specs = ARRAY_SIZE(bmi323_accel_event), \ ++} ++ ++#define BMI323_GYRO_CHANNEL(_type, _axis, _index) { \ ++ .type = _type, \ ++ .modified = 1, \ ++ .channel2 = IIO_MOD_##_axis, \ ++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ ++ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ ++ BIT(IIO_CHAN_INFO_SCALE) | \ ++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ ++ .info_mask_shared_by_type_available = \ ++ BIT(IIO_CHAN_INFO_SAMP_FREQ) | \ ++ BIT(IIO_CHAN_INFO_SCALE) | \ ++ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ ++ .scan_index = _index, \ ++ .scan_type = { \ ++ .sign = 's', \ ++ .realbits = 16, \ ++ .storagebits = 16, \ ++ .endianness = IIO_LE, \ ++ }, \ ++ .ext_info = bmi323_ext_info, \ ++} ++ ++static const struct iio_chan_spec bmi323_channels[] = { ++ BMI323_ACCEL_CHANNEL(IIO_ACCEL, X, BMI323_ACCEL_X), ++ BMI323_ACCEL_CHANNEL(IIO_ACCEL, Y, BMI323_ACCEL_Y), ++ BMI323_ACCEL_CHANNEL(IIO_ACCEL, Z, BMI323_ACCEL_Z), ++ BMI323_GYRO_CHANNEL(IIO_ANGL_VEL, X, BMI323_GYRO_X), ++ BMI323_GYRO_CHANNEL(IIO_ANGL_VEL, Y, BMI323_GYRO_Y), ++ BMI323_GYRO_CHANNEL(IIO_ANGL_VEL, Z, BMI323_GYRO_Z), ++ { ++ .type = IIO_TEMP, ++ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | ++ BIT(IIO_CHAN_INFO_OFFSET) | ++ BIT(IIO_CHAN_INFO_SCALE), ++ .scan_index = -1, ++ }, ++ { ++ .type = IIO_STEPS, ++ .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | ++ BIT(IIO_CHAN_INFO_ENABLE), ++ .scan_index = -1, ++ .event_spec = &bmi323_step_wtrmrk_event, ++ .num_event_specs = 1, ++ ++ }, ++ IIO_CHAN_SOFT_TIMESTAMP(BMI323_CHAN_MAX), ++}; ++ ++static const int bmi323_acc_gyro_odr[][2] = { ++ { 0, 781250 }, ++ { 1, 562500 }, ++ { 3, 125000 }, ++ { 6, 250000 }, ++ { 12, 500000 }, ++ { 25, 0 }, ++ { 50, 0 }, ++ { 100, 0 }, ++ { 200, 0 }, ++ { 400, 0 }, ++ { 800, 0 }, ++}; ++ ++static const int bmi323_acc_gyro_odrns[] = { ++ 1280 * MEGA, ++ 640 * MEGA, ++ 320 * MEGA, ++ 160 * MEGA, ++ 80 * MEGA, ++ 40 * MEGA, ++ 20 * MEGA, ++ 10 * MEGA, ++ 5 * MEGA, ++ 2500 * KILO, ++ 1250 * KILO, ++}; ++ ++static enum bmi323_sensor_type bmi323_iio_to_sensor(enum iio_chan_type iio_type) ++{ ++ switch (iio_type) { ++ case IIO_ACCEL: ++ return BMI323_ACCEL; ++ case IIO_ANGL_VEL: ++ return BMI323_GYRO; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_set_mode(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, ++ enum bmi323_opr_mode mode) ++{ ++ guard(mutex)(&data->mutex); ++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config, ++ BMI323_ACC_GYRO_CONF_MODE_MSK, ++ FIELD_PREP(BMI323_ACC_GYRO_CONF_MODE_MSK, ++ mode)); ++} ++ ++/* ++ * When writing data to extended register there must be no communication to ++ * any other register before write transaction is complete. ++ * See datasheet section 6.2 Extended Register Map Description. 
++ */ ++static int bmi323_write_ext_reg(struct bmi323_data *data, unsigned int ext_addr, ++ unsigned int ext_data) ++{ ++ int ret, feature_status; ++ ++ ret = regmap_read(data->regmap, BMI323_FEAT_DATA_STATUS, ++ &feature_status); ++ if (ret) ++ return ret; ++ ++ if (!FIELD_GET(BMI323_FEAT_DATA_TX_RDY_MSK, feature_status)) ++ return -EBUSY; ++ ++ ret = regmap_write(data->regmap, BMI323_FEAT_DATA_ADDR, ext_addr); ++ if (ret) ++ return ret; ++ ++ return regmap_write(data->regmap, BMI323_FEAT_DATA_TX, ext_data); ++} ++ ++/* ++ * When reading data from extended register there must be no communication to ++ * any other register before read transaction is complete. ++ * See datasheet section 6.2 Extended Register Map Description. ++ */ ++static int bmi323_read_ext_reg(struct bmi323_data *data, unsigned int ext_addr, ++ unsigned int *ext_data) ++{ ++ int ret, feature_status; ++ ++ ret = regmap_read(data->regmap, BMI323_FEAT_DATA_STATUS, ++ &feature_status); ++ if (ret) ++ return ret; ++ ++ if (!FIELD_GET(BMI323_FEAT_DATA_TX_RDY_MSK, feature_status)) ++ return -EBUSY; ++ ++ ret = regmap_write(data->regmap, BMI323_FEAT_DATA_ADDR, ext_addr); ++ if (ret) ++ return ret; ++ ++ return regmap_read(data->regmap, BMI323_FEAT_DATA_TX, ext_data); ++} ++ ++static int bmi323_update_ext_reg(struct bmi323_data *data, ++ unsigned int ext_addr, ++ unsigned int mask, unsigned int ext_data) ++{ ++ unsigned int value; ++ int ret; ++ ++ ret = bmi323_read_ext_reg(data, ext_addr, &value); ++ if (ret) ++ return ret; ++ ++ set_mask_bits(&value, mask, ext_data); ++ ++ return bmi323_write_ext_reg(data, ext_addr, value); ++} ++ ++static int bmi323_get_error_status(struct bmi323_data *data) ++{ ++ int error, ret; ++ ++ guard(mutex)(&data->mutex); ++ ret = regmap_read(data->regmap, BMI323_ERR_REG, &error); ++ if (ret) ++ return ret; ++ ++ if (error) ++ dev_err(data->dev, "Sensor error 0x%x\n", error); ++ ++ return error; ++} ++ ++static int bmi323_feature_engine_events(struct bmi323_data *data, ++ const unsigned int event_mask, ++ bool state) ++{ ++ unsigned int value; ++ int ret; ++ ++ ret = regmap_read(data->regmap, BMI323_FEAT_IO0_REG, &value); ++ if (ret) ++ return ret; ++ ++ /* Register must be cleared before changing an active config */ ++ ret = regmap_write(data->regmap, BMI323_FEAT_IO0_REG, 0); ++ if (ret) ++ return ret; ++ ++ if (state) ++ value |= event_mask; ++ else ++ value &= ~event_mask; ++ ++ ret = regmap_write(data->regmap, BMI323_FEAT_IO0_REG, value); ++ if (ret) ++ return ret; ++ ++ return regmap_write(data->regmap, BMI323_FEAT_IO_STATUS_REG, ++ BMI323_FEAT_IO_STATUS_MSK); ++} ++ ++static int bmi323_step_wtrmrk_en(struct bmi323_data *data, int state) ++{ ++ enum bmi323_irq_pin step_irq; ++ int ret; ++ ++ guard(mutex)(&data->mutex); ++ if (!FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK, data->feature_events)) ++ return -EINVAL; ++ ++ if (state) ++ step_irq = data->irq_pin; ++ else ++ step_irq = BMI323_IRQ_DISABLED; ++ ++ ret = bmi323_update_ext_reg(data, BMI323_STEP_SC1_REG, ++ BMI323_STEP_SC1_WTRMRK_MSK, ++ FIELD_PREP(BMI323_STEP_SC1_WTRMRK_MSK, ++ state ? 
1 : 0)); ++ if (ret) ++ return ret; ++ ++ return regmap_update_bits(data->regmap, BMI323_INT_MAP1_REG, ++ BMI323_STEP_CNT_MSK, ++ FIELD_PREP(BMI323_STEP_CNT_MSK, step_irq)); ++} ++ ++static int bmi323_motion_config_reg(enum iio_event_direction dir) ++{ ++ switch (dir) { ++ case IIO_EV_DIR_RISING: ++ return BMI323_ANYMO1_REG; ++ case IIO_EV_DIR_FALLING: ++ return BMI323_NOMO1_REG; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_motion_event_en(struct bmi323_data *data, ++ enum iio_event_direction dir, int state) ++{ ++ unsigned int state_value = state ? BMI323_FEAT_XYZ_MSK : 0; ++ int config, ret, msk, raw, field_value; ++ enum bmi323_irq_pin motion_irq; ++ int irq_msk, irq_field_val; ++ ++ if (state) ++ motion_irq = data->irq_pin; ++ else ++ motion_irq = BMI323_IRQ_DISABLED; ++ ++ switch (dir) { ++ case IIO_EV_DIR_RISING: ++ msk = BMI323_FEAT_IO0_XYZ_MOTION_MSK; ++ raw = 512; ++ config = BMI323_ANYMO1_REG; ++ irq_msk = BMI323_MOTION_MSK; ++ irq_field_val = FIELD_PREP(BMI323_MOTION_MSK, motion_irq); ++ field_value = FIELD_PREP(BMI323_FEAT_IO0_XYZ_MOTION_MSK, ++ state_value); ++ break; ++ case IIO_EV_DIR_FALLING: ++ msk = BMI323_FEAT_IO0_XYZ_NOMOTION_MSK; ++ raw = 0; ++ config = BMI323_NOMO1_REG; ++ irq_msk = BMI323_NOMOTION_MSK; ++ irq_field_val = FIELD_PREP(BMI323_NOMOTION_MSK, motion_irq); ++ field_value = FIELD_PREP(BMI323_FEAT_IO0_XYZ_NOMOTION_MSK, ++ state_value); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ guard(mutex)(&data->mutex); ++ ret = bmi323_feature_engine_events(data, msk, state); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_update_ext_reg(data, config, ++ BMI323_MO1_REF_UP_MSK, ++ FIELD_PREP(BMI323_MO1_REF_UP_MSK, 0)); ++ if (ret) ++ return ret; ++ ++ /* Set initial value to avoid interrupts while enabling */ ++ ret = bmi323_update_ext_reg(data, config, ++ BMI323_MO1_SLOPE_TH_MSK, ++ FIELD_PREP(BMI323_MO1_SLOPE_TH_MSK, raw)); ++ if (ret) ++ return ret; ++ ++ ret = regmap_update_bits(data->regmap, BMI323_INT_MAP1_REG, irq_msk, ++ irq_field_val); ++ if (ret) ++ return ret; ++ ++ set_mask_bits(&data->feature_events, msk, field_value); ++ ++ return 0; ++} ++ ++static int bmi323_tap_event_en(struct bmi323_data *data, ++ enum iio_event_direction dir, int state) ++{ ++ enum bmi323_irq_pin tap_irq; ++ int ret, tap_enabled; ++ ++ guard(mutex)(&data->mutex); ++ ++ if (data->odrhz[BMI323_ACCEL] < 200) { ++ dev_err(data->dev, "Invalid accelerometer parameter\n"); ++ return -EINVAL; ++ } ++ ++ switch (dir) { ++ case IIO_EV_DIR_SINGLETAP: ++ ret = bmi323_feature_engine_events(data, ++ BMI323_FEAT_IO0_S_TAP_MSK, ++ state); ++ if (ret) ++ return ret; ++ ++ set_mask_bits(&data->feature_events, BMI323_FEAT_IO0_S_TAP_MSK, ++ FIELD_PREP(BMI323_FEAT_IO0_S_TAP_MSK, state)); ++ break; ++ case IIO_EV_DIR_DOUBLETAP: ++ ret = bmi323_feature_engine_events(data, ++ BMI323_FEAT_IO0_D_TAP_MSK, ++ state); ++ if (ret) ++ return ret; ++ ++ set_mask_bits(&data->feature_events, BMI323_FEAT_IO0_D_TAP_MSK, ++ FIELD_PREP(BMI323_FEAT_IO0_D_TAP_MSK, state)); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ tap_enabled = FIELD_GET(BMI323_FEAT_IO0_S_TAP_MSK | ++ BMI323_FEAT_IO0_D_TAP_MSK, ++ data->feature_events); ++ ++ if (tap_enabled) ++ tap_irq = data->irq_pin; ++ else ++ tap_irq = BMI323_IRQ_DISABLED; ++ ++ ret = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG, ++ BMI323_TAP_MSK, ++ FIELD_PREP(BMI323_TAP_MSK, tap_irq)); ++ if (ret) ++ return ret; ++ ++ if (!state) ++ return 0; ++ ++ ret = bmi323_update_ext_reg(data, BMI323_TAP1_REG, ++ BMI323_TAP1_MAX_PEAKS_MSK, ++
FIELD_PREP(BMI323_TAP1_MAX_PEAKS_MSK, ++ 0x04)); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_update_ext_reg(data, BMI323_TAP1_REG, ++ BMI323_TAP1_AXIS_SEL_MSK, ++ FIELD_PREP(BMI323_TAP1_AXIS_SEL_MSK, ++ BMI323_AXIS_XYZ_MSK)); ++ if (ret) ++ return ret; ++ ++ return bmi323_update_ext_reg(data, BMI323_TAP1_REG, ++ BMI323_TAP1_TIMOUT_MSK, ++ FIELD_PREP(BMI323_TAP1_TIMOUT_MSK, ++ 0)); ++} ++ ++static ssize_t in_accel_gesture_tap_wait_dur_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct iio_dev *indio_dev = dev_to_iio_dev(dev); ++ struct bmi323_data *data = iio_priv(indio_dev); ++ unsigned int reg_value, raw; ++ int ret, val[2]; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = bmi323_read_ext_reg(data, BMI323_TAP2_REG, &reg_value); ++ if (ret) ++ return ret; ++ } ++ ++ raw = FIELD_GET(BMI323_TAP2_MAX_DUR_MSK, reg_value); ++ val[0] = raw / BMI323_MAX_GES_DUR_SCALE; ++ val[1] = BMI323_RAW_TO_MICRO(raw, BMI323_MAX_GES_DUR_SCALE); ++ ++ return iio_format_value(buf, IIO_VAL_INT_PLUS_MICRO, ARRAY_SIZE(val), ++ val); ++} ++ ++static ssize_t in_accel_gesture_tap_wait_dur_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t len) ++{ ++ struct iio_dev *indio_dev = dev_to_iio_dev(dev); ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int ret, val_int, val_fract, raw; ++ ++ ret = iio_str_to_fixpoint(buf, 100000, &val_int, &val_fract); ++ if (ret) ++ return ret; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val_int, val_fract, ++ BMI323_MAX_GES_DUR_SCALE); ++ if (!in_range(raw, 0, 64)) ++ return -EINVAL; ++ ++ guard(mutex)(&data->mutex); ++ ret = bmi323_update_ext_reg(data, BMI323_TAP2_REG, ++ BMI323_TAP2_MAX_DUR_MSK, ++ FIELD_PREP(BMI323_TAP2_MAX_DUR_MSK, raw)); ++ if (ret) ++ return ret; ++ ++ return len; ++} ++ ++/* ++ * Maximum duration from the first tap within which the second tap is expected ++ * to happen. This timeout is applicable only if gesture_tap_wait_timeout is ++ * enabled.
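++ * The duration is expressed in seconds; writable values range from 0.0 s ++ * to 2.52 s in steps of 0.04 s (see in_accel_gesture_tap_wait_dur_available).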
++ */ ++static IIO_DEVICE_ATTR_RW(in_accel_gesture_tap_wait_dur, 0); ++ ++static ssize_t in_accel_gesture_tap_wait_timeout_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct iio_dev *indio_dev = dev_to_iio_dev(dev); ++ struct bmi323_data *data = iio_priv(indio_dev); ++ unsigned int reg_value, raw; ++ int ret; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = bmi323_read_ext_reg(data, BMI323_TAP1_REG, &reg_value); ++ if (ret) ++ return ret; ++ } ++ ++ raw = FIELD_GET(BMI323_TAP1_TIMOUT_MSK, reg_value); ++ ++ return iio_format_value(buf, IIO_VAL_INT, 1, &raw); ++} ++ ++static ssize_t in_accel_gesture_tap_wait_timeout_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t len) ++{ ++ struct iio_dev *indio_dev = dev_to_iio_dev(dev); ++ struct bmi323_data *data = iio_priv(indio_dev); ++ bool val; ++ int ret; ++ ++ ret = kstrtobool(buf, &val); ++ if (ret) ++ return ret; ++ ++ guard(mutex)(&data->mutex); ++ ret = bmi323_update_ext_reg(data, BMI323_TAP1_REG, ++ BMI323_TAP1_TIMOUT_MSK, ++ FIELD_PREP(BMI323_TAP1_TIMOUT_MSK, val)); ++ if (ret) ++ return ret; ++ ++ return len; ++} ++ ++/* Enable/disable gesture confirmation with wait time */ ++static IIO_DEVICE_ATTR_RW(in_accel_gesture_tap_wait_timeout, 0); ++ ++static IIO_CONST_ATTR(in_accel_gesture_tap_wait_dur_available, ++ "[0.0 0.04 2.52]"); ++ ++static IIO_CONST_ATTR(in_accel_gesture_doubletap_tap2_min_delay_available, ++ "[0.005 0.005 0.075]"); ++ ++static IIO_CONST_ATTR(in_accel_gesture_tap_reset_timeout_available, ++ "[0.04 0.04 0.6]"); ++ ++static IIO_CONST_ATTR(in_accel_gesture_tap_value_available, "[0.0 0.002 1.99]"); ++ ++static IIO_CONST_ATTR(in_accel_mag_value_available, "[0.0 0.002 7.99]"); ++ ++static IIO_CONST_ATTR(in_accel_mag_period_available, "[0.0 0.02 162.0]"); ++ ++static IIO_CONST_ATTR(in_accel_mag_hysteresis_available, "[0.0 0.002 1.99]"); ++ ++static struct attribute *bmi323_event_attributes[] = { ++ &iio_const_attr_in_accel_gesture_tap_value_available.dev_attr.attr, ++ &iio_const_attr_in_accel_gesture_tap_reset_timeout_available.dev_attr.attr, ++ &iio_const_attr_in_accel_gesture_doubletap_tap2_min_delay_available.dev_attr.attr, ++ &iio_const_attr_in_accel_gesture_tap_wait_dur_available.dev_attr.attr, ++ &iio_dev_attr_in_accel_gesture_tap_wait_timeout.dev_attr.attr, ++ &iio_dev_attr_in_accel_gesture_tap_wait_dur.dev_attr.attr, ++ &iio_const_attr_in_accel_mag_value_available.dev_attr.attr, ++ &iio_const_attr_in_accel_mag_period_available.dev_attr.attr, ++ &iio_const_attr_in_accel_mag_hysteresis_available.dev_attr.attr, ++ NULL ++}; ++ ++static const struct attribute_group bmi323_event_attribute_group = { ++ .attrs = bmi323_event_attributes, ++}; ++ ++static int bmi323_write_event_config(struct iio_dev *indio_dev, ++ const struct iio_chan_spec *chan, ++ enum iio_event_type type, ++ enum iio_event_direction dir, int state) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ ++ switch (type) { ++ case IIO_EV_TYPE_MAG: ++ return bmi323_motion_event_en(data, dir, state); ++ case IIO_EV_TYPE_GESTURE: ++ return bmi323_tap_event_en(data, dir, state); ++ case IIO_EV_TYPE_CHANGE: ++ return bmi323_step_wtrmrk_en(data, state); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_read_event_config(struct iio_dev *indio_dev, ++ const struct iio_chan_spec *chan, ++ enum iio_event_type type, ++ enum iio_event_direction dir) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int ret, value, reg_val; ++ ++ guard(mutex)(&data->mutex); ++ ++ switch (chan->type) { ++ case IIO_ACCEL: ++ switch (dir) { ++ case IIO_EV_DIR_SINGLETAP: ++ ret = FIELD_GET(BMI323_FEAT_IO0_S_TAP_MSK, ++ data->feature_events); ++ break; ++ case IIO_EV_DIR_DOUBLETAP: ++ ret = FIELD_GET(BMI323_FEAT_IO0_D_TAP_MSK, ++ data->feature_events); ++ break; ++ case IIO_EV_DIR_RISING: ++ value = FIELD_GET(BMI323_FEAT_IO0_XYZ_MOTION_MSK, ++ data->feature_events); ++ ret = value ? 1 : 0; ++ break; ++ case IIO_EV_DIR_FALLING: ++ value = FIELD_GET(BMI323_FEAT_IO0_XYZ_NOMOTION_MSK, ++ data->feature_events); ++ ret = value ? 1 : 0; ++ break; ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ return ret; ++ case IIO_STEPS: ++ ret = regmap_read(data->regmap, BMI323_INT_MAP1_REG, &reg_val); ++ if (ret) ++ return ret; ++ ++ return FIELD_GET(BMI323_STEP_CNT_MSK, reg_val) ? 1 : 0; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_write_event_value(struct iio_dev *indio_dev, ++ const struct iio_chan_spec *chan, ++ enum iio_event_type type, ++ enum iio_event_direction dir, ++ enum iio_event_info info, ++ int val, int val2) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ unsigned int raw; ++ int reg; ++ ++ guard(mutex)(&data->mutex); ++ ++ switch (type) { ++ case IIO_EV_TYPE_GESTURE: ++ switch (info) { ++ case IIO_EV_INFO_VALUE: ++ if (!in_range(val, 0, 2)) ++ return -EINVAL; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val, val2, ++ BMI323_TAP_THRES_SCALE); ++ ++ return bmi323_update_ext_reg(data, BMI323_TAP2_REG, ++ BMI323_TAP2_THRES_MSK, ++ FIELD_PREP(BMI323_TAP2_THRES_MSK, ++ raw)); ++ case IIO_EV_INFO_RESET_TIMEOUT: ++ if (val || !in_range(val2, 40000, 560001)) ++ return -EINVAL; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val, val2, ++ BMI323_QUITE_TIM_GES_SCALE); ++ ++ return bmi323_update_ext_reg(data, BMI323_TAP3_REG, ++ BMI323_TAP3_QT_AFT_GES_MSK, ++ FIELD_PREP(BMI323_TAP3_QT_AFT_GES_MSK, ++ raw)); ++ case IIO_EV_INFO_TAP2_MIN_DELAY: ++ if (val || !in_range(val2, 5000, 70001)) ++ return -EINVAL; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val, val2, ++ BMI323_DUR_BW_TAP_SCALE); ++ ++ return bmi323_update_ext_reg(data, BMI323_TAP3_REG, ++ BMI323_TAP3_QT_BW_TAP_MSK, ++ FIELD_PREP(BMI323_TAP3_QT_BW_TAP_MSK, ++ raw)); ++ default: ++ return -EINVAL; ++ } ++ case IIO_EV_TYPE_MAG: ++ reg = bmi323_motion_config_reg(dir); ++ if (reg < 0) ++ return -EINVAL; ++ ++ switch (info) { ++ case IIO_EV_INFO_VALUE: ++ if (!in_range(val, 0, 8)) ++ return -EINVAL; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val, val2, ++ BMI323_MOTION_THRES_SCALE); ++ ++ return bmi323_update_ext_reg(data, reg, ++ BMI323_MO1_SLOPE_TH_MSK, ++ FIELD_PREP(BMI323_MO1_SLOPE_TH_MSK, ++ raw)); ++ case IIO_EV_INFO_PERIOD: ++ if (!in_range(val, 0, 163)) ++ return -EINVAL; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val, val2, ++ BMI323_MOTION_DURAT_SCALE); ++ ++ return bmi323_update_ext_reg(data, ++ reg + BMI323_MO3_OFFSET, ++ BMI323_MO3_DURA_MSK, ++ FIELD_PREP(BMI323_MO3_DURA_MSK, ++ raw)); ++ case IIO_EV_INFO_HYSTERESIS: ++ if (!in_range(val, 0, 2)) ++ return -EINVAL; ++ ++ raw = BMI323_INT_MICRO_TO_RAW(val, val2, ++ BMI323_MOTION_HYSTR_SCALE); ++ ++ return bmi323_update_ext_reg(data, ++ reg + BMI323_MO2_OFFSET, ++ BMI323_MO2_HYSTR_MSK, ++ FIELD_PREP(BMI323_MO2_HYSTR_MSK, ++ raw)); ++ default: ++ return -EINVAL; ++ } ++ case IIO_EV_TYPE_CHANGE: ++ if (!in_range(val, 0, 20461)) ++ return -EINVAL; ++ ++ raw = val / 20; ++ return bmi323_update_ext_reg(data, BMI323_STEP_SC1_REG, ++ BMI323_STEP_SC1_WTRMRK_MSK, ++ FIELD_PREP(BMI323_STEP_SC1_WTRMRK_MSK, ++ raw)); ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_read_event_value(struct 
iio_dev *indio_dev, ++ const struct iio_chan_spec *chan, ++ enum iio_event_type type, ++ enum iio_event_direction dir, ++ enum iio_event_info info, ++ int *val, int *val2) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ unsigned int raw, reg_value; ++ int ret, reg; ++ ++ guard(mutex)(&data->mutex); ++ ++ switch (type) { ++ case IIO_EV_TYPE_GESTURE: ++ switch (info) { ++ case IIO_EV_INFO_VALUE: ++ ret = bmi323_read_ext_reg(data, BMI323_TAP2_REG, ++ &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_TAP2_THRES_MSK, reg_value); ++ *val = raw / BMI323_TAP_THRES_SCALE; ++ *val2 = BMI323_RAW_TO_MICRO(raw, BMI323_TAP_THRES_SCALE); ++ return IIO_VAL_INT_PLUS_MICRO; ++ case IIO_EV_INFO_RESET_TIMEOUT: ++ ret = bmi323_read_ext_reg(data, BMI323_TAP3_REG, ++ &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_TAP3_QT_AFT_GES_MSK, reg_value); ++ *val = 0; ++ *val2 = BMI323_RAW_TO_MICRO(raw, ++ BMI323_QUITE_TIM_GES_SCALE); ++ return IIO_VAL_INT_PLUS_MICRO; ++ case IIO_EV_INFO_TAP2_MIN_DELAY: ++ ret = bmi323_read_ext_reg(data, BMI323_TAP3_REG, ++ &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_TAP3_QT_BW_TAP_MSK, reg_value); ++ *val = 0; ++ *val2 = BMI323_RAW_TO_MICRO(raw, ++ BMI323_DUR_BW_TAP_SCALE); ++ return IIO_VAL_INT_PLUS_MICRO; ++ default: ++ return -EINVAL; ++ } ++ case IIO_EV_TYPE_MAG: ++ reg = bmi323_motion_config_reg(dir); ++ if (reg < 0) ++ return -EINVAL; ++ ++ switch (info) { ++ case IIO_EV_INFO_VALUE: ++ ret = bmi323_read_ext_reg(data, reg, &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_MO1_SLOPE_TH_MSK, reg_value); ++ *val = raw / BMI323_MOTION_THRES_SCALE; ++ *val2 = BMI323_RAW_TO_MICRO(raw, ++ BMI323_MOTION_THRES_SCALE); ++ return IIO_VAL_INT_PLUS_MICRO; ++ case IIO_EV_INFO_PERIOD: ++ ret = bmi323_read_ext_reg(data, ++ reg + BMI323_MO3_OFFSET, ++ &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_MO3_DURA_MSK, reg_value); ++ *val = raw / BMI323_MOTION_DURAT_SCALE; ++ *val2 = BMI323_RAW_TO_MICRO(raw, ++ BMI323_MOTION_DURAT_SCALE); ++ return IIO_VAL_INT_PLUS_MICRO; ++ case IIO_EV_INFO_HYSTERESIS: ++ ret = bmi323_read_ext_reg(data, ++ reg + BMI323_MO2_OFFSET, ++ &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_MO2_HYSTR_MSK, reg_value); ++ *val = raw / BMI323_MOTION_HYSTR_SCALE; ++ *val2 = BMI323_RAW_TO_MICRO(raw, ++ BMI323_MOTION_HYSTR_SCALE); ++ return IIO_VAL_INT_PLUS_MICRO; ++ default: ++ return -EINVAL; ++ } ++ case IIO_EV_TYPE_CHANGE: ++ ret = bmi323_read_ext_reg(data, BMI323_STEP_SC1_REG, ++ &reg_value); ++ if (ret) ++ return ret; ++ ++ raw = FIELD_GET(BMI323_STEP_SC1_WTRMRK_MSK, reg_value); ++ *val = raw * 20; ++ return IIO_VAL_INT; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int __bmi323_fifo_flush(struct iio_dev *indio_dev) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int i, ret, fifo_lvl, frame_count, bit, index; ++ __le16 *frame, *pchannels; ++ u64 sample_period; ++ s64 tstamp; ++ ++ guard(mutex)(&data->mutex); ++ ret = regmap_read(data->regmap, BMI323_FIFO_FILL_LEVEL_REG, &fifo_lvl); ++ if (ret) ++ return ret; ++ ++ fifo_lvl = min(fifo_lvl, BMI323_FIFO_FULL_IN_WORDS); ++ ++ frame_count = fifo_lvl / BMI323_FIFO_FRAME_LENGTH; ++ if (!frame_count) ++ return -EINVAL; ++ ++ if (fifo_lvl % BMI323_FIFO_FRAME_LENGTH) ++ dev_warn(data->dev, "Bad FIFO alignment\n"); ++ ++ /* ++ * Approximate timestamps for each of the samples based on the sampling ++ * frequency, the timestamp of the last sample and the number of samples.
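++ * That is, sample_period = (fifo_tstamp - old_fifo_tstamp) / frame_count, ++ * and frame i is stamped fifo_tstamp - (frame_count - 1 - i) * sample_period.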
++ */ ++ if (data->old_fifo_tstamp) { ++ sample_period = data->fifo_tstamp - data->old_fifo_tstamp; ++ do_div(sample_period, frame_count); ++ } else { ++ sample_period = data->odrns[BMI323_ACCEL]; ++ } ++ ++ tstamp = data->fifo_tstamp - (frame_count - 1) * sample_period; ++ ++ ret = regmap_noinc_read(data->regmap, BMI323_FIFO_DATA_REG, ++ &data->fifo_buff[0], ++ fifo_lvl * BMI323_BYTES_PER_SAMPLE); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < frame_count; i++) { ++ frame = &data->fifo_buff[i * BMI323_FIFO_FRAME_LENGTH]; ++ pchannels = &data->buffer.channels[0]; ++ ++ index = 0; ++ for_each_set_bit(bit, indio_dev->active_scan_mask, ++ BMI323_CHAN_MAX) ++ pchannels[index++] = frame[bit]; ++ ++ iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, ++ tstamp); ++ ++ tstamp += sample_period; ++ } ++ ++ return frame_count; ++} ++ ++static int bmi323_set_watermark(struct iio_dev *indio_dev, unsigned int val) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ ++ val = min(val, (u32)BMI323_FIFO_FULL_IN_FRAMES); ++ ++ guard(mutex)(&data->mutex); ++ data->watermark = val; ++ ++ return 0; ++} ++ ++static int bmi323_fifo_disable(struct bmi323_data *data) ++{ ++ int ret; ++ ++ guard(mutex)(&data->mutex); ++ ret = regmap_write(data->regmap, BMI323_FIFO_CONF_REG, 0); ++ if (ret) ++ return ret; ++ ++ ret = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG, ++ BMI323_FIFO_WTRMRK_MSK, ++ FIELD_PREP(BMI323_FIFO_WTRMRK_MSK, 0)); ++ if (ret) ++ return ret; ++ ++ data->fifo_tstamp = 0; ++ data->state = BMI323_IDLE; ++ ++ return 0; ++} ++ ++static int bmi323_buffer_predisable(struct iio_dev *indio_dev) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ ++ if (iio_device_get_current_mode(indio_dev) == INDIO_BUFFER_TRIGGERED) ++ return 0; ++ ++ return bmi323_fifo_disable(data); ++} ++ ++static int bmi323_update_watermark(struct bmi323_data *data) ++{ ++ int wtrmrk; ++ ++ wtrmrk = data->watermark * BMI323_FIFO_FRAME_LENGTH; ++ ++ return regmap_write(data->regmap, BMI323_FIFO_WTRMRK_REG, wtrmrk); ++} ++ ++static int bmi323_fifo_enable(struct bmi323_data *data) ++{ ++ int ret; ++ ++ guard(mutex)(&data->mutex); ++ ret = regmap_update_bits(data->regmap, BMI323_FIFO_CONF_REG, ++ BMI323_FIFO_CONF_ACC_GYR_EN_MSK, ++ FIELD_PREP(BMI323_FIFO_CONF_ACC_GYR_EN_MSK, ++ BMI323_FIFO_ACC_GYR_MSK)); ++ if (ret) ++ return ret; ++ ++ ret = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG, ++ BMI323_FIFO_WTRMRK_MSK, ++ FIELD_PREP(BMI323_FIFO_WTRMRK_MSK, ++ data->irq_pin)); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_update_watermark(data); ++ if (ret) ++ return ret; ++ ++ ret = regmap_write(data->regmap, BMI323_FIFO_CTRL_REG, ++ BMI323_FIFO_FLUSH_MSK); ++ if (ret) ++ return ret; ++ ++ data->state = BMI323_BUFFER_FIFO; ++ ++ return 0; ++} ++ ++static int bmi323_buffer_preenable(struct iio_dev *indio_dev) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ ++ guard(mutex)(&data->mutex); ++ /* ++ * When the ODRs of the accelerometer and gyroscope do not match, the ++ * higher of the two ODRs is used for the FIFO and dummy frames are ++ * inserted for the signal with the lower ODR. So allow buffered reads ++ * only when the ODRs of the accelerometer and gyroscope are equal. ++ * See datasheet section 5.7 "FIFO Data Buffering".
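++ * Rather than demultiplexing mixed-rate frames, reject such a ++ * configuration with -EINVAL below when the two rates differ.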
++ */ ++ if (data->odrns[BMI323_ACCEL] != data->odrns[BMI323_GYRO]) { ++ dev_err(data->dev, "Accelerometer and gyroscope ODRs don't match\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int bmi323_buffer_postenable(struct iio_dev *indio_dev) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ ++ if (iio_device_get_current_mode(indio_dev) == INDIO_BUFFER_TRIGGERED) ++ return 0; ++ ++ return bmi323_fifo_enable(data); ++} ++ ++static ssize_t hwfifo_watermark_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct iio_dev *indio_dev = dev_to_iio_dev(dev); ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int wm; ++ ++ scoped_guard(mutex, &data->mutex) ++ wm = data->watermark; ++ ++ return sysfs_emit(buf, "%d\n", wm); ++} ++static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0); ++ ++static ssize_t hwfifo_enabled_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct iio_dev *indio_dev = dev_to_iio_dev(dev); ++ struct bmi323_data *data = iio_priv(indio_dev); ++ bool state; ++ ++ scoped_guard(mutex, &data->mutex) ++ state = data->state == BMI323_BUFFER_FIFO; ++ ++ return sysfs_emit(buf, "%d\n", state); ++} ++static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0); ++ ++static const struct iio_dev_attr *bmi323_fifo_attributes[] = { ++ &iio_dev_attr_hwfifo_watermark, ++ &iio_dev_attr_hwfifo_enabled, ++ NULL ++}; ++ ++static const struct iio_buffer_setup_ops bmi323_buffer_ops = { ++ .preenable = bmi323_buffer_preenable, ++ .postenable = bmi323_buffer_postenable, ++ .predisable = bmi323_buffer_predisable, ++}; ++ ++static irqreturn_t bmi323_irq_thread_handler(int irq, void *private) ++{ ++ struct iio_dev *indio_dev = private; ++ struct bmi323_data *data = iio_priv(indio_dev); ++ unsigned int status_addr, status, feature_event; ++ s64 timestamp = iio_get_time_ns(indio_dev); ++ int ret; ++ ++ if (data->irq_pin == BMI323_IRQ_INT1) ++ status_addr = BMI323_STATUS_INT1_REG; ++ else ++ status_addr = BMI323_STATUS_INT2_REG; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, status_addr, &status); ++ if (ret) ++ return IRQ_NONE; ++ } ++ ++ if (!status || FIELD_GET(BMI323_STATUS_ERROR_MSK, status)) ++ return IRQ_NONE; ++ ++ if (FIELD_GET(BMI323_STATUS_FIFO_WTRMRK_MSK, status)) { ++ data->old_fifo_tstamp = data->fifo_tstamp; ++ data->fifo_tstamp = iio_get_time_ns(indio_dev); ++ ret = __bmi323_fifo_flush(indio_dev); ++ if (ret < 0) ++ return IRQ_NONE; ++ } ++ ++ if (FIELD_GET(BMI323_STATUS_ACC_GYR_DRDY_MSK, status)) ++ iio_trigger_poll_nested(data->trig); ++ ++ if (FIELD_GET(BMI323_STATUS_MOTION_MSK, status)) ++ iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ACCEL, 0, ++ IIO_MOD_X_OR_Y_OR_Z, ++ IIO_EV_TYPE_MAG, ++ IIO_EV_DIR_RISING), ++ timestamp); ++ ++ if (FIELD_GET(BMI323_STATUS_NOMOTION_MSK, status)) ++ iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_ACCEL, 0, ++ IIO_MOD_X_OR_Y_OR_Z, ++ IIO_EV_TYPE_MAG, ++ IIO_EV_DIR_FALLING), ++ timestamp); ++ ++ if (FIELD_GET(BMI323_STATUS_STP_WTR_MSK, status)) ++ iio_push_event(indio_dev, IIO_MOD_EVENT_CODE(IIO_STEPS, 0, ++ IIO_NO_MOD, ++ IIO_EV_TYPE_CHANGE, ++ IIO_EV_DIR_NONE), ++ timestamp); ++ ++ if (FIELD_GET(BMI323_STATUS_TAP_MSK, status)) { ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, ++ BMI323_FEAT_EVNT_EXT_REG, ++ &feature_event); ++ if (ret) ++ return IRQ_NONE; ++ } ++ ++ if (FIELD_GET(BMI323_FEAT_EVNT_EXT_S_MSK, feature_event)) { ++ iio_push_event(indio_dev, ++ IIO_MOD_EVENT_CODE(IIO_ACCEL, 0, ++ IIO_MOD_X_OR_Y_OR_Z, ++ IIO_EV_TYPE_GESTURE, ++ IIO_EV_DIR_SINGLETAP), ++ timestamp); ++ } ++ ++ if (FIELD_GET(BMI323_FEAT_EVNT_EXT_D_MSK, feature_event)) ++ iio_push_event(indio_dev, ++ IIO_MOD_EVENT_CODE(IIO_ACCEL, 0, ++ IIO_MOD_X_OR_Y_OR_Z, ++ IIO_EV_TYPE_GESTURE, ++ IIO_EV_DIR_DOUBLETAP), ++ timestamp); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++static int bmi323_set_drdy_irq(struct bmi323_data *data, ++ enum bmi323_irq_pin irq_pin) ++{ ++ int ret; ++ ++ ret = regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG, ++ BMI323_GYR_DRDY_MSK, ++ FIELD_PREP(BMI323_GYR_DRDY_MSK, irq_pin)); ++ if (ret) ++ return ret; ++ ++ return regmap_update_bits(data->regmap, BMI323_INT_MAP2_REG, ++ BMI323_ACC_DRDY_MSK, ++ FIELD_PREP(BMI323_ACC_DRDY_MSK, irq_pin)); ++} ++ ++static int bmi323_data_rdy_trigger_set_state(struct iio_trigger *trig, ++ bool state) ++{ ++ struct bmi323_data *data = iio_trigger_get_drvdata(trig); ++ enum bmi323_irq_pin irq_pin; ++ ++ guard(mutex)(&data->mutex); ++ ++ if (data->state == BMI323_BUFFER_FIFO) { ++ dev_warn(data->dev, "Can't set trigger when FIFO enabled\n"); ++ return -EBUSY; ++ } ++ ++ if (state) { ++ data->state = BMI323_BUFFER_DRDY_TRIGGERED; ++ irq_pin = data->irq_pin; ++ } else { ++ data->state = BMI323_IDLE; ++ irq_pin = BMI323_IRQ_DISABLED; ++ } ++ ++ return bmi323_set_drdy_irq(data, irq_pin); ++} ++ ++static const struct iio_trigger_ops bmi323_trigger_ops = { ++ .set_trigger_state = &bmi323_data_rdy_trigger_set_state, ++}; ++ ++static irqreturn_t bmi323_trigger_handler(int irq, void *p) ++{ ++ struct iio_poll_func *pf = p; ++ struct iio_dev *indio_dev = pf->indio_dev; ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int ret, bit, index = 0; ++ ++ /* Lock to protect the data->buffer */ ++ guard(mutex)(&data->mutex); ++ ++ if (*indio_dev->active_scan_mask == BMI323_ALL_CHAN_MSK) { ++ ret = regmap_bulk_read(data->regmap, BMI323_ACCEL_X_REG, ++ &data->buffer.channels, ++ ARRAY_SIZE(data->buffer.channels)); ++ if (ret) ++ return IRQ_NONE; ++ } else { ++ for_each_set_bit(bit, indio_dev->active_scan_mask, ++ BMI323_CHAN_MAX) { ++ ret = regmap_raw_read(data->regmap, ++ BMI323_ACCEL_X_REG + bit, ++ &data->buffer.channels[index++], ++ BMI323_BYTES_PER_SAMPLE); ++ if (ret) ++ return IRQ_NONE; ++ } ++ } ++ ++ iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, ++ iio_get_time_ns(indio_dev)); ++ ++ iio_trigger_notify_done(indio_dev->trig); ++ ++ return IRQ_HANDLED; ++} ++ ++static int bmi323_set_average(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, int avg) ++{ ++ int raw = ARRAY_SIZE(bmi323_accel_gyro_avrg); ++ ++ while (raw--) ++ if (avg == bmi323_accel_gyro_avrg[raw]) ++ break; ++ if (raw < 0) ++ return -EINVAL; ++ ++ guard(mutex)(&data->mutex); ++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config, ++ BMI323_ACC_GYRO_CONF_AVG_MSK, ++ FIELD_PREP(BMI323_ACC_GYRO_CONF_AVG_MSK, ++ raw)); ++} ++ ++static int bmi323_get_average(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, int *avg) ++{ ++ int ret, value, raw; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, bmi323_hw[sensor].config, &value); ++ if (ret) ++ return ret; ++ } ++ ++ raw = FIELD_GET(BMI323_ACC_GYRO_CONF_AVG_MSK, value); ++ *avg = bmi323_accel_gyro_avrg[raw]; ++ ++ return IIO_VAL_INT; ++} ++ ++static int bmi323_enable_steps(struct bmi323_data *data, int val) ++{ ++ int ret; ++ ++ guard(mutex)(&data->mutex); ++ if (data->odrhz[BMI323_ACCEL] < 200) { ++ dev_err(data->dev, "Invalid accelerometer parameter\n"); ++ return -EINVAL; ++ } ++ ++ ret = bmi323_feature_engine_events(data, 
BMI323_FEAT_IO0_STP_CNT_MSK, ++ val ? 1 : 0); ++ if (ret) ++ return ret; ++ ++ set_mask_bits(&data->feature_events, BMI323_FEAT_IO0_STP_CNT_MSK, ++ FIELD_PREP(BMI323_FEAT_IO0_STP_CNT_MSK, val ? 1 : 0)); ++ ++ return 0; ++} ++ ++static int bmi323_read_steps(struct bmi323_data *data, int *val) ++{ ++ int ret; ++ ++ guard(mutex)(&data->mutex); ++ if (!FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK, data->feature_events)) ++ return -EINVAL; ++ ++ ret = regmap_bulk_read(data->regmap, BMI323_FEAT_IO2_REG, ++ data->steps_count, ++ ARRAY_SIZE(data->steps_count)); ++ if (ret) ++ return ret; ++ ++ *val = get_unaligned_le32(data->steps_count); ++ ++ return IIO_VAL_INT; ++} ++ ++static int bmi323_read_axis(struct bmi323_data *data, ++ struct iio_chan_spec const *chan, int *val) ++{ ++ enum bmi323_sensor_type sensor; ++ unsigned int value; ++ u8 addr; ++ int ret; ++ ++ ret = bmi323_get_error_status(data); ++ if (ret) ++ return -EINVAL; ++ ++ sensor = bmi323_iio_to_sensor(chan->type); ++ addr = bmi323_hw[sensor].data + (chan->channel2 - IIO_MOD_X); ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, addr, &value); ++ if (ret) ++ return ret; ++ } ++ ++ *val = sign_extend32(value, chan->scan_type.realbits - 1); ++ ++ return IIO_VAL_INT; ++} ++ ++static int bmi323_get_temp_data(struct bmi323_data *data, int *val) ++{ ++ unsigned int value; ++ int ret; ++ ++ ret = bmi323_get_error_status(data); ++ if (ret) ++ return -EINVAL; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, BMI323_TEMP_REG, &value); ++ if (ret) ++ return ret; ++ } ++ ++ *val = sign_extend32(value, 15); ++ ++ return IIO_VAL_INT; ++} ++ ++static int bmi323_get_odr(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, int *odr, int *uodr) ++{ ++ int ret, value, odr_raw; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, bmi323_hw[sensor].config, &value); ++ if (ret) ++ return ret; ++ } ++ ++ odr_raw = FIELD_GET(BMI323_ACC_GYRO_CONF_ODR_MSK, value); ++ *odr = bmi323_acc_gyro_odr[odr_raw - 1][0]; ++ *uodr = bmi323_acc_gyro_odr[odr_raw - 1][1]; ++ ++ return IIO_VAL_INT_PLUS_MICRO; ++} ++ ++static int bmi323_configure_power_mode(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, ++ int odr_index) ++{ ++ enum bmi323_opr_mode mode; ++ ++ if (bmi323_acc_gyro_odr[odr_index][0] > 25) ++ mode = ACC_GYRO_MODE_CONTINOUS; ++ else ++ mode = ACC_GYRO_MODE_DUTYCYCLE; ++ ++ return bmi323_set_mode(data, sensor, mode); ++} ++ ++static int bmi323_set_odr(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, int odr, int uodr) ++{ ++ int odr_raw, ret; ++ ++ odr_raw = ARRAY_SIZE(bmi323_acc_gyro_odr); ++ ++ while (odr_raw--) ++ if (odr == bmi323_acc_gyro_odr[odr_raw][0] && ++ uodr == bmi323_acc_gyro_odr[odr_raw][1]) ++ break; ++ if (odr_raw < 0) ++ return -EINVAL; ++ ++ ret = bmi323_configure_power_mode(data, sensor, odr_raw); ++ if (ret) ++ return -EINVAL; ++ ++ guard(mutex)(&data->mutex); ++ data->odrhz[sensor] = bmi323_acc_gyro_odr[odr_raw][0]; ++ data->odrns[sensor] = bmi323_acc_gyro_odrns[odr_raw]; ++ ++ odr_raw++; ++ ++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config, ++ BMI323_ACC_GYRO_CONF_ODR_MSK, ++ FIELD_PREP(BMI323_ACC_GYRO_CONF_ODR_MSK, ++ odr_raw)); ++} ++ ++static int bmi323_get_scale(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, int *val2) ++{ ++ int ret, value, scale_raw; ++ ++ scoped_guard(mutex, &data->mutex) { ++ ret = regmap_read(data->regmap, bmi323_hw[sensor].config, ++ &value); ++ if (ret) ++ return ret; ++ } ++ ++ 
scale_raw = FIELD_GET(BMI323_ACC_GYRO_CONF_SCL_MSK, value); ++ *val2 = bmi323_hw[sensor].scale_table[scale_raw][1]; ++ ++ return IIO_VAL_INT_PLUS_MICRO; ++} ++ ++static int bmi323_set_scale(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, int val, int val2) ++{ ++ int scale_raw; ++ ++ scale_raw = bmi323_hw[sensor].scale_table_len; ++ ++ while (scale_raw--) ++ if (val == bmi323_hw[sensor].scale_table[scale_raw][0] && ++ val2 == bmi323_hw[sensor].scale_table[scale_raw][1]) ++ break; ++ if (scale_raw < 0) ++ return -EINVAL; ++ ++ guard(mutex)(&data->mutex); ++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config, ++ BMI323_ACC_GYRO_CONF_SCL_MSK, ++ FIELD_PREP(BMI323_ACC_GYRO_CONF_SCL_MSK, ++ scale_raw)); ++} ++ ++static int bmi323_read_avail(struct iio_dev *indio_dev, ++ struct iio_chan_spec const *chan, ++ const int **vals, int *type, int *length, ++ long mask) ++{ ++ enum bmi323_sensor_type sensor; ++ ++ switch (mask) { ++ case IIO_CHAN_INFO_SAMP_FREQ: ++ *type = IIO_VAL_INT_PLUS_MICRO; ++ *vals = (const int *)bmi323_acc_gyro_odr; ++ *length = ARRAY_SIZE(bmi323_acc_gyro_odr) * 2; ++ return IIO_AVAIL_LIST; ++ case IIO_CHAN_INFO_SCALE: ++ sensor = bmi323_iio_to_sensor(chan->type); ++ *type = IIO_VAL_INT_PLUS_MICRO; ++ *vals = (const int *)bmi323_hw[sensor].scale_table; ++ *length = bmi323_hw[sensor].scale_table_len * 2; ++ return IIO_AVAIL_LIST; ++ case IIO_CHAN_INFO_OVERSAMPLING_RATIO: ++ *type = IIO_VAL_INT; ++ *vals = (const int *)bmi323_accel_gyro_avrg; ++ *length = ARRAY_SIZE(bmi323_accel_gyro_avrg); ++ return IIO_AVAIL_LIST; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_write_raw(struct iio_dev *indio_dev, ++ struct iio_chan_spec const *chan, int val, ++ int val2, long mask) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int ret; ++ ++ switch (mask) { ++ case IIO_CHAN_INFO_SAMP_FREQ: ++ ret = iio_device_claim_direct_mode(indio_dev); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_set_odr(data, bmi323_iio_to_sensor(chan->type), ++ val, val2); ++ iio_device_release_direct_mode(indio_dev); ++ return ret; ++ case IIO_CHAN_INFO_SCALE: ++ ret = iio_device_claim_direct_mode(indio_dev); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_set_scale(data, bmi323_iio_to_sensor(chan->type), ++ val, val2); ++ iio_device_release_direct_mode(indio_dev); ++ return ret; ++ case IIO_CHAN_INFO_OVERSAMPLING_RATIO: ++ ret = iio_device_claim_direct_mode(indio_dev); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_set_average(data, bmi323_iio_to_sensor(chan->type), ++ val); ++ ++ iio_device_release_direct_mode(indio_dev); ++ return ret; ++ case IIO_CHAN_INFO_ENABLE: ++ return bmi323_enable_steps(data, val); ++ case IIO_CHAN_INFO_PROCESSED: ++ scoped_guard(mutex, &data->mutex) { ++ if (val || !FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK, ++ data->feature_events)) ++ return -EINVAL; ++ ++ /* Clear step counter value */ ++ ret = bmi323_update_ext_reg(data, BMI323_STEP_SC1_REG, ++ BMI323_STEP_SC1_RST_CNT_MSK, ++ FIELD_PREP(BMI323_STEP_SC1_RST_CNT_MSK, ++ 1)); ++ } ++ return ret; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int bmi323_read_raw(struct iio_dev *indio_dev, ++ struct iio_chan_spec const *chan, int *val, ++ int *val2, long mask) ++{ ++ struct bmi323_data *data = iio_priv(indio_dev); ++ int ret; ++ ++ switch (mask) { ++ case IIO_CHAN_INFO_PROCESSED: ++ return bmi323_read_steps(data, val); ++ case IIO_CHAN_INFO_RAW: ++ switch (chan->type) { ++ case IIO_ACCEL: ++ case IIO_ANGL_VEL: ++ ret = iio_device_claim_direct_mode(indio_dev); ++ if (ret) ++ return ret; ++ ++ 
ret = bmi323_read_axis(data, chan, val); ++ ++ iio_device_release_direct_mode(indio_dev); ++ return ret; ++ case IIO_TEMP: ++ return bmi323_get_temp_data(data, val); ++ default: ++ return -EINVAL; ++ } ++ case IIO_CHAN_INFO_SAMP_FREQ: ++ return bmi323_get_odr(data, bmi323_iio_to_sensor(chan->type), ++ val, val2); ++ case IIO_CHAN_INFO_SCALE: ++ switch (chan->type) { ++ case IIO_ACCEL: ++ case IIO_ANGL_VEL: ++ *val = 0; ++ return bmi323_get_scale(data, ++ bmi323_iio_to_sensor(chan->type), ++ val2); ++ case IIO_TEMP: ++ *val = BMI323_TEMP_SCALE / MEGA; ++ *val2 = BMI323_TEMP_SCALE % MEGA; ++ return IIO_VAL_INT_PLUS_MICRO; ++ default: ++ return -EINVAL; ++ } ++ case IIO_CHAN_INFO_OVERSAMPLING_RATIO: ++ return bmi323_get_average(data, ++ bmi323_iio_to_sensor(chan->type), ++ val); ++ case IIO_CHAN_INFO_OFFSET: ++ switch (chan->type) { ++ case IIO_TEMP: ++ *val = BMI323_TEMP_OFFSET; ++ return IIO_VAL_INT; ++ default: ++ return -EINVAL; ++ } ++ case IIO_CHAN_INFO_ENABLE: ++ scoped_guard(mutex, &data->mutex) ++ *val = FIELD_GET(BMI323_FEAT_IO0_STP_CNT_MSK, ++ data->feature_events); ++ return IIO_VAL_INT; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static const struct iio_info bmi323_info = { ++ .read_raw = bmi323_read_raw, ++ .write_raw = bmi323_write_raw, ++ .read_avail = bmi323_read_avail, ++ .hwfifo_set_watermark = bmi323_set_watermark, ++ .write_event_config = bmi323_write_event_config, ++ .read_event_config = bmi323_read_event_config, ++ .write_event_value = bmi323_write_event_value, ++ .read_event_value = bmi323_read_event_value, ++ .event_attrs = &bmi323_event_attribute_group, ++}; ++ ++#define BMI323_SCAN_MASK_ACCEL_3AXIS \ ++ (BIT(BMI323_ACCEL_X) | BIT(BMI323_ACCEL_Y) | BIT(BMI323_ACCEL_Z)) ++ ++#define BMI323_SCAN_MASK_GYRO_3AXIS \ ++ (BIT(BMI323_GYRO_X) | BIT(BMI323_GYRO_Y) | BIT(BMI323_GYRO_Z)) ++ ++static const unsigned long bmi323_avail_scan_masks[] = { ++ /* 3-axis accel */ ++ BMI323_SCAN_MASK_ACCEL_3AXIS, ++ /* 3-axis gyro */ ++ BMI323_SCAN_MASK_GYRO_3AXIS, ++ /* 3-axis accel + 3-axis gyro */ ++ BMI323_SCAN_MASK_ACCEL_3AXIS | BMI323_SCAN_MASK_GYRO_3AXIS, ++ 0 ++}; ++ ++static int bmi323_int_pin_config(struct bmi323_data *data, ++ enum bmi323_irq_pin irq_pin, ++ bool active_high, bool open_drain, bool latch) ++{ ++ unsigned int mask, field_value; ++ int ret; ++ ++ ret = regmap_update_bits(data->regmap, BMI323_IO_INT_CONF_REG, ++ BMI323_IO_INT_LTCH_MSK, ++ FIELD_PREP(BMI323_IO_INT_LTCH_MSK, latch)); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_update_ext_reg(data, BMI323_GEN_SET1_REG, ++ BMI323_GEN_HOLD_DUR_MSK, ++ FIELD_PREP(BMI323_GEN_HOLD_DUR_MSK, 0)); ++ if (ret) ++ return ret; ++ ++ switch (irq_pin) { ++ case BMI323_IRQ_INT1: ++ mask = BMI323_IO_INT1_LVL_OD_OP_MSK; ++ ++ field_value = FIELD_PREP(BMI323_IO_INT1_LVL_MSK, active_high) | ++ FIELD_PREP(BMI323_IO_INT1_OD_MSK, open_drain) | ++ FIELD_PREP(BMI323_IO_INT1_OP_EN_MSK, 1); ++ break; ++ case BMI323_IRQ_INT2: ++ mask = BMI323_IO_INT2_LVL_OD_OP_MSK; ++ ++ field_value = FIELD_PREP(BMI323_IO_INT2_LVL_MSK, active_high) | ++ FIELD_PREP(BMI323_IO_INT2_OD_MSK, open_drain) | ++ FIELD_PREP(BMI323_IO_INT2_OP_EN_MSK, 1); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return regmap_update_bits(data->regmap, BMI323_IO_INT_CTR_REG, mask, ++ field_value); ++} ++ ++static int bmi323_trigger_probe(struct bmi323_data *data, ++ struct iio_dev *indio_dev) ++{ ++ bool open_drain, active_high, latch; ++ struct fwnode_handle *fwnode; ++ enum bmi323_irq_pin irq_pin; ++ int ret, irq, irq_type; ++ struct irq_data *desc; ++ ++ fwnode = 
dev_fwnode(data->dev); ++ if (!fwnode) ++ return -ENODEV; ++ ++ irq = fwnode_irq_get_byname(fwnode, "INT1"); ++ if (irq > 0) { ++ irq_pin = BMI323_IRQ_INT1; ++ } else { ++ irq = fwnode_irq_get_byname(fwnode, "INT2"); ++ if (irq < 0) ++ return 0; ++ ++ irq_pin = BMI323_IRQ_INT2; ++ } ++ ++ desc = irq_get_irq_data(irq); ++ if (!desc) ++ return dev_err_probe(data->dev, -EINVAL, ++ "Could not find IRQ %d\n", irq); ++ ++ irq_type = irqd_get_trigger_type(desc); ++ switch (irq_type) { ++ case IRQF_TRIGGER_RISING: ++ latch = false; ++ active_high = true; ++ break; ++ case IRQF_TRIGGER_HIGH: ++ latch = true; ++ active_high = true; ++ break; ++ case IRQF_TRIGGER_FALLING: ++ latch = false; ++ active_high = false; ++ break; ++ case IRQF_TRIGGER_LOW: ++ latch = true; ++ active_high = false; ++ break; ++ default: ++ return dev_err_probe(data->dev, -EINVAL, ++ "Invalid interrupt type 0x%x specified\n", ++ irq_type); ++ } ++ ++ open_drain = fwnode_property_read_bool(fwnode, "drive-open-drain"); ++ ++ ret = bmi323_int_pin_config(data, irq_pin, active_high, open_drain, ++ latch); ++ if (ret) ++ return dev_err_probe(data->dev, ret, ++ "Failed to configure irq line\n"); ++ ++ data->trig = devm_iio_trigger_alloc(data->dev, "%s-trig-%d", ++ indio_dev->name, irq_pin); ++ if (!data->trig) ++ return -ENOMEM; ++ ++ data->trig->ops = &bmi323_trigger_ops; ++ iio_trigger_set_drvdata(data->trig, data); ++ ++ ret = devm_request_threaded_irq(data->dev, irq, NULL, ++ bmi323_irq_thread_handler, ++ IRQF_ONESHOT, "bmi323-int", indio_dev); ++ if (ret) ++ return dev_err_probe(data->dev, ret, "Failed to request IRQ\n"); ++ ++ ret = devm_iio_trigger_register(data->dev, data->trig); ++ if (ret) ++ return dev_err_probe(data->dev, ret, ++ "Trigger registration failed\n"); ++ ++ data->irq_pin = irq_pin; ++ ++ return 0; ++} ++ ++static int bmi323_feature_engine_enable(struct bmi323_data *data, bool en) ++{ ++ unsigned int feature_status; ++ int ret; ++ ++ if (!en) ++ return regmap_write(data->regmap, BMI323_FEAT_CTRL_REG, 0); ++ ++ ret = regmap_write(data->regmap, BMI323_FEAT_IO2_REG, 0x012c); ++ if (ret) ++ return ret; ++ ++ ret = regmap_write(data->regmap, BMI323_FEAT_IO_STATUS_REG, ++ BMI323_FEAT_IO_STATUS_MSK); ++ if (ret) ++ return ret; ++ ++ ret = regmap_write(data->regmap, BMI323_FEAT_CTRL_REG, ++ BMI323_FEAT_ENG_EN_MSK); ++ if (ret) ++ return ret; ++ ++ /* ++ * It takes around 4 msec to enable the Feature engine, so check ++ * the status of the feature engine every 2 msec for a maximum ++ * of 5 trials. ++ */ ++ ret = regmap_read_poll_timeout(data->regmap, BMI323_FEAT_IO1_REG, ++ feature_status, ++ FIELD_GET(BMI323_FEAT_IO1_ERR_MSK, ++ feature_status) == 1, ++ BMI323_FEAT_ENG_POLL, ++ BMI323_FEAT_ENG_TIMEOUT); ++ if (ret) ++ return dev_err_probe(data->dev, -EINVAL, ++ "Failed to enable feature engine\n"); ++ ++ return 0; ++} ++ ++static void bmi323_disable(void *data_ptr) ++{ ++ struct bmi323_data *data = data_ptr; ++ ++ bmi323_set_mode(data, BMI323_ACCEL, ACC_GYRO_MODE_DISABLE); ++ bmi323_set_mode(data, BMI323_GYRO, ACC_GYRO_MODE_DISABLE); ++} ++ ++static int bmi323_set_bw(struct bmi323_data *data, ++ enum bmi323_sensor_type sensor, enum bmi323_3db_bw bw) ++{ ++ return regmap_update_bits(data->regmap, bmi323_hw[sensor].config, ++ BMI323_ACC_GYRO_CONF_BW_MSK, ++ FIELD_PREP(BMI323_ACC_GYRO_CONF_BW_MSK, bw)); ++} ++ ++static int bmi323_init(struct bmi323_data *data) ++{ ++ int ret, val; ++ ++ /* ++ * Perform soft reset to make sure the device is in a known state after ++ * start up. A delay of 1.5 ms is required after reset. 
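++ * The usleep_range(1500, 2000) below provides that settling time before ++ * any further register access is attempted.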
++ * See datasheet section 5.17 "Soft Reset". ++ */ ++ ret = regmap_write(data->regmap, BMI323_CMD_REG, BMI323_RST_VAL); ++ if (ret) ++ return ret; ++ ++ usleep_range(1500, 2000); ++ ++ /* ++ * Dummy read is required to enable SPI interface after reset. ++ * See datasheet section 7.2.1 "Protocol Selection". ++ */ ++ regmap_read(data->regmap, BMI323_CHIP_ID_REG, &val); ++ ++ ret = regmap_read(data->regmap, BMI323_STATUS_REG, &val); ++ if (ret) ++ return ret; ++ ++ if (!FIELD_GET(BMI323_STATUS_POR_MSK, val)) ++ return dev_err_probe(data->dev, -EINVAL, ++ "Sensor initialization error\n"); ++ ++ ret = regmap_read(data->regmap, BMI323_CHIP_ID_REG, &val); ++ if (ret) ++ return ret; ++ ++ if (FIELD_GET(BMI323_CHIP_ID_MSK, val) != BMI323_CHIP_ID_VAL) ++ return dev_err_probe(data->dev, -EINVAL, "Chip ID mismatch\n"); ++ ++ ret = bmi323_feature_engine_enable(data, true); ++ if (ret) ++ return ret; ++ ++ ret = regmap_read(data->regmap, BMI323_ERR_REG, &val); ++ if (ret) ++ return ret; ++ ++ if (val) ++ return dev_err_probe(data->dev, -EINVAL, ++ "Sensor power error = 0x%x\n", val); ++ ++ /* ++ * Set the Bandwidth coefficient which defines the 3 dB cutoff ++ * frequency in relation to the ODR. ++ */ ++ ret = bmi323_set_bw(data, BMI323_ACCEL, BMI323_BW_ODR_BY_2); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_set_bw(data, BMI323_GYRO, BMI323_BW_ODR_BY_2); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_set_odr(data, BMI323_ACCEL, 25, 0); ++ if (ret) ++ return ret; ++ ++ ret = bmi323_set_odr(data, BMI323_GYRO, 25, 0); ++ if (ret) ++ return ret; ++ ++ return devm_add_action_or_reset(data->dev, bmi323_disable, data); ++} ++ ++int bmi323_core_probe(struct device *dev) ++{ ++ static const char * const regulator_names[] = { "vdd", "vddio" }; ++ struct iio_dev *indio_dev; ++ struct bmi323_data *data; ++ struct regmap *regmap; ++ int ret; ++ ++ regmap = dev_get_regmap(dev, NULL); ++ if (!regmap) ++ return dev_err_probe(dev, -ENODEV, "Failed to get regmap\n"); ++ ++ indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); ++ if (!indio_dev) ++ return dev_err_probe(dev, -ENOMEM, ++ "Failed to allocate device\n"); ++ ++ ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names), ++ regulator_names); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to enable regulators\n"); ++ ++ data = iio_priv(indio_dev); ++ data->dev = dev; ++ data->regmap = regmap; ++ mutex_init(&data->mutex); ++ ++ ret = bmi323_init(data); ++ if (ret) ++ return -EINVAL; ++ ++ ret = iio_read_mount_matrix(dev, &data->orientation); ++ if (ret) ++ return ret; ++ ++ indio_dev->name = "bmi323-imu"; ++ indio_dev->info = &bmi323_info; ++ indio_dev->channels = bmi323_channels; ++ indio_dev->num_channels = ARRAY_SIZE(bmi323_channels); ++ indio_dev->available_scan_masks = bmi323_avail_scan_masks; ++ indio_dev->modes = INDIO_DIRECT_MODE | INDIO_BUFFER_SOFTWARE; ++ dev_set_drvdata(data->dev, indio_dev); ++ ++ ret = bmi323_trigger_probe(data, indio_dev); ++ if (ret) ++ return -EINVAL; ++ ++ ret = devm_iio_triggered_buffer_setup_ext(data->dev, indio_dev, ++ &iio_pollfunc_store_time, ++ bmi323_trigger_handler, ++ IIO_BUFFER_DIRECTION_IN, ++ &bmi323_buffer_ops, ++ bmi323_fifo_attributes); ++ if (ret) ++ return dev_err_probe(data->dev, ret, ++ "Failed to setup trigger buffer\n"); ++ ++ ret = devm_iio_device_register(data->dev, indio_dev); ++ if (ret) ++ return dev_err_probe(data->dev, ret, ++ "Unable to register iio device\n"); ++ ++ return 0; ++} ++EXPORT_SYMBOL_NS_GPL(bmi323_core_probe, IIO_BMI323); ++ ++MODULE_DESCRIPTION("Bosch BMI323 IMU 
driver"); ++MODULE_AUTHOR("Jagath Jog J "); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/iio/imu/bmi323/bmi323_i2c.c b/drivers/iio/imu/bmi323/bmi323_i2c.c +new file mode 100644 +index 000000000000..0008e186367d +--- /dev/null ++++ b/drivers/iio/imu/bmi323/bmi323_i2c.c +@@ -0,0 +1,121 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * I2C driver for Bosch BMI323 6-Axis IMU. ++ * ++ * Copyright (C) 2023, Jagath Jog J ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "bmi323.h" ++ ++struct bmi323_i2c_priv { ++ struct i2c_client *i2c; ++ u8 i2c_rx_buffer[BMI323_FIFO_LENGTH_IN_BYTES + BMI323_I2C_DUMMY]; ++}; ++ ++/* ++ * From BMI323 datasheet section 4: Notes on the Serial Interface Support. ++ * Each I2C register read operation requires to read two dummy bytes before ++ * the actual payload. ++ */ ++static int bmi323_regmap_i2c_read(void *context, const void *reg_buf, ++ size_t reg_size, void *val_buf, ++ size_t val_size) ++{ ++ struct bmi323_i2c_priv *priv = context; ++ struct i2c_msg msgs[2]; ++ int ret; ++ ++ msgs[0].addr = priv->i2c->addr; ++ msgs[0].flags = priv->i2c->flags; ++ msgs[0].len = reg_size; ++ msgs[0].buf = (u8 *)reg_buf; ++ ++ msgs[1].addr = priv->i2c->addr; ++ msgs[1].len = val_size + BMI323_I2C_DUMMY; ++ msgs[1].buf = priv->i2c_rx_buffer; ++ msgs[1].flags = priv->i2c->flags | I2C_M_RD; ++ ++ ret = i2c_transfer(priv->i2c->adapter, msgs, ARRAY_SIZE(msgs)); ++ if (ret < 0) ++ return -EIO; ++ ++ memcpy(val_buf, priv->i2c_rx_buffer + BMI323_I2C_DUMMY, val_size); ++ ++ return 0; ++} ++ ++static int bmi323_regmap_i2c_write(void *context, const void *data, ++ size_t count) ++{ ++ struct bmi323_i2c_priv *priv = context; ++ u8 reg; ++ ++ reg = *(u8 *)data; ++ return i2c_smbus_write_i2c_block_data(priv->i2c, reg, ++ count - sizeof(u8), ++ data + sizeof(u8)); ++} ++ ++static struct regmap_bus bmi323_regmap_bus = { ++ .read = bmi323_regmap_i2c_read, ++ .write = bmi323_regmap_i2c_write, ++}; ++ ++const struct regmap_config bmi323_i2c_regmap_config = { ++ .reg_bits = 8, ++ .val_bits = 16, ++ .max_register = BMI323_CFG_RES_REG, ++ .val_format_endian = REGMAP_ENDIAN_LITTLE, ++}; ++ ++static int bmi323_i2c_probe(struct i2c_client *i2c) ++{ ++ struct device *dev = &i2c->dev; ++ struct bmi323_i2c_priv *priv; ++ struct regmap *regmap; ++ ++ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; ++ ++ priv->i2c = i2c; ++ regmap = devm_regmap_init(dev, &bmi323_regmap_bus, priv, ++ &bmi323_i2c_regmap_config); ++ if (IS_ERR(regmap)) ++ return dev_err_probe(dev, PTR_ERR(regmap), ++ "Failed to initialize I2C Regmap\n"); ++ ++ return bmi323_core_probe(dev); ++} ++ ++static const struct i2c_device_id bmi323_i2c_ids[] = { ++ { "bmi323" }, ++ { } ++}; ++MODULE_DEVICE_TABLE(i2c, bmi323_i2c_ids); ++ ++static const struct of_device_id bmi323_of_i2c_match[] = { ++ { .compatible = "bosch,bmi323" }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, bmi323_of_i2c_match); ++ ++static struct i2c_driver bmi323_i2c_driver = { ++ .driver = { ++ .name = "bmi323", ++ .of_match_table = bmi323_of_i2c_match, ++ }, ++ .probe = bmi323_i2c_probe, ++ .id_table = bmi323_i2c_ids, ++}; ++module_i2c_driver(bmi323_i2c_driver); ++ ++MODULE_DESCRIPTION("Bosch BMI323 IMU driver"); ++MODULE_AUTHOR("Jagath Jog J "); ++MODULE_LICENSE("GPL"); ++MODULE_IMPORT_NS(IIO_BMI323); +diff --git a/drivers/iio/imu/bmi323/bmi323_spi.c b/drivers/iio/imu/bmi323/bmi323_spi.c +new file mode 100644 +index 000000000000..6dc3352dd714 +--- /dev/null ++++ b/drivers/iio/imu/bmi323/bmi323_spi.c +@@ -0,0 +1,92 @@ ++// 
SPDX-License-Identifier: GPL-2.0 ++/* ++ * SPI driver for Bosch BMI323 6-Axis IMU. ++ * ++ * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com> ++ */ ++ ++#include <linux/mod_devicetable.h> ++#include <linux/module.h> ++#include <linux/regmap.h> ++#include <linux/spi/spi.h> ++ ++#include "bmi323.h" ++ ++/* ++ * From BMI323 datasheet section 4: Notes on the Serial Interface Support. ++ * Each SPI register read operation requires reading one dummy byte before ++ * the actual payload. ++ */ ++static int bmi323_regmap_spi_read(void *context, const void *reg_buf, ++ size_t reg_size, void *val_buf, ++ size_t val_size) ++{ ++ struct spi_device *spi = context; ++ ++ return spi_write_then_read(spi, reg_buf, reg_size, val_buf, val_size); ++} ++ ++static int bmi323_regmap_spi_write(void *context, const void *data, ++ size_t count) ++{ ++ struct spi_device *spi = context; ++ u8 *data_buff = (u8 *)data; ++ ++ data_buff[1] = data_buff[0]; ++ return spi_write(spi, data_buff + 1, count - 1); ++} ++ ++static struct regmap_bus bmi323_regmap_bus = { ++ .read = bmi323_regmap_spi_read, ++ .write = bmi323_regmap_spi_write, ++}; ++ ++const struct regmap_config bmi323_spi_regmap_config = { ++ .reg_bits = 8, ++ .val_bits = 16, ++ .pad_bits = 8, ++ .read_flag_mask = BIT(7), ++ .max_register = BMI323_CFG_RES_REG, ++ .val_format_endian = REGMAP_ENDIAN_LITTLE, ++}; ++ ++static int bmi323_spi_probe(struct spi_device *spi) ++{ ++ struct device *dev = &spi->dev; ++ struct regmap *regmap; ++ ++ regmap = devm_regmap_init(dev, &bmi323_regmap_bus, dev, ++ &bmi323_spi_regmap_config); ++ if (IS_ERR(regmap)) ++ return dev_err_probe(dev, PTR_ERR(regmap), ++ "Failed to initialize SPI Regmap\n"); ++ ++ return bmi323_core_probe(dev); ++} ++ ++static const struct spi_device_id bmi323_spi_ids[] = { ++ { "bmi323" }, ++ { } ++}; ++MODULE_DEVICE_TABLE(spi, bmi323_spi_ids); ++ ++static const struct of_device_id bmi323_of_spi_match[] = { ++ { .compatible = "bosch,bmi323" }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, bmi323_of_spi_match); ++ ++static struct spi_driver bmi323_spi_driver = { ++ .driver = { ++ .name = "bmi323", ++ .of_match_table = bmi323_of_spi_match, ++ }, ++ .probe = bmi323_spi_probe, ++ .id_table = bmi323_spi_ids, ++}; ++module_spi_driver(bmi323_spi_driver); ++ ++MODULE_DESCRIPTION("Bosch BMI323 IMU driver"); ++MODULE_AUTHOR("Jagath Jog J <jagathjog1996@gmail.com>"); ++MODULE_LICENSE("GPL"); ++MODULE_IMPORT_NS(IIO_BMI323); +Make the local structures static within their respective driver files. 
+ +Reported-by: kernel test robot <lkp@intel.com> +Closes: https://lore.kernel.org/oe-kbuild-all/202311070530.qKhLTz1Y-lkp@intel.com/ +Fixes: b512c767e7bc ("iio: imu: Add driver for BMI323 IMU") +Signed-off-by: Jagath Jog J <jagathjog1996@gmail.com> +--- + drivers/iio/imu/bmi323/bmi323_i2c.c | 2 +- + drivers/iio/imu/bmi323/bmi323_spi.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/iio/imu/bmi323/bmi323_i2c.c b/drivers/iio/imu/bmi323/bmi323_i2c.c +index 0008e186367d..20a8001b9956 100644 +--- a/drivers/iio/imu/bmi323/bmi323_i2c.c ++++ b/drivers/iio/imu/bmi323/bmi323_i2c.c +@@ -66,7 +66,7 @@ static struct regmap_bus bmi323_regmap_bus = { + .write = bmi323_regmap_i2c_write, + }; + +-const struct regmap_config bmi323_i2c_regmap_config = { ++static const struct regmap_config bmi323_i2c_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .max_register = BMI323_CFG_RES_REG, +diff --git a/drivers/iio/imu/bmi323/bmi323_spi.c b/drivers/iio/imu/bmi323/bmi323_spi.c +index 6dc3352dd714..7b1e8127d0dd 100644 +--- a/drivers/iio/imu/bmi323/bmi323_spi.c ++++ b/drivers/iio/imu/bmi323/bmi323_spi.c +@@ -41,7 +41,7 @@ static struct regmap_bus bmi323_regmap_bus = { + .write = bmi323_regmap_spi_write, + }; + +-const struct regmap_config bmi323_spi_regmap_config = { ++static const struct regmap_config bmi323_spi_regmap_config = { + .reg_bits = 8, + .val_bits = 16, + .pad_bits = 8, +diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c +index 1105918..d665a6e 100644 +--- a/drivers/iio/accel/bmc150-accel-core.c ++++ b/drivers/iio/accel/bmc150-accel-core.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include <linux/dmi.h> + #include + #include + #include +@@ -1670,6 +1671,8 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq, + struct iio_dev *indio_dev; + int ret; + ++ if (dmi_match(DMI_BOARD_NAME, "RC71L") || (dmi_match(DMI_BOARD_NAME, "AB05-AMD") && dmi_match(DMI_PRODUCT_NAME, "AIR Plus"))) ++ return -ENODEV; // Abort loading bmc150 for ASUS ROG ALLY, Ayaneo Air Plus + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; +diff --git a/drivers/iio/imu/bmi323/bmi323_i2c.c b/drivers/iio/imu/bmi323/bmi323_i2c.c +index 20a8001..346ba2d 100644 +--- a/drivers/iio/imu/bmi323/bmi323_i2c.c ++++ b/drivers/iio/imu/bmi323/bmi323_i2c.c +@@ -5,6 +5,7 @@ + * Copyright (C) 2023, Jagath Jog J <jagathjog1996@gmail.com> + */ + ++#include <linux/acpi.h> + #include <linux/i2c.h> + #include <linux/mod_devicetable.h> + #include <linux/module.h> +@@ -93,6 +94,12 @@ static int bmi323_i2c_probe(struct i2c_client *i2c) + return bmi323_core_probe(dev); + } + ++static const struct acpi_device_id bmi323_acpi_match[] = { ++ {"BOSC0200"}, ++ { }, ++}; ++MODULE_DEVICE_TABLE(acpi, bmi323_acpi_match); ++ + static const struct i2c_device_id bmi323_i2c_ids[] = { + { "bmi323" }, + { } +@@ -109,6 +116,7 @@ static struct i2c_driver bmi323_i2c_driver = { + .driver = { + .name = "bmi323", + .of_match_table = bmi323_of_i2c_match, ++ .acpi_match_table = ACPI_PTR(bmi323_acpi_match), + }, + .probe = bmi323_i2c_probe, + .id_table = bmi323_i2c_ids, +diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c +index d752e9c..b495dba 100644 +--- a/drivers/iio/industrialio-core.c ++++ b/drivers/iio/industrialio-core.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include <linux/dmi.h> + #include + #include + #include +@@ -571,6 +572,14 @@ static const struct iio_mount_matrix iio_mount_idmatrix = { + } + }; + ++static const struct iio_mount_matrix iio_mount_invert_x_matrix = { ++ .rotation = { ++ "-1", "0", "0", ++ "0", "1", "0", ++ "0", "0", "1" ++ } 
++}; ++ + static int iio_setup_mount_idmatrix(const struct device *dev, + struct iio_mount_matrix *matrix) + { +@@ -579,6 +588,14 @@ static int iio_setup_mount_idmatrix(const struct device *dev, + return 0; + } + ++static int iio_setup_mount_invert_x_matrix(const struct device *dev, ++ struct iio_mount_matrix *matrix) ++{ ++ *matrix = iio_mount_invert_x_matrix; ++ dev_info(dev, "using inverted X-axis mounting matrix...\n"); ++ return 0; ++} ++ + ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv, + const struct iio_chan_spec *chan, char *buf) + { +@@ -615,6 +632,8 @@ int iio_read_mount_matrix(struct device *dev, struct iio_mount_matrix *matrix) + int err; + + err = device_property_read_string_array(dev, "mount-matrix", matrix->rotation, len); ++ if (dmi_match(DMI_BOARD_NAME, "RC71L")) ++ return iio_setup_mount_invert_x_matrix(dev, matrix); + if (err == len) + return 0; + +diff --git a/drivers/iio/imu/bmi323/bmi323_core.c b/drivers/iio/imu/bmi323/bmi323_core.c +index 0bd5ded..ded8596 100644 +--- a/drivers/iio/imu/bmi323/bmi323_core.c ++++ b/drivers/iio/imu/bmi323/bmi323_core.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -285,6 +286,9 @@ static const int bmi323_acc_gyro_odr[][2] = { + { 200, 0 }, + { 400, 0 }, + { 800, 0 }, ++ { 1600, 0}, ++ { 3200, 0}, ++ { 6400, 0}, + }; + + static const int bmi323_acc_gyro_odrns[] = { diff --git a/SOURCES/rpminspect.yaml b/SOURCES/rpminspect.yaml index b4e599a..87c9272 100644 --- a/SOURCES/rpminspect.yaml +++ b/SOURCES/rpminspect.yaml @@ -23,7 +23,7 @@ emptyrpm: patches: ignore_list: - linux-kernel-test.patch - - patch-6.6-redhat.patch + - patch-6.7-redhat.patch runpath: ignore: diff --git a/SOURCES/steam-deck.patch b/SOURCES/steam-deck.patch index 9eba750..bbdb9ce 100644 --- a/SOURCES/steam-deck.patch +++ b/SOURCES/steam-deck.patch @@ -225,14 +225,14 @@ diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 11d076cad8a2..d03c1e1d339f 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile -@@ -191,6 +191,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o +@@ -199,6 +199,7 @@ obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o obj-$(CONFIG_SENSORS_SPARX5) += sparx5-temp.o +obj-$(CONFIG_SENSORS_STEAMDECK) += steamdeck-hwmon.o obj-$(CONFIG_SENSORS_STTS751) += stts751.o - obj-$(CONFIG_SENSORS_SY7636A) += sy7636a-hwmon.o - obj-$(CONFIG_SENSORS_AMC6821) += amc6821.o + obj-$(CONFIG_SENSORS_SURFACE_FAN)+= surface_fan.o + obj-$(CONFIG_SENSORS_SURFACE_TEMP)+= surface_temp.o diff --git a/drivers/hwmon/steamdeck-hwmon.c b/drivers/hwmon/steamdeck-hwmon.c new file mode 100644 index 000000000000..fab9e9460bd4 @@ -609,10 +609,10 @@ diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 290186e44e6b..4d444a9e2c1f 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig -@@ -189,4 +189,11 @@ config EXTCON_USBC_TUSB320 - Say Y here to enable support for USB Type C cable detection extcon - support using a TUSB320. - +@@ -202,4 +202,11 @@ + The DHC (Digital Home Hub) RTD series SoC contains a type c module. + This driver will detect the status of the type-c port. 
+ +config EXTCON_STEAMDECK + tristate "Steam Deck extcon support" + depends on MFD_STEAMDECK @@ -625,10 +625,10 @@ diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile index 1b390d934ca9..1c7e217f29e4 100644 --- a/drivers/extcon/Makefile +++ b/drivers/extcon/Makefile -@@ -25,3 +25,4 @@ obj-$(CONFIG_EXTCON_SM5502) += extcon-sm5502.o - obj-$(CONFIG_EXTCON_USB_GPIO) += extcon-usb-gpio.o +@@ -26,3 +26,4 @@ obj-$(CONFIG_EXTCON_USBC_CROS_EC) += extcon-usbc-cros-ec.o obj-$(CONFIG_EXTCON_USBC_TUSB320) += extcon-usbc-tusb320.o + obj-$(CONFIG_EXTCON_RTK_TYPE_C) += extcon-rtk-type-c.o +obj-$(CONFIG_EXTCON_STEAMDECK) += extcon-steamdeck.o diff --git a/drivers/extcon/extcon-steamdeck.c b/drivers/extcon/extcon-steamdeck.c new file mode 100644 diff --git a/SOURCES/steamdeck-oled-audio.patch b/SOURCES/steamdeck-oled-audio.patch index 978e76a..eee4e1e 100644 --- a/SOURCES/steamdeck-oled-audio.patch +++ b/SOURCES/steamdeck-oled-audio.patch @@ -209,7 +209,7 @@ index a06af82b8..ae32748a5 100644 +static int acp_cs35l41_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ -+ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); ++ struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct snd_soc_card *card = rtd->card; + struct snd_soc_dai *codec_dai; + int ret, i; @@ -218,7 +218,7 @@ index a06af82b8..ae32748a5 100644 + + ret = 0; + for (i = 0; i < num_codecs; i++) { -+ codec_dai = asoc_rtd_to_codec(rtd, i); ++ codec_dai = snd_soc_rtd_to_codec(rtd, i); + if (strcmp(codec_dai->name, "cs35l41-pcm") == 0) { + switch (params_rate(params)) { + case 48000: @@ -326,7 +326,7 @@ index a06af82b8..ae32748a5 100644 + links[i].no_pcm = 1; + if (!drv_data->bt_codec_id) { + /* Use dummy codec if codec id not specified */ -+ links[i].codecs = &asoc_dummy_dlc; ++ links[i].codecs = &snd_soc_dummy_dlc; + links[i].num_codecs = 1; + } i++; @@ -344,14 +344,14 @@ index 2b3ec6594..6feef5a93 100644 DMIC_BE_ID, }; -@@ -41,6 +42,7 @@ enum codec_endpoints { +@@ -45,6 +45,7 @@ MAX98360A, RT5682S, NAU8825, + CS35L41, NAU8821, MAX98388, - }; + ES83XX, @@ -53,9 +55,11 @@ enum platform_end_point { struct acp_card_drvdata { unsigned int hs_cpu_id; diff --git a/SOURCES/steamdeck-oled-bt.patch b/SOURCES/steamdeck-oled-bt.patch deleted file mode 100644 index 20cf681..0000000 --- a/SOURCES/steamdeck-oled-bt.patch +++ /dev/null @@ -1,239 +0,0 @@ -From fca3761de38864b0422006aaaf9ce8e0aba5e316 Mon Sep 17 00:00:00 2001 -From: Thomas Crider -Date: Sat, 2 Dec 2023 05:07:16 -0500 -Subject: [PATCH] steamdeck-bt-unified - ---- - drivers/bluetooth/btqca.c | 78 +++++++++++++++++++++++++++++++++++++ - drivers/bluetooth/btqca.h | 3 ++ - drivers/bluetooth/hci_qca.c | 9 ++++- - net/bluetooth/hci_sync.c | 10 +++-- - 4 files changed, 95 insertions(+), 5 deletions(-) - -diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c -index 5a35ac413..de2195b72 100644 ---- a/drivers/bluetooth/btqca.c -+++ b/drivers/bluetooth/btqca.c -@@ -205,6 +205,48 @@ static int qca_send_reset(struct hci_dev *hdev) - return 0; - } - -+static int qca_read_fw_board_id(struct hci_dev *hdev, u16 *bid) -+{ -+ u8 cmd; -+ struct sk_buff *skb; -+ struct edl_event_hdr *edl; -+ int err = 0; -+ int bid_len; -+ -+ bt_dev_dbg(hdev, "QCA read board ID"); -+ -+ cmd = EDL_GET_BID_REQ_CMD; -+ skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, EDL_PATCH_CMD_LEN, -+ &cmd, 0, HCI_INIT_TIMEOUT); -+ if (IS_ERR(skb)) { -+ err = PTR_ERR(skb); -+ bt_dev_err(hdev, "Reading QCA board ID failed (%d)", err); -+ return err; -+ } 
-+ -+ edl = skb_pull_data(skb, sizeof(*edl)); -+ if (!edl) { -+ bt_dev_err(hdev, "QCA read board ID with no header"); -+ err = -EILSEQ; -+ goto out; -+ } -+ -+ if (edl->cresp != EDL_CMD_REQ_RES_EVT || -+ edl->rtype != EDL_GET_BID_REQ_CMD) { -+ bt_dev_err(hdev, "QCA Wrong packet: %d %d", edl->cresp, edl->rtype); -+ err = -EIO; -+ goto out; -+ } -+ -+ bid_len = edl->data[0]; -+ *bid = (edl->data[1] << 8) + edl->data[2]; -+ bt_dev_info(hdev, "%s: bid len = %x, bid = %x", __func__, bid_len, *bid); -+ -+out: -+ kfree_skb(skb); -+ return err; -+} -+ - int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) - { - struct sk_buff *skb; -@@ -574,6 +616,30 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) - } - EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome); - -+static void qca_generate_nvm_name(struct hci_dev *hdev, char *fwname, -+ size_t max_size, struct qca_btsoc_version ver, u16 bid) -+{ -+ u8 rom_ver = 0; -+ u32 soc_ver; -+ const char *variant; -+ -+ soc_ver = get_soc_ver(ver.soc_id, ver.rom_ver); -+ rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); -+ -+ if ((ver.soc_id & 0x0000ff00) == QCA_HSP_GF_SOC_ID) /* hsp gf chip */ -+ variant = "g"; -+ else -+ variant = ""; -+ -+ if (bid == 0x0) -+ snprintf(fwname, max_size, "qca/hpnv%02x%s.bin", rom_ver, variant); -+ else -+ snprintf(fwname, max_size, "qca/hpnv%02x%s.%x", -+ rom_ver, variant, bid); -+ -+ bt_dev_info(hdev, "%s: nvm name is %s", __func__, fwname); -+} -+ - int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, - enum qca_btsoc_type soc_type, struct qca_btsoc_version ver, - const char *firmware_name) -@@ -582,6 +648,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, - int err; - u8 rom_ver = 0; - u32 soc_ver; -+ u16 boardid = 0; - - bt_dev_dbg(hdev, "QCA setup on UART"); - -@@ -605,6 +672,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, - /* Download rampatch file */ - config.type = TLV_TYPE_PATCH; - switch (soc_type) { -+ case QCA_QCA2066: -+ snprintf(config.fwname, sizeof(config.fwname), -+ "qca/hpbtfw%02x.tlv", rom_ver); -+ break; - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: -@@ -649,6 +720,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, - /* Give the controller some time to get ready to receive the NVM */ - msleep(10); - -+ if (soc_type == QCA_QCA2066) -+ qca_read_fw_board_id(hdev, &boardid); -+ - /* Download NVM configuration */ - config.type = TLV_TYPE_NVM; - if (firmware_name) { -@@ -656,6 +730,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, - "qca/%s", firmware_name); - } else { - switch (soc_type) { -+ case QCA_QCA2066: -+ qca_generate_nvm_name(hdev, config.fwname, sizeof(config.fwname), -+ ver, boardid); -+ break; - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: -diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h -index 03bff5c00..ffed9ea8a 100644 ---- a/drivers/bluetooth/btqca.h -+++ b/drivers/bluetooth/btqca.h -@@ -13,6 +13,7 @@ - #define EDL_PATCH_TLV_REQ_CMD (0x1E) - #define EDL_GET_BUILD_INFO_CMD (0x20) - #define EDL_NVM_ACCESS_SET_REQ_CMD (0x01) -+#define EDL_GET_BID_REQ_CMD (0x23) - #define EDL_PATCH_CONFIG_CMD (0x28) - #define MAX_SIZE_PER_TLV_SEGMENT (243) - #define QCA_PRE_SHUTDOWN_CMD (0xFC08) -@@ -48,6 +49,7 @@ - - #define QCA_FW_BUILD_VER_LEN 255 - -+#define QCA_HSP_GF_SOC_ID 0x1200 - - enum qca_baudrate { - QCA_BAUDRATE_115200 = 0, -@@ -146,6 +148,7 @@ enum qca_btsoc_type { - QCA_WCN3990, - QCA_WCN3998, - QCA_WCN3991, -+ QCA_QCA2066, - QCA_QCA6390, - QCA_WCN6750, - QCA_WCN6855, 
-diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c -index 4b57e15f9..891c25ffc 100644 ---- a/drivers/bluetooth/hci_qca.c -+++ b/drivers/bluetooth/hci_qca.c -@@ -1861,7 +1861,7 @@ static int qca_setup(struct hci_uart *hu) - break; - - default: -- soc_name = "ROME/QCA6390"; -+ soc_name = "ROME/QCA6390/QCA2066"; - } - bt_dev_info(hdev, "setting up %s", soc_name); - -@@ -1987,6 +1987,11 @@ static const struct hci_uart_proto qca_proto = { - .dequeue = qca_dequeue, - }; - -+static const struct qca_device_data qca_soc_data_qca2066 = { -+ .soc_type = QCA_QCA2066, -+ .num_vregs = 0, -+}; -+ - static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { - .soc_type = QCA_WCN3988, - .vregs = (struct qca_vreg []) { -@@ -2569,6 +2574,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = { - { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, - { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, - { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, -+ { .compatible = "qcom,qca2066-bt", .data = &qca_soc_data_qca2066}, - { /* sentinel */ } - }; - MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); -@@ -2580,6 +2586,7 @@ static const struct acpi_device_id qca_bluetooth_acpi_match[] = { - { "DLA16390", (kernel_ulong_t)&qca_soc_data_qca6390 }, - { "DLB16390", (kernel_ulong_t)&qca_soc_data_qca6390 }, - { "DLB26390", (kernel_ulong_t)&qca_soc_data_qca6390 }, -+ { "QCOM2066", (kernel_ulong_t)&qca_soc_data_qca2066 }, - { }, - }; - MODULE_DEVICE_TABLE(acpi, qca_bluetooth_acpi_match); -diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c -index 9e71362c0..ac5c0cafd 100644 ---- a/net/bluetooth/hci_sync.c -+++ b/net/bluetooth/hci_sync.c -@@ -3800,12 +3800,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) - if (lmp_bredr_capable(hdev)) { - events[4] |= 0x01; /* Flow Specification Complete */ - -- /* Don't set Disconnect Complete when suspended as that -- * would wakeup the host when disconnecting due to -+ /* Don't set Disconnect Complete and mode change when suspended -+ * as that would wakeup the host when disconnecting due to - * suspend. 
- */ -- if (hdev->suspended) -+ if (hdev->suspended){ - events[0] &= 0xef; -+ events[2] &= 0xf7; -+ } - } else { - /* Use a different default for LE-only devices */ - memset(events, 0, sizeof(events)); -@@ -5931,7 +5933,7 @@ int hci_suspend_sync(struct hci_dev *hdev) - - if (hci_conn_count(hdev)) { - /* Soft disconnect everything (power off) */ -- err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); -+ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_USER_TERM); - if (err) { - /* Set state to BT_RUNNING so resume doesn't notify */ - hdev->suspend_state = BT_RUNNING; --- -2.43.0 - diff --git a/SOURCES/steamdeck-oled-hw-quirks.patch b/SOURCES/steamdeck-oled-hw-quirks.patch index 644c270..28cb762 100644 --- a/SOURCES/steamdeck-oled-hw-quirks.patch +++ b/SOURCES/steamdeck-oled-hw-quirks.patch @@ -125,10 +125,10 @@ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdg index b8633df418d43..77a1bedaee98c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h -@@ -416,8 +416,6 @@ - struct drm_property *regamma_tf_property; +@@ -346,8 +346,6 @@ struct amdgpu_mode_info { + const enum drm_plane_type *plane_type; }; - + -#define AMDGPU_MAX_BL_LEVEL 0xFF - struct amdgpu_backlight_privdata { @@ -277,6 +277,89 @@ index e1a77a0d66336..8e61c86819fe2 100644 GitLab +From f1f63fbd6a31efad6165f4b35b20ba65f25f877b Mon Sep 17 00:00:00 2001 +From: Christian Marcheselli +Date: Thu, 23 Feb 2023 16:41:42 -0800 +Subject: [PATCH] Galileo-only workaround for backlight settings + +(cherry picked from commit 657d5054e6ed013000111db0cc2612f525d5e42d) +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index e959aa28b019..ccda049be022 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -147,7 +147,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); + #define PSP_FOOTER_BYTES 0x100 + + /* Maximum backlight level. */ +-#define AMDGPU_MAX_BL_LEVEL 0xFFFF ++#define AMDGPU_MAX_BL_LEVEL 0xFFF + + /** + * DOC: overview +@@ -4103,9 +4103,12 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c + { + unsigned min, max; + +- if (!get_brightness_range(caps, &min, &max)) +- return brightness; ++ //if (!get_brightness_range(caps, &min, &max)) ++ // return brightness; ++ min = 0; ++ max = 500000; + ++ DRM_INFO("[%s, %d] brightness range %d - %d", __func__, __LINE__, min, max); + // Rescale 0..AMDGPU_MAX_BL_LEVEL to min..max + return min + DIV_ROUND_CLOSEST((max - min) * brightness, + AMDGPU_MAX_BL_LEVEL); +-- +GitLab + + +From 234d6d21b9eda7fba368e6423626db4bd04e4afd Mon Sep 17 00:00:00 2001 +From: "Pierre-Loup A. Griffais" +Date: Tue, 7 Nov 2023 16:57:15 -0800 +Subject: [PATCH] Revert "Galileo-only workaround for backlight settings" + +This reverts commit f1f63fbd6a31efad6165f4b35b20ba65f25f877b. 
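+
+The revert drops the hardcoded 0..500000 range and the DRM_INFO debug
+print, restoring the get_brightness_range() based scaling and the
+0xFFFF maximum backlight level.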
+--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index b004154ba913..6d7df6ae890a 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -147,7 +147,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); + #define PSP_FOOTER_BYTES 0x100 + + /* Maximum backlight level. */ +-#define AMDGPU_MAX_BL_LEVEL 0xFFF ++#define AMDGPU_MAX_BL_LEVEL 0xFFFF + + /** + * DOC: overview +@@ -4123,12 +4123,9 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c + { + unsigned min, max; + +- //if (!get_brightness_range(caps, &min, &max)) +- // return brightness; +- min = 0; +- max = 500000; ++ if (!get_brightness_range(caps, &min, &max)) ++ return brightness; + +- DRM_INFO("[%s, %d] brightness range %d - %d", __func__, __LINE__, min, max); + // Rescale 0..AMDGPU_MAX_BL_LEVEL to min..max + return min + DIV_ROUND_CLOSEST((max - min) * brightness, + AMDGPU_MAX_BL_LEVEL); +-- +GitLab + From ab7d646eacf9f1c745d284e293211569a4428573 Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Wed, 8 Nov 2023 19:45:52 -0800 @@ -352,18 +435,6 @@ index 30e7c627f21a7..472fa2c8ebcec 100644 /* * Synopsys USB 3.x host HAPS platform has a class code of -diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h -index b76ff08506181..95f33dadb2be2 100644 ---- a/include/linux/pci_ids.h -+++ b/include/linux/pci_ids.h -@@ -568,7 +568,6 @@ - #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 - #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb - #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 --#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a - #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 - #define PCI_DEVICE_ID_AMD_LANCE 0x2000 - #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- GitLab @@ -435,15 +506,6 @@ index b87797bc5874..28e6fa8d7860 100644 adev->family < AMDGPU_FAMILY_AI) { spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); mod_freesync_handle_v_update( -@@ -8098,7 +8098,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, - int planes_count = 0, vpos, hpos; - unsigned long flags; - u32 target_vblank, last_flip_vblank; -- bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state); -+ bool vrr_active = true;//amdgpu_dm_crtc_vrr_active(acrtc_state); - bool cursor_update = false; - bool pflip_present = false; - bool dirty_rects_changed = false; -- GitLab diff --git a/SOURCES/steamdeck-oled-wifi.patch b/SOURCES/steamdeck-oled-wifi.patch index a25f07d..65494c6 100644 --- a/SOURCES/steamdeck-oled-wifi.patch +++ b/SOURCES/steamdeck-oled-wifi.patch @@ -1,18 +1,19 @@ -From 01fd63d2e9b32cd917c9036dfb703b5c4bbd872d Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "neil.shi" Date: Tue, 23 May 2023 16:58:08 +0800 Subject: [PATCH] wifi: ath11k: [DBS PATCH 1/6]: Indicate NAN support to firmware Signed-off-by: neil.shi +Signed-off-by: Jan200101 --- drivers/net/wireless/ath/ath11k/hw.c | 1 + drivers/net/wireless/ath/ath11k/wmi.c | 1 + - drivers/net/wireless/ath/ath11k/wmi.h | 19 +++++++++++++++++++ - 3 files changed, 21 insertions(+) + drivers/net/wireless/ath/ath11k/wmi.h | 10 ++++++++++ + 3 files changed, 12 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c -index dbcc0c4035b62..6309efe4b7c1f 100644 +index d7b5ec6e6904..210759ddf2fb 100644 --- 
a/drivers/net/wireless/ath/ath11k/hw.c +++ b/drivers/net/wireless/ath/ath11k/hw.c @@ -100,6 +100,7 @@ static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab, @@ -21,13 +22,13 @@ index dbcc0c4035b62..6309efe4b7c1f 100644 config->flag1 |= WMI_RSRC_CFG_FLAG1_BSS_CHANNEL_INFO_64; + config->host_service_flags |= WMI_RSRC_CFG_HOST_SERVICE_FLAG_NAN_IFACE_SUPPORT; } - + static void ath11k_hw_ipq8074_reo_setup(struct ath11k_base *ab) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c -index 3e0a47f4a3ebd..64648e0d9484d 100644 +index 2845b4313d3a..e2dcdb3b78cc 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c -@@ -4069,6 +4132,7 @@ ath11k_wmi_copy_resource_config(struct wmi_resource_config *wmi_cfg, +@@ -4069,6 +4069,7 @@ ath11k_wmi_copy_resource_config(struct wmi_resource_config *wmi_cfg, wmi_cfg->sched_params = tg_cfg->sched_params; wmi_cfg->twt_ap_pdev_count = tg_cfg->twt_ap_pdev_count; wmi_cfg->twt_ap_sta_count = tg_cfg->twt_ap_sta_count; @@ -36,7 +37,7 @@ index 3e0a47f4a3ebd..64648e0d9484d 100644 ~(1 << WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT); wmi_cfg->host_service_flags |= (tg_cfg->is_reg_cc_ext_event_supported << diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h -index 8f2c07d70a4a2..042c7b0d16631 100644 +index 100bb816b592..a8354022f575 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2330,6 +2330,7 @@ struct wmi_init_cmd { @@ -44,10 +45,10 @@ index 8f2c07d70a4a2..042c7b0d16631 100644 #define WMI_RSRC_CFG_FLAG2_CALC_NEXT_DTIM_COUNT_SET BIT(9) #define WMI_RSRC_CFG_FLAG1_ACK_RSSI BIT(18) +#define WMI_RSRC_CFG_HOST_SERVICE_FLAG_NAN_IFACE_SUPPORT BIT(0) - + #define WMI_CFG_HOST_SERVICE_FLAG_REG_CC_EXT 4 - -@@ -5700,6 +5710,15 @@ struct target_resource_config { + +@@ -5700,6 +5701,15 @@ struct target_resource_config { u8 is_reg_cc_ext_event_supported; u32 ema_max_vap_cnt; u32 ema_max_profile_period; @@ -61,17 +62,16 @@ index 8f2c07d70a4a2..042c7b0d16631 100644 + u32 flags2; + u32 host_service_flags; }; - + enum wmi_debug_log_param { --- -GitLab -From bc6d3226e567630188a41a78a12514c74babdea9 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "neil.shi" Date: Tue, 23 May 2023 17:01:06 +0800 Subject: [PATCH] wifi: ath11k: [DBS PATCH 2/6] wifi: ath11k: add support for QCA206X +Signed-off-by: Jan200101 --- drivers/net/wireless/ath/ath11k/core.c | 64 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath11k/core.h | 1 + @@ -80,13 +80,13 @@ Subject: [PATCH] wifi: ath11k: [DBS PATCH 2/6] wifi: ath11k: add support for 4 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c -index 893fefadbba96..96ed5b7cd0048 100644 +index 0c6ecbb9a066..766cf2db5b45 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c -@@ -394,6 +394,70 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { +@@ -411,6 +411,70 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fragment_160mhz = false, }, - + + .interface_modes = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP), + .supports_monitor = false, @@ -155,22 +155,22 @@ index 893fefadbba96..96ed5b7cd0048 100644 BIT(NL80211_IFTYPE_AP), .supports_monitor = false, diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h -index bd06536f82a64..ab120329619c0 100644 
+index 667d55e26156..0c0960994231 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h -@@ -144,6 +144,7 @@ enum ath11k_hw_rev { +@@ -147,6 +147,7 @@ enum ath11k_hw_rev { ATH11K_HW_WCN6855_HW21, ATH11K_HW_WCN6750_HW10, ATH11K_HW_IPQ5018_HW10, + ATH11K_HW_QCA206X_HW21, }; - + enum ath11k_firmware_mode { diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c -index a62ee05c54097..c76f665dc369d 100644 +index afeabd6ecc67..92917d842d57 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c -@@ -434,6 +434,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci) +@@ -443,6 +443,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci) case ATH11K_HW_QCA6390_HW20: case ATH11K_HW_WCN6855_HW20: case ATH11K_HW_WCN6855_HW21: @@ -179,19 +179,19 @@ index a62ee05c54097..c76f665dc369d 100644 break; default: diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c -index 3c6005ab9a717..93dd259bd85ad 100644 +index 09e65c5e55c4..2fa4a99e7b08 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c -@@ -27,6 +27,8 @@ +@@ -28,6 +28,8 @@ #define QCN9074_DEVICE_ID 0x1104 #define WCN6855_DEVICE_ID 0x1103 - + +#define SUB_VERSION 0x1910010 + static const struct pci_device_id ath11k_pci_id_table[] = { { PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) }, { PCI_VDEVICE(QCOM, WCN6855_DEVICE_ID) }, -@@ -806,7 +808,19 @@ static int ath11k_pci_probe(struct pci_dev *pdev, +@@ -809,7 +811,19 @@ static int ath11k_pci_probe(struct pci_dev *pdev, break; case 0x10: case 0x11: @@ -212,18 +212,16 @@ index 3c6005ab9a717..93dd259bd85ad 100644 break; default: goto unsupported_wcn6855_soc; -@@ -1017,6 +1031,7 @@ static struct pci_driver ath11k_pci_driver = { +@@ -1021,6 +1035,7 @@ static struct pci_driver ath11k_pci_driver = { static int ath11k_pci_init(void) { int ret; + u32 sub_version; - + ret = pci_register_driver(&ath11k_pci_driver); if (ret) --- -GitLab -From 707933ef2a20db8f7c3d9d3c654a8dcb2f582436 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "neil.shi" Date: Tue, 23 May 2023 17:04:27 +0800 Subject: [PATCH] wifi: ath11k: [DBS PATCH 3/6]: support 2 stations and report @@ -234,18 +232,19 @@ num_rxmda_per_pdev in hw_params, and report addresses for these interfaces. 
Signed-off-by: neil.shi +Signed-off-by: Jan200101 --- drivers/net/wireless/ath/ath11k/mac.c | 83 ++++++++++++++++++++------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c -index cb77dd6ce9665..c7fa31deefacd 100644 +index 71c6dab1aedb..967dbe3cfe94 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c -@@ -8774,6 +8774,31 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar, +@@ -9284,6 +9284,31 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar, return 0; } - + +static void ath11k_mac_setup_mac_address_list(struct ath11k *ar) +{ + struct mac_address *addresses; @@ -274,10 +273,10 @@ index cb77dd6ce9665..c7fa31deefacd 100644 static int ath11k_mac_setup_iface_combinations(struct ath11k *ar) { struct ath11k_base *ab = ar->ab; -@@ -8793,28 +8818,43 @@ static int ath11k_mac_setup_iface_combinations(struct ath11k *ar) +@@ -9303,28 +9328,43 @@ static int ath11k_mac_setup_iface_combinations(struct ath11k *ar) return -ENOMEM; } - + - limits[0].max = 1; - limits[0].types |= BIT(NL80211_IFTYPE_STATION); - @@ -286,13 +285,13 @@ index cb77dd6ce9665..c7fa31deefacd 100644 + if (ab->hw_params.single_pdev_only && ar->ab->hw_params.num_rxmda_per_pdev > 1) { + limits[0].max = 2; + limits[0].types |= BIT(NL80211_IFTYPE_STATION); - + - if (IS_ENABLED(CONFIG_MAC80211_MESH) && - ab->hw_params.interface_modes & BIT(NL80211_IFTYPE_MESH_POINT)) - limits[1].types |= BIT(NL80211_IFTYPE_MESH_POINT); + limits[1].max = 1; + limits[1].types |= BIT(NL80211_IFTYPE_AP); - + - combinations[0].limits = limits; - combinations[0].n_limits = n_limits; - combinations[0].max_interfaces = 16; @@ -335,56 +334,47 @@ index cb77dd6ce9665..c7fa31deefacd 100644 + BIT(NL80211_CHAN_WIDTH_80P80) | + BIT(NL80211_CHAN_WIDTH_160); + } - + ar->hw->wiphy->iface_combinations = combinations; ar->hw->wiphy->n_iface_combinations = 1; -@@ -8875,6 +8915,8 @@ static void __ath11k_mac_unregister(struct ath11k *ar) +@@ -9389,6 +9429,8 @@ static void __ath11k_mac_unregister(struct ath11k *ar) kfree(ar->hw->wiphy->iface_combinations[0].limits); kfree(ar->hw->wiphy->iface_combinations); - + + kfree(ar->hw->wiphy->addresses); + SET_IEEE80211_DEV(ar->hw, NULL); } - -@@ -8917,6 +8959,7 @@ static int __ath11k_mac_register(struct ath11k *ar) + +@@ -9431,6 +9473,7 @@ static int __ath11k_mac_register(struct ath11k *ar) ath11k_pdev_caps_update(ar); - + SET_IEEE80211_PERM_ADDR(ar->hw, ar->mac_addr); + ath11k_mac_setup_mac_address_list(ar); - + SET_IEEE80211_DEV(ar->hw, ab->dev); + --- -GitLab - -From 6591470d389d674f100568393112c169841db26f Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "neil.shi" Date: Tue, 23 May 2023 17:07:21 +0800 Subject: [PATCH] wifi: ath11k: [DBS PATCH 6/6]: send coex config to firmware for QCA206X Signed-off-by: neil.shi +Signed-off-by: Jan200101 --- - drivers/net/wireless/ath/ath11k/core.c | 27 +++++++++ + drivers/net/wireless/ath/ath11k/core.c | 26 +++++++++ drivers/net/wireless/ath/ath11k/hw.h | 1 + drivers/net/wireless/ath/ath11k/wmi.c | 26 +++++++++ drivers/net/wireless/ath/ath11k/wmi.h | 77 ++++++++++++++++++++++++++ - 4 files changed, 131 insertions(+) + 4 files changed, 130 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c -index 96ed5b7cd0048..849c7c12198e0 100644 +index 766cf2db5b45..e3c55b1e792e 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ 
b/drivers/net/wireless/ath/ath11k/core.c -@@ -16,6 +16,7 @@ - #include "debug.h" - #include "hif.h" - #include "wow.h" -+#include "wmi.h" - - unsigned int ath11k_debug_mask; - EXPORT_SYMBOL(ath11k_debug_mask); -@@ -115,6 +116,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { +@@ -122,6 +122,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tcl_ring_retry = true, .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, @@ -392,7 +382,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644 }, { .hw_rev = ATH11K_HW_IPQ6018_HW10, -@@ -204,6 +206,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { +@@ -205,6 +206,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, .support_fw_mac_sequence = false, @@ -400,7 +390,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644 }, { .name = "qca6390 hw2.0", -@@ -371,6 +374,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { +@@ -372,6 +374,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, .support_fw_mac_sequence = false, @@ -408,7 +398,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644 }, { .name = "wcn6855 hw2.0", -@@ -418,6 +422,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { +@@ -435,6 +438,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fw_wmi_diag_event = true, .current_cc_support = true, .dbr_debug_support = false, @@ -416,7 +406,7 @@ index 96ed5b7cd0048..849c7c12198e0 100644 }, { .name = "qca206x hw2.1", -@@ -456,6 +525,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { +@@ -521,6 +525,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, .support_fw_mac_sequence = true, @@ -424,10 +414,10 @@ index 96ed5b7cd0048..849c7c12198e0 100644 }, { .name = "wcn6855 hw2.1", -@@ -1448,6 +1454,18 @@ static void ath11k_core_pdev_destroy(struct ath11k_base *ab) +@@ -1637,6 +1642,18 @@ static void ath11k_core_pdev_destroy(struct ath11k_base *ab) ath11k_debugfs_pdev_destroy(ab); } - + +static int ath11k_core_config_coex_isolation(struct ath11k_base *ab) +{ + struct ath11k *ar = ath11k_ab_to_ar(ab, 0); @@ -443,10 +433,10 @@ index 96ed5b7cd0048..849c7c12198e0 100644 static int ath11k_core_start(struct ath11k_base *ab) { int ret; -@@ -1545,6 +1563,15 @@ static int ath11k_core_start(struct ath11k_base *ab) +@@ -1734,6 +1751,15 @@ static int ath11k_core_start(struct ath11k_base *ab) goto err_reo_cleanup; } - + + if (ab->hw_params.coex_isolation) { + ret = ath11k_core_config_coex_isolation(ab); + if (ret) { @@ -457,13 +447,13 @@ index 96ed5b7cd0048..849c7c12198e0 100644 + } + return 0; - + err_reo_cleanup: diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h -index 8a3f24862edc4..4da64301d2974 100644 +index d51a99669dd6..9b96ce7f6538 100644 --- a/drivers/net/wireless/ath/ath11k/hw.h +++ b/drivers/net/wireless/ath/ath11k/hw.h -@@ -200,6 +200,7 @@ struct ath11k_hw_params { +@@ -206,6 +206,7 @@ struct ath11k_hw_params { bool fw_wmi_diag_event; bool current_cc_support; bool dbr_debug_support; @@ -472,10 +462,10 @@ index 8a3f24862edc4..4da64301d2974 100644 const struct cfg80211_sar_capa *bios_sar_capa; bool m3_fw_support; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c -index 9037919a3ae90..5050c0dfe2508 100644 +index e2dcdb3b78cc..cf292442fab4 100644 --- 
a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c -@@ -8880,6 +8880,32 @@ +@@ -8880,6 +8880,32 @@ ath11k_wmi_send_unit_test_cmd(struct ath11k *ar, return ret; } @@ -509,11 +499,11 @@ index 9037919a3ae90..5050c0dfe2508 100644 { struct ath11k_vif *arvif; diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h -index 373d38538db0c..d63073eaaec3d 100644 +index a8354022f575..a18723fe9aae 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h -@@ -6322,6 +6323,82 @@ enum wmi_sta_keepalive_method { - +@@ -6332,6 +6332,82 @@ enum wmi_sta_keepalive_method { + const void **ath11k_wmi_tlv_parse_alloc(struct ath11k_base *ab, const void *ptr, size_t len, gfp_t gfp); +enum wmi_coex_config_type { @@ -595,7 +585,7 @@ index 373d38538db0c..d63073eaaec3d 100644 int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb, u32 cmd_id); struct sk_buff *ath11k_wmi_alloc_skb(struct ath11k_wmi_base *wmi_sc, u32 len); -@@ -6171,6 +6247,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, +@@ -6493,6 +6569,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, const u8 mac_addr[ETH_ALEN]); int ath11k_wmi_fw_dbglog_cfg(struct ath11k *ar, u32 *module_id_bitmap, struct ath11k_fw_dbglog *dbglog); @@ -603,32 +593,28 @@ index 373d38538db0c..d63073eaaec3d 100644 int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id, struct wmi_pno_scan_req *pno_scan); int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id); --- -GitLab - -From 0f02da05404b27449b01cc3b3a992dcb6f795287 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: "neil.shi" Date: Tue, 23 May 2023 17:41:00 +0800 Subject: [PATCH] wifi: ath11k: merge all patches to 6.1.11, fix exceptions and compilation errors Signed-off-by: neil.shi +Signed-off-by: Jan200101 --- - drivers/net/wireless/ath/ath11k/debugfs.c | 1 + - drivers/net/wireless/ath/ath11k/hw.h | 6 ++++ - drivers/net/wireless/ath/ath11k/pci.c | 35 ++++++++++++++++------- - drivers/net/wireless/ath/ath11k/pcic.c | 11 +++++++ - 4 files changed, 43 insertions(+), 10 deletions(-) + drivers/net/wireless/ath/ath11k/pci.c | 35 ++++++++++++++++++-------- + drivers/net/wireless/ath/ath11k/pcic.c | 11 ++++++++ + 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c -index 61c8840a0559..798c1010b215 100644 +index 2fa4a99e7b08..88f11f6e47b7 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c -@@ -228,7 +228,12 @@ static u32 ath11k_pci_window_read32(struct ath11k_base *ab, u32 offset) +@@ -110,7 +110,12 @@ static u32 ath11k_pci_window_read32(struct ath11k_base *ab, u32 offset) struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); u32 window_start, val; - + - window_start = ath11k_pci_get_window_start(ab, offset); + if (ab->hw_params.static_window_map) + window_start = ath11k_pci_get_window_start(ab, offset); @@ -636,19 +622,19 @@ index 61c8840a0559..798c1010b215 100644 + window_start = ATH11K_PCI_WINDOW_START; + + //window_start = ath11k_pci_get_window_start(ab, offset); - + if (window_start == ATH11K_PCI_WINDOW_START) { spin_lock_bh(&ab_pci->window_lock); -@@ -852,6 +857,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev, +@@ -735,6 +740,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev, u32 soc_hw_version_major, soc_hw_version_minor, addr; const struct ath11k_pci_ops *pci_ops; int ret; + 
u32 sub_version; + int ops_init = 0; - + ab = ath11k_core_alloc(&pdev->dev, sizeof(*ab_pci), ATH11K_BUS_PCI); - -@@ -899,8 +906,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev, + +@@ -781,8 +788,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev, case QCA6390_DEVICE_ID: ath11k_pci_read_hw_version(ab, &soc_hw_version_major, &soc_hw_version_minor); @@ -659,7 +645,7 @@ index 61c8840a0559..798c1010b215 100644 ab->hw_rev = ATH11K_HW_QCA6390_HW20; break; default: -@@ -920,6 +927,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev, +@@ -802,6 +809,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev, ab->id.bdf_search = ATH11K_BDF_SEARCH_BUS_AND_BOARD; ath11k_pci_read_hw_version(ab, &soc_hw_version_major, &soc_hw_version_minor); @@ -673,7 +659,7 @@ index 61c8840a0559..798c1010b215 100644 switch (soc_hw_version_major) { case 2: switch (soc_hw_version_minor) { -@@ -930,7 +944,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev, +@@ -812,7 +826,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev, case 0x10: case 0x11: //ab->hw_rev = ATH11K_HW_WCN6855_HW21; @@ -682,18 +668,18 @@ index 61c8840a0559..798c1010b215 100644 ath11k_dbg(ab, ATH11K_DBG_PCI, "sub_version 0x%x\n", sub_version); switch (sub_version) { case 0x1019A0E1: -@@ -955,7 +969,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev, +@@ -837,7 +851,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } - + - pci_ops = &ath11k_pci_ops_qca6390; break; default: dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n", -@@ -964,11 +977,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev, +@@ -846,11 +859,13 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } - + - ret = ath11k_pcic_register_pci_ops(ab, pci_ops); - if (ret) { - ath11k_err(ab, "failed to register PCI ops: %d\n", ret); @@ -706,11 +692,11 @@ index 61c8840a0559..798c1010b215 100644 + goto err_pci_free_region; + } + } - + ret = ath11k_pcic_init_msi_config(ab); if (ret) { diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c -index 063e97815455..82233e30c835 100644 +index 16d1e332193f..406eef086836 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.c +++ b/drivers/net/wireless/ath/ath11k/pcic.c @@ -115,6 +115,17 @@ static const struct ath11k_msi_config ath11k_msi_config[] = { @@ -729,21 +715,21 @@ index 063e97815455..82233e30c835 100644 + .hw_rev = ATH11K_HW_QCA206X_HW21, + }, }; - + int ath11k_pcic_init_msi_config(struct ath11k_base *ab) --- -GitLab -From d4d6f1583876b3702603939ac41b98498cf6dd10 Mon Sep 17 00:00:00 2001 + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Thomas Crider Date: Thu, 7 Dec 2023 17:22:18 -0500 Subject: [PATCH] wifi-fixup +Signed-off-by: Jan200101 --- drivers/net/wireless/ath/ath11k/core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c -index 77daa0882..765b91e8d 100644 +index e3c55b1e792e..b286707af6e2 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -420,7 +420,8 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { @@ -774,6 +760,3 @@ index 77daa0882..765b91e8d 100644 .single_pdev_only = true, .rxdma1_enable = false, .num_rxmda_per_pdev = 2, --- -2.43.0 - diff --git a/SOURCES/t2linux.patch b/SOURCES/t2linux.patch new file mode 100644 index 0000000..ba58dde --- /dev/null +++ b/SOURCES/t2linux.patch @@ -0,0 +1,12153 @@ +From 
80093f92d42d77f27de6b204550baf4622070732 Mon Sep 17 00:00:00 2001 +From: Aditya Garg +Date: Tue, 12 Sep 2023 12:26:12 +0530 +Subject: [PATCH] Add apple-bce driver + +--- + drivers/staging/apple-bce/Makefile | 28 + + drivers/staging/apple-bce/apple_bce.c | 443 ++++++++++ + drivers/staging/apple-bce/apple_bce.h | 38 + + drivers/staging/apple-bce/audio/audio.c | 711 ++++++++++++++++ + drivers/staging/apple-bce/audio/audio.h | 123 +++ + drivers/staging/apple-bce/audio/description.h | 42 + + drivers/staging/apple-bce/audio/pcm.c | 308 +++++++ + drivers/staging/apple-bce/audio/pcm.h | 16 + + drivers/staging/apple-bce/audio/protocol.c | 347 ++++++++ + drivers/staging/apple-bce/audio/protocol.h | 147 ++++ + .../staging/apple-bce/audio/protocol_bce.c | 226 ++++++ + .../staging/apple-bce/audio/protocol_bce.h | 72 ++ + drivers/staging/apple-bce/mailbox.c | 151 ++++ + drivers/staging/apple-bce/mailbox.h | 53 ++ + drivers/staging/apple-bce/queue.c | 390 +++++++++ + drivers/staging/apple-bce/queue.h | 177 ++++ + drivers/staging/apple-bce/queue_dma.c | 220 +++++ + drivers/staging/apple-bce/queue_dma.h | 50 ++ + drivers/staging/apple-bce/vhci/command.h | 204 +++++ + drivers/staging/apple-bce/vhci/queue.c | 268 +++++++ + drivers/staging/apple-bce/vhci/queue.h | 76 ++ + drivers/staging/apple-bce/vhci/transfer.c | 661 +++++++++++++++ + drivers/staging/apple-bce/vhci/transfer.h | 71 ++ + drivers/staging/apple-bce/vhci/vhci.c | 759 ++++++++++++++++++ + drivers/staging/apple-bce/vhci/vhci.h | 48 ++ + 25 files changed, 5629 insertions(+) + create mode 100644 drivers/staging/apple-bce/Makefile + create mode 100644 drivers/staging/apple-bce/apple_bce.c + create mode 100644 drivers/staging/apple-bce/apple_bce.h + create mode 100644 drivers/staging/apple-bce/audio/audio.c + create mode 100644 drivers/staging/apple-bce/audio/audio.h + create mode 100644 drivers/staging/apple-bce/audio/description.h + create mode 100644 drivers/staging/apple-bce/audio/pcm.c + create mode 100644 drivers/staging/apple-bce/audio/pcm.h + create mode 100644 drivers/staging/apple-bce/audio/protocol.c + create mode 100644 drivers/staging/apple-bce/audio/protocol.h + create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.c + create mode 100644 drivers/staging/apple-bce/audio/protocol_bce.h + create mode 100644 drivers/staging/apple-bce/mailbox.c + create mode 100644 drivers/staging/apple-bce/mailbox.h + create mode 100644 drivers/staging/apple-bce/queue.c + create mode 100644 drivers/staging/apple-bce/queue.h + create mode 100644 drivers/staging/apple-bce/queue_dma.c + create mode 100644 drivers/staging/apple-bce/queue_dma.h + create mode 100644 drivers/staging/apple-bce/vhci/command.h + create mode 100644 drivers/staging/apple-bce/vhci/queue.c + create mode 100644 drivers/staging/apple-bce/vhci/queue.h + create mode 100644 drivers/staging/apple-bce/vhci/transfer.c + create mode 100644 drivers/staging/apple-bce/vhci/transfer.h + create mode 100644 drivers/staging/apple-bce/vhci/vhci.c + create mode 100644 drivers/staging/apple-bce/vhci/vhci.h + +diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile +new file mode 100644 +index 000000000..a6a656f06 +--- /dev/null ++++ b/drivers/staging/apple-bce/Makefile +@@ -0,0 +1,28 @@ ++modname := apple-bce ++obj-m += $(modname).o ++ ++apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o ++ ++MY_CFLAGS += -DWITHOUT_NVME_PATCH ++#MY_CFLAGS += -g -DDEBUG 
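++# WITHOUT_NVME_PATCH compiles out the pci_enable_device_mem() /
++# pci_disable_device() calls on PCI function 0 in apple_bce.c;
++# pci_set_master() on that function still runs either way.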
++ccflags-y += ${MY_CFLAGS} ++CC += ${MY_CFLAGS} ++ ++KVERSION := $(KERNELRELEASE) ++ifeq ($(origin KERNELRELEASE), undefined) ++KVERSION := $(shell uname -r) ++endif ++ ++KDIR := /lib/modules/$(KVERSION)/build ++PWD := $(shell pwd) ++ ++.PHONY: all ++ ++all: ++ $(MAKE) -C $(KDIR) M=$(PWD) modules ++ ++clean: ++ $(MAKE) -C $(KDIR) M=$(PWD) clean ++ ++install: ++ $(MAKE) -C $(KDIR) M=$(PWD) modules_install +diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c +new file mode 100644 +index 000000000..ad89632df +--- /dev/null ++++ b/drivers/staging/apple-bce/apple_bce.c +@@ -0,0 +1,443 @@ ++#include "apple_bce.h" ++#include ++#include ++#include "audio/audio.h" ++#include ++ ++static dev_t bce_chrdev; ++static struct class *bce_class; ++ ++struct apple_bce_device *global_bce; ++ ++static int bce_create_command_queues(struct apple_bce_device *bce); ++static void bce_free_command_queues(struct apple_bce_device *bce); ++static irqreturn_t bce_handle_mb_irq(int irq, void *dev); ++static irqreturn_t bce_handle_dma_irq(int irq, void *dev); ++static int bce_fw_version_handshake(struct apple_bce_device *bce); ++static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq); ++ ++static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id) ++{ ++ struct apple_bce_device *bce = NULL; ++ int status = 0; ++ int nvec; ++ ++ pr_info("apple-bce: capturing our device\n"); ++ ++ if (pci_enable_device(dev)) ++ return -ENODEV; ++ if (pci_request_regions(dev, "apple-bce")) { ++ status = -ENODEV; ++ goto fail; ++ } ++ pci_set_master(dev); ++ nvec = pci_alloc_irq_vectors(dev, 1, 8, PCI_IRQ_MSI); ++ if (nvec < 5) { ++ status = -EINVAL; ++ goto fail; ++ } ++ ++ bce = kzalloc(sizeof(struct apple_bce_device), GFP_KERNEL); ++ if (!bce) { ++ status = -ENOMEM; ++ goto fail; ++ } ++ ++ bce->pci = dev; ++ pci_set_drvdata(dev, bce); ++ ++ bce->devt = bce_chrdev; ++ bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce"); ++ if (IS_ERR_OR_NULL(bce->dev)) { ++ status = PTR_ERR(bce_class); ++ goto fail; ++ } ++ ++ bce->reg_mem_mb = pci_iomap(dev, 4, 0); ++ bce->reg_mem_dma = pci_iomap(dev, 2, 0); ++ ++ if (IS_ERR_OR_NULL(bce->reg_mem_mb) || IS_ERR_OR_NULL(bce->reg_mem_dma)) { ++ dev_warn(&dev->dev, "apple-bce: Failed to pci_iomap required regions\n"); ++ goto fail; ++ } ++ ++ bce_mailbox_init(&bce->mbox, bce->reg_mem_mb); ++ bce_timestamp_init(&bce->timestamp, bce->reg_mem_mb); ++ ++ spin_lock_init(&bce->queues_lock); ++ ida_init(&bce->queue_ida); ++ ++ if ((status = pci_request_irq(dev, 0, bce_handle_mb_irq, NULL, dev, "bce_mbox"))) ++ goto fail; ++ if ((status = pci_request_irq(dev, 4, NULL, bce_handle_dma_irq, dev, "bce_dma"))) ++ goto fail_interrupt_0; ++ ++ if ((status = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(37)))) { ++ dev_warn(&dev->dev, "dma: Setting mask failed\n"); ++ goto fail_interrupt; ++ } ++ ++ /* Gets the function 0's interface. This is needed because Apple only accepts DMA on our function if function 0 ++ is a bus master, so we need to work around this. 
*/ ++ bce->pci0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); ++#ifndef WITHOUT_NVME_PATCH ++ if ((status = pci_enable_device_mem(bce->pci0))) { ++ dev_warn(&dev->dev, "apple-bce: failed to enable function 0\n"); ++ goto fail_dev0; ++ } ++#endif ++ pci_set_master(bce->pci0); ++ ++ bce_timestamp_start(&bce->timestamp, true); ++ ++ if ((status = bce_fw_version_handshake(bce))) ++ goto fail_ts; ++ pr_info("apple-bce: handshake done\n"); ++ ++ if ((status = bce_create_command_queues(bce))) { ++ pr_info("apple-bce: Creating command queues failed\n"); ++ goto fail_ts; ++ } ++ ++ global_bce = bce; ++ ++ bce_vhci_create(bce, &bce->vhci); ++ ++ return 0; ++ ++fail_ts: ++ bce_timestamp_stop(&bce->timestamp); ++#ifndef WITHOUT_NVME_PATCH ++ pci_disable_device(bce->pci0); ++fail_dev0: ++#endif ++ pci_dev_put(bce->pci0); ++fail_interrupt: ++ pci_free_irq(dev, 4, dev); ++fail_interrupt_0: ++ pci_free_irq(dev, 0, dev); ++fail: ++ if (bce && bce->dev) { ++ device_destroy(bce_class, bce->devt); ++ ++ if (!IS_ERR_OR_NULL(bce->reg_mem_mb)) ++ pci_iounmap(dev, bce->reg_mem_mb); ++ if (!IS_ERR_OR_NULL(bce->reg_mem_dma)) ++ pci_iounmap(dev, bce->reg_mem_dma); ++ ++ kfree(bce); ++ } ++ ++ pci_free_irq_vectors(dev); ++ pci_release_regions(dev); ++ pci_disable_device(dev); ++ ++ if (!status) ++ status = -EINVAL; ++ return status; ++} ++ ++static int bce_create_command_queues(struct apple_bce_device *bce) ++{ ++ int status; ++ struct bce_queue_memcfg *cfg; ++ ++ bce->cmd_cq = bce_alloc_cq(bce, 0, 0x20); ++ bce->cmd_cmdq = bce_alloc_cmdq(bce, 1, 0x20); ++ if (bce->cmd_cq == NULL || bce->cmd_cmdq == NULL) { ++ status = -ENOMEM; ++ goto err; ++ } ++ bce->queues[0] = (struct bce_queue *) bce->cmd_cq; ++ bce->queues[1] = (struct bce_queue *) bce->cmd_cmdq->sq; ++ ++ cfg = kzalloc(sizeof(struct bce_queue_memcfg), GFP_KERNEL); ++ if (!cfg) { ++ status = -ENOMEM; ++ goto err; ++ } ++ bce_get_cq_memcfg(bce->cmd_cq, cfg); ++ if ((status = bce_register_command_queue(bce, cfg, false))) ++ goto err; ++ bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg); ++ if ((status = bce_register_command_queue(bce, cfg, true))) ++ goto err; ++ kfree(cfg); ++ ++ return 0; ++ ++err: ++ if (bce->cmd_cq) ++ bce_free_cq(bce, bce->cmd_cq); ++ if (bce->cmd_cmdq) ++ bce_free_cmdq(bce, bce->cmd_cmdq); ++ return status; ++} ++ ++static void bce_free_command_queues(struct apple_bce_device *bce) ++{ ++ bce_free_cq(bce, bce->cmd_cq); ++ bce_free_cmdq(bce, bce->cmd_cmdq); ++ bce->cmd_cq = NULL; ++ bce->queues[0] = NULL; ++} ++ ++static irqreturn_t bce_handle_mb_irq(int irq, void *dev) ++{ ++ struct apple_bce_device *bce = pci_get_drvdata(dev); ++ bce_mailbox_handle_interrupt(&bce->mbox); ++ return IRQ_HANDLED; ++} ++ ++static irqreturn_t bce_handle_dma_irq(int irq, void *dev) ++{ ++ int i; ++ struct apple_bce_device *bce = pci_get_drvdata(dev); ++ spin_lock(&bce->queues_lock); ++ for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++) ++ if (bce->queues[i] && bce->queues[i]->type == BCE_QUEUE_CQ) ++ bce_handle_cq_completions(bce, (struct bce_queue_cq *) bce->queues[i]); ++ spin_unlock(&bce->queues_lock); ++ return IRQ_HANDLED; ++} ++ ++static int bce_fw_version_handshake(struct apple_bce_device *bce) ++{ ++ u64 result; ++ int status; ++ ++ if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SET_FW_PROTOCOL_VERSION, BC_PROTOCOL_VERSION), ++ &result))) ++ return status; ++ if (BCE_MB_TYPE(result) != BCE_MB_SET_FW_PROTOCOL_VERSION || ++ BCE_MB_VALUE(result) != BC_PROTOCOL_VERSION) { ++ pr_err("apple-bce: FW version handshake failed %x:%llx\n", 
BCE_MB_TYPE(result), BCE_MB_VALUE(result)); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int bce_register_command_queue(struct apple_bce_device *bce, struct bce_queue_memcfg *cfg, int is_sq) ++{ ++ int status; ++ int cmd_type; ++ u64 result; ++ // OS X uses an bidirectional direction, but that's not really needed ++ dma_addr_t a = dma_map_single(&bce->pci->dev, cfg, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE); ++ if (dma_mapping_error(&bce->pci->dev, a)) ++ return -ENOMEM; ++ cmd_type = is_sq ? BCE_MB_REGISTER_COMMAND_SQ : BCE_MB_REGISTER_COMMAND_CQ; ++ status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(cmd_type, a), &result); ++ dma_unmap_single(&bce->pci->dev, a, sizeof(struct bce_queue_memcfg), DMA_TO_DEVICE); ++ if (status) ++ return status; ++ if (BCE_MB_TYPE(result) != BCE_MB_REGISTER_COMMAND_QUEUE_REPLY) ++ return -EINVAL; ++ return 0; ++} ++ ++static void apple_bce_remove(struct pci_dev *dev) ++{ ++ struct apple_bce_device *bce = pci_get_drvdata(dev); ++ bce->is_being_removed = true; ++ ++ bce_vhci_destroy(&bce->vhci); ++ ++ bce_timestamp_stop(&bce->timestamp); ++#ifndef WITHOUT_NVME_PATCH ++ pci_disable_device(bce->pci0); ++#endif ++ pci_dev_put(bce->pci0); ++ pci_free_irq(dev, 0, dev); ++ pci_free_irq(dev, 4, dev); ++ bce_free_command_queues(bce); ++ pci_iounmap(dev, bce->reg_mem_mb); ++ pci_iounmap(dev, bce->reg_mem_dma); ++ device_destroy(bce_class, bce->devt); ++ pci_free_irq_vectors(dev); ++ pci_release_regions(dev); ++ pci_disable_device(dev); ++ kfree(bce); ++} ++ ++static int bce_save_state_and_sleep(struct apple_bce_device *bce) ++{ ++ int attempt, status = 0; ++ u64 resp; ++ dma_addr_t dma_addr; ++ void *dma_ptr = NULL; ++ size_t size = max(PAGE_SIZE, 4096UL); ++ ++ for (attempt = 0; attempt < 5; ++attempt) { ++ pr_debug("apple-bce: suspend: attempt %i, buffer size %li\n", attempt, size); ++ dma_ptr = dma_alloc_coherent(&bce->pci->dev, size, &dma_addr, GFP_KERNEL); ++ if (!dma_ptr) { ++ pr_err("apple-bce: suspend failed (data alloc failed)\n"); ++ break; ++ } ++ BUG_ON((dma_addr % 4096) != 0); ++ status = bce_mailbox_send(&bce->mbox, ++ BCE_MB_MSG(BCE_MB_SAVE_STATE_AND_SLEEP, (dma_addr & ~(4096LLU - 1)) | (size / 4096)), &resp); ++ if (status) { ++ pr_err("apple-bce: suspend failed (mailbox send)\n"); ++ break; ++ } ++ if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_RESTORE_STATE_COMPLETE) { ++ bce->saved_data_dma_addr = dma_addr; ++ bce->saved_data_dma_ptr = dma_ptr; ++ bce->saved_data_dma_size = size; ++ return 0; ++ } else if (BCE_MB_TYPE(resp) == BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE) { ++ dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr); ++ /* The 0x10ff magic value was extracted from Apple's driver */ ++ size = (BCE_MB_VALUE(resp) + 0x10ff) & ~(4096LLU - 1); ++ pr_debug("apple-bce: suspend: device requested a larger buffer (%li)\n", size); ++ continue; ++ } else { ++ pr_err("apple-bce: suspend failed (invalid device response)\n"); ++ status = -EINVAL; ++ break; ++ } ++ } ++ if (dma_ptr) ++ dma_free_coherent(&bce->pci->dev, size, dma_ptr, dma_addr); ++ if (!status) ++ return bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_SLEEP_NO_STATE, 0), &resp); ++ return status; ++} ++ ++static int bce_restore_state_and_wake(struct apple_bce_device *bce) ++{ ++ int status; ++ u64 resp; ++ if (!bce->saved_data_dma_ptr) { ++ if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_NO_STATE, 0), &resp))) { ++ pr_err("apple-bce: resume with no state failed (mailbox send)\n"); ++ return status; ++ } ++ if (BCE_MB_TYPE(resp) != BCE_MB_RESTORE_NO_STATE) { ++ 
pr_err("apple-bce: resume with no state failed (invalid device response)\n"); ++ return -EINVAL; ++ } ++ return 0; ++ } ++ ++ if ((status = bce_mailbox_send(&bce->mbox, BCE_MB_MSG(BCE_MB_RESTORE_STATE_AND_WAKE, ++ (bce->saved_data_dma_addr & ~(4096LLU - 1)) | (bce->saved_data_dma_size / 4096)), &resp))) { ++ pr_err("apple-bce: resume with state failed (mailbox send)\n"); ++ goto finish_with_state; ++ } ++ if (BCE_MB_TYPE(resp) != BCE_MB_SAVE_RESTORE_STATE_COMPLETE) { ++ pr_err("apple-bce: resume with state failed (invalid device response)\n"); ++ status = -EINVAL; ++ goto finish_with_state; ++ } ++ ++finish_with_state: ++ dma_free_coherent(&bce->pci->dev, bce->saved_data_dma_size, bce->saved_data_dma_ptr, bce->saved_data_dma_addr); ++ bce->saved_data_dma_ptr = NULL; ++ return status; ++} ++ ++static int apple_bce_suspend(struct device *dev) ++{ ++ struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev)); ++ int status; ++ ++ bce_timestamp_stop(&bce->timestamp); ++ ++ if ((status = bce_save_state_and_sleep(bce))) ++ return status; ++ ++ return 0; ++} ++ ++static int apple_bce_resume(struct device *dev) ++{ ++ struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev)); ++ int status; ++ ++ pci_set_master(bce->pci); ++ pci_set_master(bce->pci0); ++ ++ if ((status = bce_restore_state_and_wake(bce))) ++ return status; ++ ++ bce_timestamp_start(&bce->timestamp, false); ++ ++ return 0; ++} ++ ++static struct pci_device_id apple_bce_ids[ ] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1801) }, ++ { 0, }, ++}; ++ ++struct dev_pm_ops apple_bce_pci_driver_pm = { ++ .suspend = apple_bce_suspend, ++ .resume = apple_bce_resume ++}; ++struct pci_driver apple_bce_pci_driver = { ++ .name = "apple-bce", ++ .id_table = apple_bce_ids, ++ .probe = apple_bce_probe, ++ .remove = apple_bce_remove, ++ .driver = { ++ .pm = &apple_bce_pci_driver_pm ++ } ++}; ++ ++ ++static int __init apple_bce_module_init(void) ++{ ++ int result; ++ if ((result = alloc_chrdev_region(&bce_chrdev, 0, 1, "apple-bce"))) ++ goto fail_chrdev; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) ++ bce_class = class_create(THIS_MODULE, "apple-bce"); ++#else ++ bce_class = class_create("apple-bce"); ++#endif ++ if (IS_ERR(bce_class)) { ++ result = PTR_ERR(bce_class); ++ goto fail_class; ++ } ++ if ((result = bce_vhci_module_init())) { ++ pr_err("apple-bce: bce-vhci init failed"); ++ goto fail_class; ++ } ++ ++ result = pci_register_driver(&apple_bce_pci_driver); ++ if (result) ++ goto fail_drv; ++ ++ aaudio_module_init(); ++ ++ return 0; ++ ++fail_drv: ++ pci_unregister_driver(&apple_bce_pci_driver); ++fail_class: ++ class_destroy(bce_class); ++fail_chrdev: ++ unregister_chrdev_region(bce_chrdev, 1); ++ if (!result) ++ result = -EINVAL; ++ return result; ++} ++static void __exit apple_bce_module_exit(void) ++{ ++ pci_unregister_driver(&apple_bce_pci_driver); ++ ++ aaudio_module_exit(); ++ bce_vhci_module_exit(); ++ class_destroy(bce_class); ++ unregister_chrdev_region(bce_chrdev, 1); ++} ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("MrARM"); ++MODULE_DESCRIPTION("Apple BCE Driver"); ++MODULE_VERSION("0.01"); ++module_init(apple_bce_module_init); ++module_exit(apple_bce_module_exit); +diff --git a/drivers/staging/apple-bce/apple_bce.h b/drivers/staging/apple-bce/apple_bce.h +new file mode 100644 +index 000000000..f13ab8d57 +--- /dev/null ++++ b/drivers/staging/apple-bce/apple_bce.h +@@ -0,0 +1,38 @@ ++#pragma once ++ ++#include ++#include ++#include "mailbox.h" ++#include "queue.h" ++#include "vhci/vhci.h" ++ ++#define 
BC_PROTOCOL_VERSION 0x20001 ++#define BCE_MAX_QUEUE_COUNT 0x100 ++ ++#define BCE_QUEUE_USER_MIN 2 ++#define BCE_QUEUE_USER_MAX (BCE_MAX_QUEUE_COUNT - 1) ++ ++struct apple_bce_device { ++ struct pci_dev *pci, *pci0; ++ dev_t devt; ++ struct device *dev; ++ void __iomem *reg_mem_mb; ++ void __iomem *reg_mem_dma; ++ struct bce_mailbox mbox; ++ struct bce_timestamp timestamp; ++ struct bce_queue *queues[BCE_MAX_QUEUE_COUNT]; ++ struct spinlock queues_lock; ++ struct ida queue_ida; ++ struct bce_queue_cq *cmd_cq; ++ struct bce_queue_cmdq *cmd_cmdq; ++ struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT]; ++ bool is_being_removed; ++ ++ dma_addr_t saved_data_dma_addr; ++ void *saved_data_dma_ptr; ++ size_t saved_data_dma_size; ++ ++ struct bce_vhci vhci; ++}; ++ ++extern struct apple_bce_device *global_bce; +\ No newline at end of file +diff --git a/drivers/staging/apple-bce/audio/audio.c b/drivers/staging/apple-bce/audio/audio.c +new file mode 100644 +index 000000000..bd16ddd16 +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/audio.c +@@ -0,0 +1,711 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "audio.h" ++#include "pcm.h" ++#include ++ ++static int aaudio_alsa_index = SNDRV_DEFAULT_IDX1; ++static char *aaudio_alsa_id = SNDRV_DEFAULT_STR1; ++ ++static dev_t aaudio_chrdev; ++static struct class *aaudio_class; ++ ++static int aaudio_init_cmd(struct aaudio_device *a); ++static int aaudio_init_bs(struct aaudio_device *a); ++static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id); ++static void aaudio_free_dev(struct aaudio_subdevice *sdev); ++ ++static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id) ++{ ++ struct aaudio_device *aaudio = NULL; ++ struct aaudio_subdevice *sdev = NULL; ++ int status = 0; ++ u32 cfg; ++ ++ pr_info("aaudio: capturing our device\n"); ++ ++ if (pci_enable_device(dev)) ++ return -ENODEV; ++ if (pci_request_regions(dev, "aaudio")) { ++ status = -ENODEV; ++ goto fail; ++ } ++ pci_set_master(dev); ++ ++ aaudio = kzalloc(sizeof(struct aaudio_device), GFP_KERNEL); ++ if (!aaudio) { ++ status = -ENOMEM; ++ goto fail; ++ } ++ ++ aaudio->bce = global_bce; ++ if (!aaudio->bce) { ++ dev_warn(&dev->dev, "aaudio: No BCE available\n"); ++ status = -EINVAL; ++ goto fail; ++ } ++ ++ aaudio->pci = dev; ++ pci_set_drvdata(dev, aaudio); ++ ++ aaudio->devt = aaudio_chrdev; ++ aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio"); ++ if (IS_ERR_OR_NULL(aaudio->dev)) { ++ status = PTR_ERR(aaudio_class); ++ goto fail; ++ } ++ device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER); ++ ++ init_completion(&aaudio->remote_alive); ++ INIT_LIST_HEAD(&aaudio->subdevice_list); ++ ++ /* Init: set an unknown flag in the bitset */ ++ if (pci_read_config_dword(dev, 4, &cfg)) ++ dev_warn(&dev->dev, "aaudio: pci_read_config_dword fail\n"); ++ if (pci_write_config_dword(dev, 4, cfg | 6u)) ++ dev_warn(&dev->dev, "aaudio: pci_write_config_dword fail\n"); ++ ++ dev_info(aaudio->dev, "aaudio: bs len = %llx\n", pci_resource_len(dev, 0)); ++ aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0); ++ aaudio->reg_mem_bs = pci_iomap(dev, 0, 0); ++ aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0); ++ ++ aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000); ++ ++ if (IS_ERR_OR_NULL(aaudio->reg_mem_bs) || IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) { ++ dev_warn(&dev->dev, "aaudio: Failed to pci_iomap required regions\n"); ++ goto 
fail; ++ } ++ ++ if (aaudio_bce_init(aaudio)) { ++ dev_warn(&dev->dev, "aaudio: Failed to init BCE command transport\n"); ++ goto fail; ++ } ++ ++ if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) { ++ dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n"); ++ goto fail; ++ } ++ ++ strcpy(aaudio->card->shortname, "Apple T2 Audio"); ++ strcpy(aaudio->card->longname, "Apple T2 Audio"); ++ strcpy(aaudio->card->mixername, "Apple T2 Audio"); ++ /* Dynamic alsa ids start at 100 */ ++ aaudio->next_alsa_id = 100; ++ ++ if (aaudio_init_cmd(aaudio)) { ++ dev_err(&dev->dev, "aaudio: Failed to initialize over BCE\n"); ++ goto fail_snd; ++ } ++ ++ if (aaudio_init_bs(aaudio)) { ++ dev_err(&dev->dev, "aaudio: Failed to initialize BufferStruct\n"); ++ goto fail_snd; ++ } ++ ++ if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) { ++ dev_err(&dev->dev, "Failed to set remote access\n"); ++ return status; ++ } ++ ++ if (snd_card_register(aaudio->card)) { ++ dev_err(&dev->dev, "aaudio: Failed to register ALSA sound device\n"); ++ goto fail_snd; ++ } ++ ++ list_for_each_entry(sdev, &aaudio->subdevice_list, list) { ++ struct aaudio_buffer_struct_device *dev = &aaudio->bs->devices[sdev->buf_id]; ++ ++ if (sdev->out_stream_cnt == 1 && !strcmp(dev->name, "Speaker")) { ++ struct snd_pcm_hardware *hw = sdev->out_streams[0].alsa_hw_desc; ++ ++ snprintf(aaudio->card->driver, sizeof(aaudio->card->driver) / sizeof(char), "AppleT2x%d", hw->channels_min); ++ } ++ } ++ ++ return 0; ++ ++fail_snd: ++ snd_card_free(aaudio->card); ++fail: ++ if (aaudio && aaudio->dev) ++ device_destroy(aaudio_class, aaudio->devt); ++ kfree(aaudio); ++ ++ if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs)) ++ pci_iounmap(dev, aaudio->reg_mem_bs); ++ if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) ++ pci_iounmap(dev, aaudio->reg_mem_cfg); ++ ++ pci_release_regions(dev); ++ pci_disable_device(dev); ++ ++ if (!status) ++ status = -EINVAL; ++ return status; ++} ++ ++ ++ ++static void aaudio_remove(struct pci_dev *dev) ++{ ++ struct aaudio_subdevice *sdev; ++ struct aaudio_device *aaudio = pci_get_drvdata(dev); ++ ++ snd_card_free(aaudio->card); ++ while (!list_empty(&aaudio->subdevice_list)) { ++ sdev = list_first_entry(&aaudio->subdevice_list, struct aaudio_subdevice, list); ++ list_del(&sdev->list); ++ aaudio_free_dev(sdev); ++ } ++ pci_iounmap(dev, aaudio->reg_mem_bs); ++ pci_iounmap(dev, aaudio->reg_mem_cfg); ++ device_destroy(aaudio_class, aaudio->devt); ++ pci_free_irq_vectors(dev); ++ pci_release_regions(dev); ++ pci_disable_device(dev); ++ kfree(aaudio); ++} ++ ++static int aaudio_suspend(struct device *dev) ++{ ++ struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev)); ++ ++ if (aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF)) ++ dev_warn(aaudio->dev, "Failed to reset remote access\n"); ++ ++ pci_disable_device(aaudio->pci); ++ return 0; ++} ++ ++static int aaudio_resume(struct device *dev) ++{ ++ int status; ++ struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev)); ++ ++ if ((status = pci_enable_device(aaudio->pci))) ++ return status; ++ pci_set_master(aaudio->pci); ++ ++ if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) { ++ dev_err(aaudio->dev, "Failed to set remote access\n"); ++ return status; ++ } ++ ++ return 0; ++} ++ ++static int aaudio_init_cmd(struct aaudio_device *a) ++{ ++ int status; ++ struct aaudio_send_ctx sctx; ++ struct aaudio_msg buf; ++ u64 dev_cnt, dev_i; ++ aaudio_device_id_t *dev_l; ++ ++ if 
++            aaudio_msg_write_alive_notification, 1, 3))) {
++        dev_err(a->dev, "Sending alive notification failed\n");
++        return status;
++    }
++
++    if (wait_for_completion_timeout(&a->remote_alive, msecs_to_jiffies(500)) == 0) {
++        dev_err(a->dev, "Timed out waiting for remote\n");
++        return -ETIMEDOUT;
++    }
++    dev_info(a->dev, "Continuing init\n");
++
++    buf = aaudio_reply_alloc();
++    if ((status = aaudio_cmd_get_device_list(a, &buf, &dev_l, &dev_cnt))) {
++        dev_err(a->dev, "Failed to get device list\n");
++        aaudio_reply_free(&buf);
++        return status;
++    }
++    for (dev_i = 0; dev_i < dev_cnt; ++dev_i)
++        aaudio_init_dev(a, dev_l[dev_i]);
++    aaudio_reply_free(&buf);
++
++    return 0;
++}
++
++static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm);
++static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev);
++
++static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id)
++{
++    struct aaudio_subdevice *sdev;
++    struct aaudio_msg buf = aaudio_reply_alloc();
++    u64 uid_len, stream_cnt, i;
++    aaudio_object_id_t *stream_list;
++    char *uid;
++
++    sdev = kzalloc(sizeof(struct aaudio_subdevice), GFP_KERNEL);
++    if (!sdev)
++        goto fail;
++
++    if (aaudio_cmd_get_property(a, &buf, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_UID, 0),
++            NULL, 0, (void **) &uid, &uid_len) || uid_len > AAUDIO_DEVICE_MAX_UID_LEN) {
++        dev_err(a->dev, "Failed to get device uid for device %llx\n", dev_id);
++        goto fail;
++    }
++    dev_info(a->dev, "Remote device %llx %.*s\n", dev_id, (int) uid_len, uid);
++
++    sdev->a = a;
++    INIT_LIST_HEAD(&sdev->list);
++    sdev->dev_id = dev_id;
++    sdev->buf_id = AAUDIO_BUFFER_ID_NONE;
++    strncpy(sdev->uid, uid, uid_len);
++    sdev->uid[uid_len] = '\0';
++
++    if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32)))
++        dev_warn(a->dev, "Failed to query device input latency\n");
++    if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->out_latency, sizeof(u32)))
++        dev_warn(a->dev, "Failed to query device output latency\n");
++
++    if (aaudio_cmd_get_input_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
++        dev_err(a->dev, "Failed to get input stream list for device %llx\n", dev_id);
++        goto fail;
++    }
++    if (stream_cnt > AAUDIO_DEIVCE_MAX_INPUT_STREAMS) {
++        dev_warn(a->dev, "Device %s input stream count %llu is larger than the supported count of %u\n",
++                sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_INPUT_STREAMS);
++        stream_cnt = AAUDIO_DEIVCE_MAX_INPUT_STREAMS;
++    }
++    sdev->in_stream_cnt = stream_cnt;
++    for (i = 0; i < stream_cnt; i++) {
++        sdev->in_streams[i].id = stream_list[i];
++        sdev->in_streams[i].buffer_cnt = 0;
++        aaudio_init_stream_info(sdev, &sdev->in_streams[i]);
++        sdev->in_streams[i].latency += sdev->in_latency;
++    }
++
++    if (aaudio_cmd_get_output_stream_list(a, &buf, dev_id, &stream_list, &stream_cnt)) {
++        dev_err(a->dev, "Failed to get output stream list for device %llx\n", dev_id);
++        goto fail;
++    }
++    if (stream_cnt > AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS) {
++        dev_warn(a->dev, "Device %s output stream count %llu is larger than the supported count of %u\n",
++                sdev->uid, stream_cnt, AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS);
++        stream_cnt = AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS;
++    }
++    sdev->out_stream_cnt = stream_cnt;
++    for (i = 0; i < stream_cnt; i++) {
++        sdev->out_streams[i].id = stream_list[i];
++        sdev->out_streams[i].buffer_cnt = 0;
++        aaudio_init_stream_info(sdev, &sdev->out_streams[i]);
++        sdev->out_streams[i].latency += sdev->out_latency;
++    }
++
++    if (sdev->is_pcm)
++        aaudio_create_pcm(sdev);
++    /* Headphone Jack status */
++    if (!strcmp(sdev->uid, "Codec Output")) {
++        if (snd_jack_new(a->card, sdev->uid, SND_JACK_HEADPHONE, &sdev->jack, true, false))
++            dev_warn(a->dev, "Failed to create an attached jack for %s\n", sdev->uid);
++        aaudio_cmd_property_listener(a, sdev->dev_id, sdev->dev_id,
++                AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0));
++        aaudio_handle_jack_connection_change(sdev);
++    }
++
++    aaudio_reply_free(&buf);
++
++    list_add_tail(&sdev->list, &a->subdevice_list);
++    return;
++
++fail:
++    aaudio_reply_free(&buf);
++    kfree(sdev);
++}
++
++static void aaudio_init_stream_info(struct aaudio_subdevice *sdev, struct aaudio_stream *strm)
++{
++    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_PHYS_FORMAT, 0), NULL, 0,
++            &strm->desc, sizeof(strm->desc)))
++        dev_warn(sdev->a->dev, "Failed to query stream descriptor\n");
++    if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, strm->id,
++            AAUDIO_PROP(AAUDIO_PROP_SCOPE_GLOBAL, AAUDIO_PROP_LATENCY, 0), NULL, 0, &strm->latency, sizeof(u32)))
++        dev_warn(sdev->a->dev, "Failed to query stream latency\n");
++    if (strm->desc.format_id == AAUDIO_FORMAT_LPCM)
++        sdev->is_pcm = true;
++}
++
++static void aaudio_free_dev(struct aaudio_subdevice *sdev)
++{
++    size_t i;
++    for (i = 0; i < sdev->in_stream_cnt; i++) {
++        kfree(sdev->in_streams[i].alsa_hw_desc);
++        kfree(sdev->in_streams[i].buffers);
++    }
++    for (i = 0; i < sdev->out_stream_cnt; i++) {
++        kfree(sdev->out_streams[i].alsa_hw_desc);
++        kfree(sdev->out_streams[i].buffers);
++    }
++    kfree(sdev);
++}
++
++static struct aaudio_subdevice *aaudio_find_dev_by_dev_id(struct aaudio_device *a, aaudio_device_id_t dev_id)
++{
++    struct aaudio_subdevice *sdev;
++    list_for_each_entry(sdev, &a->subdevice_list, list) {
++        if (dev_id == sdev->dev_id)
++            return sdev;
++    }
++    return NULL;
++}
++
++static struct aaudio_subdevice *aaudio_find_dev_by_uid(struct aaudio_device *a, const char *uid)
++{
++    struct aaudio_subdevice *sdev;
++    list_for_each_entry(sdev, &a->subdevice_list, list) {
++        if (!strcmp(uid, sdev->uid))
++            return sdev;
++    }
++    return NULL;
++}
++
++static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm,
++        struct aaudio_buffer_struct_stream *bs_strm);
++static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm,
++        struct aaudio_buffer_struct_stream *bs_strm);
++
++static int aaudio_init_bs(struct aaudio_device *a)
++{
++    int i, j;
++    struct aaudio_buffer_struct_device *dev;
++    struct aaudio_subdevice *sdev;
++    u32 ver, sig, bs_base;
++
++    ver = ioread32(&a->reg_mem_gpr[0]);
++    if (ver < 3) {
++        dev_err(a->dev, "aaudio: Bad GPR version (%u)", ver);
++        return -EINVAL;
++    }
++    sig = ioread32(&a->reg_mem_gpr[1]);
++    if (sig != AAUDIO_SIG) {
++        dev_err(a->dev, "aaudio: Bad GPR sig (%x)", sig);
++        return -EINVAL;
++    }
++    bs_base = ioread32(&a->reg_mem_gpr[2]);
++    a->bs = (struct aaudio_buffer_struct *) ((u8 *) a->reg_mem_bs + bs_base);
++    if (a->bs->signature != AAUDIO_SIG) {
++        dev_err(a->dev, "aaudio: Bad BufferStruct sig (%x)", a->bs->signature);
++ 
return -EINVAL; ++ } ++ dev_info(a->dev, "aaudio: BufferStruct ver = %i\n", a->bs->version); ++ dev_info(a->dev, "aaudio: Num devices = %i\n", a->bs->num_devices); ++ for (i = 0; i < a->bs->num_devices; i++) { ++ dev = &a->bs->devices[i]; ++ dev_info(a->dev, "aaudio: Device %i %s\n", i, dev->name); ++ ++ sdev = aaudio_find_dev_by_uid(a, dev->name); ++ if (!sdev) { ++ dev_err(a->dev, "aaudio: Subdevice not found for BufferStruct device %s\n", dev->name); ++ continue; ++ } ++ sdev->buf_id = (u8) i; ++ dev->num_input_streams = 0; ++ for (j = 0; j < dev->num_output_streams; j++) { ++ dev_info(a->dev, "aaudio: Device %i Stream %i: Output; Buffer Count = %i\n", i, j, ++ dev->output_streams[j].num_buffers); ++ if (j < sdev->out_stream_cnt) ++ aaudio_init_bs_stream(a, &sdev->out_streams[j], &dev->output_streams[j]); ++ } ++ } ++ ++ list_for_each_entry(sdev, &a->subdevice_list, list) { ++ if (sdev->buf_id != AAUDIO_BUFFER_ID_NONE) ++ continue; ++ sdev->buf_id = i; ++ dev_info(a->dev, "aaudio: Created device %i %s\n", i, sdev->uid); ++ strcpy(a->bs->devices[i].name, sdev->uid); ++ a->bs->devices[i].num_input_streams = 0; ++ a->bs->devices[i].num_output_streams = 0; ++ a->bs->num_devices = ++i; ++ } ++ list_for_each_entry(sdev, &a->subdevice_list, list) { ++ if (sdev->in_stream_cnt == 1) { ++ dev_info(a->dev, "aaudio: Device %i Host Stream; Input\n", sdev->buf_id); ++ aaudio_init_bs_stream_host(a, &sdev->in_streams[0], &a->bs->devices[sdev->buf_id].input_streams[0]); ++ a->bs->devices[sdev->buf_id].num_input_streams = 1; ++ wmb(); ++ ++ if (aaudio_cmd_set_input_stream_address_ranges(a, sdev->dev_id)) { ++ dev_err(a->dev, "aaudio: Failed to set input stream address ranges\n"); ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static void aaudio_init_bs_stream(struct aaudio_device *a, struct aaudio_stream *strm, ++ struct aaudio_buffer_struct_stream *bs_strm) ++{ ++ size_t i; ++ strm->buffer_cnt = bs_strm->num_buffers; ++ if (bs_strm->num_buffers > AAUDIO_DEIVCE_MAX_BUFFER_COUNT) { ++ dev_warn(a->dev, "BufferStruct buffer count %u exceeds driver limit of %u\n", bs_strm->num_buffers, ++ AAUDIO_DEIVCE_MAX_BUFFER_COUNT); ++ strm->buffer_cnt = AAUDIO_DEIVCE_MAX_BUFFER_COUNT; ++ } ++ if (!strm->buffer_cnt) ++ return; ++ strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL); ++ if (!strm->buffers) { ++ dev_err(a->dev, "Buffer list allocation failed\n"); ++ return; ++ } ++ for (i = 0; i < strm->buffer_cnt; i++) { ++ strm->buffers[i].dma_addr = a->reg_mem_bs_dma + (dma_addr_t) bs_strm->buffers[i].address; ++ strm->buffers[i].ptr = a->reg_mem_bs + bs_strm->buffers[i].address; ++ strm->buffers[i].size = bs_strm->buffers[i].size; ++ } ++ ++ if (strm->buffer_cnt == 1) { ++ strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL); ++ if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) { ++ kfree(strm->alsa_hw_desc); ++ strm->alsa_hw_desc = NULL; ++ } ++ } ++} ++ ++static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_stream *strm, ++ struct aaudio_buffer_struct_stream *bs_strm) ++{ ++ size_t size; ++ dma_addr_t dma_addr; ++ void *dma_ptr; ++ size = strm->desc.bytes_per_packet * 16640; ++ dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL); ++ if (!dma_ptr) { ++ dev_err(a->dev, "dma_alloc_coherent failed\n"); ++ return; ++ } ++ bs_strm->buffers[0].address = dma_addr; ++ bs_strm->buffers[0].size = size; ++ bs_strm->num_buffers = 1; ++ ++ memset(dma_ptr, 0, size); ++ ++ strm->buffer_cnt = 1; ++ 
strm->buffers = kmalloc_array(strm->buffer_cnt, sizeof(struct aaudio_dma_buf), GFP_KERNEL); ++ if (!strm->buffers) { ++ dev_err(a->dev, "Buffer list allocation failed\n"); ++ return; ++ } ++ strm->buffers[0].dma_addr = dma_addr; ++ strm->buffers[0].ptr = dma_ptr; ++ strm->buffers[0].size = size; ++ ++ strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL); ++ if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) { ++ kfree(strm->alsa_hw_desc); ++ strm->alsa_hw_desc = NULL; ++ } ++} ++ ++static void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg); ++ ++void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg) ++{ ++ struct aaudio_send_ctx sctx; ++ struct aaudio_msg_base base; ++ if (aaudio_msg_read_base(msg, &base)) ++ return; ++ switch (base.msg) { ++ case AAUDIO_MSG_NOTIFICATION_BOOT: ++ dev_info(a->dev, "Received boot notification from remote\n"); ++ ++ /* Resend the alive notify */ ++ if (aaudio_send(a, &sctx, 500, ++ aaudio_msg_write_alive_notification, 1, 3)) { ++ pr_err("Sending alive notification failed\n"); ++ } ++ break; ++ case AAUDIO_MSG_NOTIFICATION_ALIVE: ++ dev_info(a->dev, "Received alive notification from remote\n"); ++ complete_all(&a->remote_alive); ++ break; ++ case AAUDIO_MSG_PROPERTY_CHANGED: ++ aaudio_handle_prop_change(a, msg); ++ break; ++ default: ++ dev_info(a->dev, "Unhandled notification %i", base.msg); ++ break; ++ } ++} ++ ++struct aaudio_prop_change_work_struct { ++ struct work_struct ws; ++ struct aaudio_device *a; ++ aaudio_device_id_t dev; ++ aaudio_object_id_t obj; ++ struct aaudio_prop_addr prop; ++}; ++ ++static void aaudio_handle_jack_connection_change(struct aaudio_subdevice *sdev) ++{ ++ u32 plugged; ++ if (!sdev->jack) ++ return; ++ /* NOTE: Apple made the plug status scoped to the input and output streams. This makes no sense for us, so I just ++ * always pick the OUTPUT status. */ ++ if (aaudio_cmd_get_primitive_property(sdev->a, sdev->dev_id, sdev->dev_id, ++ AAUDIO_PROP(AAUDIO_PROP_SCOPE_OUTPUT, AAUDIO_PROP_JACK_PLUGGED, 0), NULL, 0, &plugged, sizeof(plugged))) { ++ dev_err(sdev->a->dev, "Failed to get jack enable status\n"); ++ return; ++ } ++ dev_dbg(sdev->a->dev, "Jack is now %s\n", plugged ? "plugged" : "unplugged"); ++ snd_jack_report(sdev->jack, plugged ? sdev->jack->type : 0); ++} ++ ++void aaudio_handle_prop_change_work(struct work_struct *ws) ++{ ++ struct aaudio_prop_change_work_struct *work = container_of(ws, struct aaudio_prop_change_work_struct, ws); ++ struct aaudio_subdevice *sdev; ++ ++ sdev = aaudio_find_dev_by_dev_id(work->a, work->dev); ++ if (!sdev) { ++ dev_err(work->a->dev, "Property notification change: device not found\n"); ++ goto done; ++ } ++ dev_dbg(work->a->dev, "Property changed for device: %s\n", sdev->uid); ++ ++ if (work->prop.scope == AAUDIO_PROP_SCOPE_OUTPUT && work->prop.selector == AAUDIO_PROP_JACK_PLUGGED) { ++ aaudio_handle_jack_connection_change(sdev); ++ } ++ ++done: ++ kfree(work); ++} ++ ++void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg) ++{ ++ /* NOTE: This is a scheduled work because this callback will generally need to query device information and this ++ * is not possible when we are in the reply parsing code's context. 
*/ ++ struct aaudio_prop_change_work_struct *work; ++ work = kmalloc(sizeof(struct aaudio_prop_change_work_struct), GFP_KERNEL); ++ work->a = a; ++ INIT_WORK(&work->ws, aaudio_handle_prop_change_work); ++ aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop); ++ schedule_work(&work->ws); ++} ++ ++#define aaudio_send_cmd_response(a, sctx, msg, fn, ...) \ ++ if (aaudio_send_with_tag(a, sctx, ((struct aaudio_msg_header *) msg->data)->tag, 500, fn, ##__VA_ARGS__)) \ ++ pr_err("aaudio: Failed to reply to a command\n"); ++ ++void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg) ++{ ++ ktime_t time_os = ktime_get_boottime(); ++ struct aaudio_send_ctx sctx; ++ struct aaudio_subdevice *sdev; ++ u64 devid, timestamp, update_seed; ++ aaudio_msg_read_update_timestamp(msg, &devid, ×tamp, &update_seed); ++ dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed); ++ ++ sdev = aaudio_find_dev_by_dev_id(a, devid); ++ aaudio_handle_timestamp(sdev, time_os, timestamp); ++ ++ aaudio_send_cmd_response(a, &sctx, msg, ++ aaudio_msg_write_update_timestamp_response); ++} ++ ++void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg) ++{ ++ struct aaudio_msg_base base; ++ if (aaudio_msg_read_base(msg, &base)) ++ return; ++ switch (base.msg) { ++ case AAUDIO_MSG_UPDATE_TIMESTAMP: ++ aaudio_handle_cmd_timestamp(a, msg); ++ break; ++ default: ++ dev_info(a->dev, "Unhandled device command %i", base.msg); ++ break; ++ } ++} ++ ++static struct pci_device_id aaudio_ids[ ] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x1803) }, ++ { 0, }, ++}; ++ ++struct dev_pm_ops aaudio_pci_driver_pm = { ++ .suspend = aaudio_suspend, ++ .resume = aaudio_resume ++}; ++struct pci_driver aaudio_pci_driver = { ++ .name = "aaudio", ++ .id_table = aaudio_ids, ++ .probe = aaudio_probe, ++ .remove = aaudio_remove, ++ .driver = { ++ .pm = &aaudio_pci_driver_pm ++ } ++}; ++ ++ ++int aaudio_module_init(void) ++{ ++ int result; ++ if ((result = alloc_chrdev_region(&aaudio_chrdev, 0, 1, "aaudio"))) ++ goto fail_chrdev; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) ++ aaudio_class = class_create(THIS_MODULE, "aaudio"); ++#else ++ aaudio_class = class_create("aaudio"); ++#endif ++ if (IS_ERR(aaudio_class)) { ++ result = PTR_ERR(aaudio_class); ++ goto fail_class; ++ } ++ ++ result = pci_register_driver(&aaudio_pci_driver); ++ if (result) ++ goto fail_drv; ++ return 0; ++ ++fail_drv: ++ pci_unregister_driver(&aaudio_pci_driver); ++fail_class: ++ class_destroy(aaudio_class); ++fail_chrdev: ++ unregister_chrdev_region(aaudio_chrdev, 1); ++ if (!result) ++ result = -EINVAL; ++ return result; ++} ++ ++void aaudio_module_exit(void) ++{ ++ pci_unregister_driver(&aaudio_pci_driver); ++ class_destroy(aaudio_class); ++ unregister_chrdev_region(aaudio_chrdev, 1); ++} ++ ++struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[] = { ++ {"Speaker", 0}, ++ {"Digital Mic", 1}, ++ {"Codec Output", 2}, ++ {"Codec Input", 3}, ++ {"Bridge Loopback", 4}, ++ {} ++}; ++ ++module_param_named(index, aaudio_alsa_index, int, 0444); ++MODULE_PARM_DESC(index, "Index value for Apple Internal Audio soundcard."); ++module_param_named(id, aaudio_alsa_id, charp, 0444); ++MODULE_PARM_DESC(id, "ID string for Apple Internal Audio soundcard."); +diff --git a/drivers/staging/apple-bce/audio/audio.h b/drivers/staging/apple-bce/audio/audio.h +new file mode 100644 +index 000000000..693006e93 +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/audio.h +@@ -0,0 +1,123 @@ 
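++/*
++ * aaudio: ALSA driver for the Apple T2 audio coprocessor, layered on top
++ * of the apple-bce transport.
++ *
++ * Shared-memory layout (see aaudio_init_bs() in audio.c): the device
++ * exposes GPR words at offset 0xC000 of BAR 4; gpr[0] carries a layout
++ * version (must be >= 3), gpr[1] must contain the AAUDIO_SIG magic, and
++ * gpr[2] holds the byte offset of the struct aaudio_buffer_struct inside
++ * BAR 0. The BufferStruct repeats the magic in its signature field and
++ * describes, per device, the input/output streams and the DMA buffers
++ * that both the host and the remote side access.
++ */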
++#ifndef AAUDIO_H ++#define AAUDIO_H ++ ++#include ++#include ++#include "../apple_bce.h" ++#include "protocol_bce.h" ++#include "description.h" ++ ++#define AAUDIO_SIG 0x19870423 ++ ++#define AAUDIO_DEVICE_MAX_UID_LEN 128 ++#define AAUDIO_DEIVCE_MAX_INPUT_STREAMS 1 ++#define AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS 1 ++#define AAUDIO_DEIVCE_MAX_BUFFER_COUNT 1 ++ ++#define AAUDIO_BUFFER_ID_NONE 0xffu ++ ++struct snd_card; ++struct snd_pcm; ++struct snd_pcm_hardware; ++struct snd_jack; ++ ++struct __attribute__((packed)) __attribute__((aligned(4))) aaudio_buffer_struct_buffer { ++ size_t address; ++ size_t size; ++ size_t pad[4]; ++}; ++struct aaudio_buffer_struct_stream { ++ u8 num_buffers; ++ struct aaudio_buffer_struct_buffer buffers[100]; ++ char filler[32]; ++}; ++struct aaudio_buffer_struct_device { ++ char name[128]; ++ u8 num_input_streams; ++ u8 num_output_streams; ++ struct aaudio_buffer_struct_stream input_streams[5]; ++ struct aaudio_buffer_struct_stream output_streams[5]; ++ char filler[128]; ++}; ++struct aaudio_buffer_struct { ++ u32 version; ++ u32 signature; ++ u32 flags; ++ u8 num_devices; ++ struct aaudio_buffer_struct_device devices[20]; ++}; ++ ++struct aaudio_device; ++struct aaudio_dma_buf { ++ dma_addr_t dma_addr; ++ void *ptr; ++ size_t size; ++}; ++struct aaudio_stream { ++ aaudio_object_id_t id; ++ size_t buffer_cnt; ++ struct aaudio_dma_buf *buffers; ++ ++ struct aaudio_apple_description desc; ++ struct snd_pcm_hardware *alsa_hw_desc; ++ u32 latency; ++ ++ bool waiting_for_first_ts; ++ ++ ktime_t remote_timestamp; ++ snd_pcm_sframes_t frame_min; ++ int started; ++}; ++struct aaudio_subdevice { ++ struct aaudio_device *a; ++ struct list_head list; ++ aaudio_device_id_t dev_id; ++ u32 in_latency, out_latency; ++ u8 buf_id; ++ int alsa_id; ++ char uid[AAUDIO_DEVICE_MAX_UID_LEN + 1]; ++ size_t in_stream_cnt; ++ struct aaudio_stream in_streams[AAUDIO_DEIVCE_MAX_INPUT_STREAMS]; ++ size_t out_stream_cnt; ++ struct aaudio_stream out_streams[AAUDIO_DEIVCE_MAX_OUTPUT_STREAMS]; ++ bool is_pcm; ++ struct snd_pcm *pcm; ++ struct snd_jack *jack; ++}; ++struct aaudio_alsa_pcm_id_mapping { ++ const char *name; ++ int alsa_id; ++}; ++ ++struct aaudio_device { ++ struct pci_dev *pci; ++ dev_t devt; ++ struct device *dev; ++ void __iomem *reg_mem_bs; ++ dma_addr_t reg_mem_bs_dma; ++ void __iomem *reg_mem_cfg; ++ ++ u32 __iomem *reg_mem_gpr; ++ ++ struct aaudio_buffer_struct *bs; ++ ++ struct apple_bce_device *bce; ++ struct aaudio_bce bcem; ++ ++ struct snd_card *card; ++ ++ struct list_head subdevice_list; ++ int next_alsa_id; ++ ++ struct completion remote_alive; ++}; ++ ++void aaudio_handle_notification(struct aaudio_device *a, struct aaudio_msg *msg); ++void aaudio_handle_command(struct aaudio_device *a, struct aaudio_msg *msg); ++ ++int aaudio_module_init(void); ++void aaudio_module_exit(void); ++ ++extern struct aaudio_alsa_pcm_id_mapping aaudio_alsa_id_mappings[]; ++ ++#endif //AAUDIO_H +diff --git a/drivers/staging/apple-bce/audio/description.h b/drivers/staging/apple-bce/audio/description.h +new file mode 100644 +index 000000000..dfef3ab68 +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/description.h +@@ -0,0 +1,42 @@ ++#ifndef AAUDIO_DESCRIPTION_H ++#define AAUDIO_DESCRIPTION_H ++ ++#include ++ ++struct aaudio_apple_description { ++ u64 sample_rate_double; ++ u32 format_id; ++ u32 format_flags; ++ u32 bytes_per_packet; ++ u32 frames_per_packet; ++ u32 bytes_per_frame; ++ u32 channels_per_frame; ++ u32 bits_per_channel; ++ u32 reserved; ++}; ++ ++enum { ++ 
AAUDIO_FORMAT_LPCM = 0x6c70636d // 'lpcm' ++}; ++ ++enum { ++ AAUDIO_FORMAT_FLAG_FLOAT = 1, ++ AAUDIO_FORMAT_FLAG_BIG_ENDIAN = 2, ++ AAUDIO_FORMAT_FLAG_SIGNED = 4, ++ AAUDIO_FORMAT_FLAG_PACKED = 8, ++ AAUDIO_FORMAT_FLAG_ALIGNED_HIGH = 16, ++ AAUDIO_FORMAT_FLAG_NON_INTERLEAVED = 32, ++ AAUDIO_FORMAT_FLAG_NON_MIXABLE = 64 ++}; ++ ++static inline u64 aaudio_double_to_u64(u64 d) ++{ ++ u8 sign = (u8) ((d >> 63) & 1); ++ s32 exp = (s32) ((d >> 52) & 0x7ff) - 1023; ++ u64 fr = d & ((1LL << 52) - 1); ++ if (sign || exp < 0) ++ return 0; ++ return (u64) ((1LL << exp) + (fr >> (52 - exp))); ++} ++ ++#endif //AAUDIO_DESCRIPTION_H +diff --git a/drivers/staging/apple-bce/audio/pcm.c b/drivers/staging/apple-bce/audio/pcm.c +new file mode 100644 +index 000000000..1026e10a9 +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/pcm.c +@@ -0,0 +1,308 @@ ++#include "pcm.h" ++#include "audio.h" ++ ++static u64 aaudio_get_alsa_fmtbit(struct aaudio_apple_description *desc) ++{ ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_FLOAT) { ++ if (desc->bits_per_channel == 32) { ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) ++ return SNDRV_PCM_FMTBIT_FLOAT_BE; ++ else ++ return SNDRV_PCM_FMTBIT_FLOAT_LE; ++ } else if (desc->bits_per_channel == 64) { ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) ++ return SNDRV_PCM_FMTBIT_FLOAT64_BE; ++ else ++ return SNDRV_PCM_FMTBIT_FLOAT64_LE; ++ } else { ++ pr_err("aaudio: unsupported bits per channel for float format: %u\n", desc->bits_per_channel); ++ return 0; ++ } ++ } ++#define DEFINE_BPC_OPTION(val, b) \ ++ case val: \ ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_BIG_ENDIAN) { \ ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \ ++ return SNDRV_PCM_FMTBIT_S ## b ## BE; \ ++ else \ ++ return SNDRV_PCM_FMTBIT_U ## b ## BE; \ ++ } else { \ ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) \ ++ return SNDRV_PCM_FMTBIT_S ## b ## LE; \ ++ else \ ++ return SNDRV_PCM_FMTBIT_U ## b ## LE; \ ++ } ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_PACKED) { ++ switch (desc->bits_per_channel) { ++ case 8: ++ case 16: ++ case 32: ++ break; ++ DEFINE_BPC_OPTION(24, 24_3) ++ default: ++ pr_err("aaudio: unsupported bits per channel for packed format: %u\n", desc->bits_per_channel); ++ return 0; ++ } ++ } ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_ALIGNED_HIGH) { ++ switch (desc->bits_per_channel) { ++ DEFINE_BPC_OPTION(24, 32_) ++ default: ++ pr_err("aaudio: unsupported bits per channel for high-aligned format: %u\n", desc->bits_per_channel); ++ return 0; ++ } ++ } ++ switch (desc->bits_per_channel) { ++ case 8: ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_SIGNED) ++ return SNDRV_PCM_FMTBIT_S8; ++ else ++ return SNDRV_PCM_FMTBIT_U8; ++ DEFINE_BPC_OPTION(16, 16_) ++ DEFINE_BPC_OPTION(24, 24_) ++ DEFINE_BPC_OPTION(32, 32_) ++ default: ++ pr_err("aaudio: unsupported bits per channel: %u\n", desc->bits_per_channel); ++ return 0; ++ } ++} ++int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, ++ size_t buf_size) ++{ ++ uint rate; ++ alsa_hw->info = (SNDRV_PCM_INFO_MMAP | ++ SNDRV_PCM_INFO_BLOCK_TRANSFER | ++ SNDRV_PCM_INFO_MMAP_VALID | ++ SNDRV_PCM_INFO_DOUBLE); ++ if (desc->format_flags & AAUDIO_FORMAT_FLAG_NON_MIXABLE) ++ pr_warn("aaudio: unsupported hw flag: NON_MIXABLE\n"); ++ if (!(desc->format_flags & AAUDIO_FORMAT_FLAG_NON_INTERLEAVED)) ++ alsa_hw->info |= SNDRV_PCM_INFO_INTERLEAVED; ++ alsa_hw->formats = aaudio_get_alsa_fmtbit(desc); ++ if (!alsa_hw->formats) ++ return -EINVAL; ++ rate = (uint) 
aaudio_double_to_u64(desc->sample_rate_double); ++ alsa_hw->rates = snd_pcm_rate_to_rate_bit(rate); ++ alsa_hw->rate_min = rate; ++ alsa_hw->rate_max = rate; ++ alsa_hw->channels_min = desc->channels_per_frame; ++ alsa_hw->channels_max = desc->channels_per_frame; ++ alsa_hw->buffer_bytes_max = buf_size; ++ alsa_hw->period_bytes_min = desc->bytes_per_packet; ++ alsa_hw->period_bytes_max = desc->bytes_per_packet; ++ alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet); ++ alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet); ++ pr_debug("aaudio_create_hw_info: format = %llu, rate = %u/%u. channels = %u, periods = %u, period size = %lu\n", ++ alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min, ++ alsa_hw->period_bytes_min); ++ return 0; ++} ++ ++static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substream) ++{ ++ struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); ++ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ++ return &sdev->out_streams[substream->number]; ++ else ++ return &sdev->in_streams[substream->number]; ++} ++ ++static int aaudio_pcm_open(struct snd_pcm_substream *substream) ++{ ++ pr_debug("aaudio_pcm_open\n"); ++ substream->runtime->hw = *aaudio_pcm_stream(substream)->alsa_hw_desc; ++ ++ return 0; ++} ++ ++static int aaudio_pcm_close(struct snd_pcm_substream *substream) ++{ ++ pr_debug("aaudio_pcm_close\n"); ++ return 0; ++} ++ ++static int aaudio_pcm_prepare(struct snd_pcm_substream *substream) ++{ ++ return 0; ++} ++ ++static int aaudio_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params) ++{ ++ struct aaudio_stream *astream = aaudio_pcm_stream(substream); ++ pr_debug("aaudio_pcm_hw_params\n"); ++ ++ if (!astream->buffer_cnt || !astream->buffers) ++ return -EINVAL; ++ ++ substream->runtime->dma_area = astream->buffers[0].ptr; ++ substream->runtime->dma_addr = astream->buffers[0].dma_addr; ++ substream->runtime->dma_bytes = astream->buffers[0].size; ++ return 0; ++} ++ ++static int aaudio_pcm_hw_free(struct snd_pcm_substream *substream) ++{ ++ pr_debug("aaudio_pcm_hw_free\n"); ++ return 0; ++} ++ ++static void aaudio_pcm_start(struct snd_pcm_substream *substream) ++{ ++ struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); ++ struct aaudio_stream *stream = aaudio_pcm_stream(substream); ++ void *buf; ++ size_t s; ++ ktime_t time_start, time_end; ++ bool back_buffer; ++ time_start = ktime_get(); ++ ++ back_buffer = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); ++ ++ if (back_buffer) { ++ s = frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr); ++ buf = kmalloc(s, GFP_KERNEL); ++ memcpy_fromio(buf, substream->runtime->dma_area, s); ++ time_end = ktime_get(); ++ pr_debug("aaudio: Backed up the buffer in %lluns [%li]\n", ktime_to_ns(time_end - time_start), ++ substream->runtime->control->appl_ptr); ++ } ++ ++ stream->waiting_for_first_ts = true; ++ stream->frame_min = stream->latency; ++ ++ aaudio_cmd_start_io(sdev->a, sdev->dev_id); ++ if (back_buffer) ++ memcpy_toio(substream->runtime->dma_area, buf, s); ++ ++ time_end = ktime_get(); ++ pr_debug("aaudio: Started the audio device in %lluns\n", ktime_to_ns(time_end - time_start)); ++} ++ ++static int aaudio_pcm_trigger(struct snd_pcm_substream *substream, int cmd) ++{ ++ struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); ++ struct aaudio_stream *stream = aaudio_pcm_stream(substream); ++ pr_debug("aaudio_pcm_trigger %x\n", cmd); ++ 
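++    /*
++     * On SNDRV_PCM_TRIGGER_START, aaudio_pcm_start() first copies the
++     * frames ALSA has already queued (up to appl_ptr) out of the shared
++     * buffer, sends the start-io command, and then copies them back in,
++     * since starting I/O may clobber the buffer contents. Once running,
++     * the stream position is only approximated from device timestamps
++     * in aaudio_pcm_pointer() below.
++     */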
++    /* We only support triggers on substream #0 */
++    if (substream->number != 0)
++        return 0;
++    switch (cmd) {
++    case SNDRV_PCM_TRIGGER_START:
++        aaudio_pcm_start(substream);
++        stream->started = 1;
++        break;
++    case SNDRV_PCM_TRIGGER_STOP:
++        aaudio_cmd_stop_io(sdev->a, sdev->dev_id);
++        stream->started = 0;
++        break;
++    default:
++        return -EINVAL;
++    }
++    return 0;
++}
++
++static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream)
++{
++    struct aaudio_stream *stream = aaudio_pcm_stream(substream);
++    ktime_t time_from_start;
++    snd_pcm_sframes_t frames;
++    snd_pcm_sframes_t buffer_time_length;
++
++    if (!stream->started || stream->waiting_for_first_ts) {
++        pr_warn("aaudio_pcm_pointer while not started\n");
++        return 0;
++    }
++
++    /* Approximate the pointer based on the last received timestamp */
++    time_from_start = ktime_get_boottime() - stream->remote_timestamp;
++    buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate;
++    frames = (ktime_to_ns(time_from_start) % buffer_time_length) * substream->runtime->buffer_size / buffer_time_length;
++    if (ktime_to_ns(time_from_start) < buffer_time_length) {
++        if (frames < stream->frame_min)
++            frames = stream->frame_min;
++        else
++            stream->frame_min = 0;
++    } else {
++        if (ktime_to_ns(time_from_start) < 2 * buffer_time_length)
++            stream->frame_min = frames;
++        else
++            stream->frame_min = 0; /* Heavy desync */
++    }
++    frames -= stream->latency;
++    if (frames < 0)
++        frames += ((-frames - 1) / substream->runtime->buffer_size + 1) * substream->runtime->buffer_size;
++    return (snd_pcm_uframes_t) frames;
++}
++
++static const struct snd_pcm_ops aaudio_pcm_ops = {
++    .open = aaudio_pcm_open,
++    .close = aaudio_pcm_close,
++    .ioctl = snd_pcm_lib_ioctl,
++    .hw_params = aaudio_pcm_hw_params,
++    .hw_free = aaudio_pcm_hw_free,
++    .prepare = aaudio_pcm_prepare,
++    .trigger = aaudio_pcm_trigger,
++    .pointer = aaudio_pcm_pointer,
++    .mmap = snd_pcm_lib_mmap_iomem
++};
++
++int aaudio_create_pcm(struct aaudio_subdevice *sdev)
++{
++    struct snd_pcm *pcm;
++    struct aaudio_alsa_pcm_id_mapping *id_mapping;
++    int err;
++
++    if (!sdev->is_pcm || (sdev->in_stream_cnt == 0 && sdev->out_stream_cnt == 0)) {
++        return -EINVAL;
++    }
++
++    for (id_mapping = aaudio_alsa_id_mappings; id_mapping->name; id_mapping++) {
++        if (!strcmp(sdev->uid, id_mapping->name)) {
++            sdev->alsa_id = id_mapping->alsa_id;
++            break;
++        }
++    }
++    if (!id_mapping->name)
++        sdev->alsa_id = sdev->a->next_alsa_id++;
++    err = snd_pcm_new(sdev->a->card, sdev->uid, sdev->alsa_id,
++            (int) sdev->out_stream_cnt, (int) sdev->in_stream_cnt, &pcm);
++    if (err < 0)
++        return err;
++    pcm->private_data = sdev;
++    pcm->nonatomic = 1;
++    sdev->pcm = pcm;
++    strcpy(pcm->name, sdev->uid);
++    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &aaudio_pcm_ops);
++    snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &aaudio_pcm_ops);
++    return 0;
++}
++
++static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, ktime_t timestamp)
++{
++    unsigned long flags;
++    struct aaudio_stream *stream;
++
++    stream = aaudio_pcm_stream(substream);
++    snd_pcm_stream_lock_irqsave(substream, flags);
++    stream->remote_timestamp = timestamp;
++    if (stream->waiting_for_first_ts) {
++        stream->waiting_for_first_ts = false;
++        snd_pcm_stream_unlock_irqrestore(substream, flags);
++        return;
++    }
++    snd_pcm_stream_unlock_irqrestore(substream, flags);
++    snd_pcm_period_elapsed(substream);
++}
++
++void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t
os_timestamp, u64 dev_timestamp) ++{ ++ struct snd_pcm_substream *substream; ++ ++ substream = sdev->pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; ++ if (substream) ++ aaudio_handle_stream_timestamp(substream, dev_timestamp); ++ substream = sdev->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream; ++ if (substream) ++ aaudio_handle_stream_timestamp(substream, os_timestamp); ++} +diff --git a/drivers/staging/apple-bce/audio/pcm.h b/drivers/staging/apple-bce/audio/pcm.h +new file mode 100644 +index 000000000..ea5f35fbe +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/pcm.h +@@ -0,0 +1,16 @@ ++#ifndef AAUDIO_PCM_H ++#define AAUDIO_PCM_H ++ ++#include ++#include ++ ++struct aaudio_subdevice; ++struct aaudio_apple_description; ++struct snd_pcm_hardware; ++ ++int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_hardware *alsa_hw, size_t buf_size); ++int aaudio_create_pcm(struct aaudio_subdevice *sdev); ++ ++void aaudio_handle_timestamp(struct aaudio_subdevice *sdev, ktime_t os_timestamp, u64 dev_timestamp); ++ ++#endif //AAUDIO_PCM_H +diff --git a/drivers/staging/apple-bce/audio/protocol.c b/drivers/staging/apple-bce/audio/protocol.c +new file mode 100644 +index 000000000..2314813ae +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/protocol.c +@@ -0,0 +1,347 @@ ++#include "protocol.h" ++#include "protocol_bce.h" ++#include "audio.h" ++ ++int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base) ++{ ++ if (msg->size < sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base) * 2) ++ return -EINVAL; ++ *base = *((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1)); ++ return 0; ++} ++ ++#define READ_START(type) \ ++ size_t offset = sizeof(struct aaudio_msg_header) + sizeof(struct aaudio_msg_base); (void)offset; \ ++ if (((struct aaudio_msg_base *) ((struct aaudio_msg_header *) msg->data + 1))->msg != type) \ ++ return -EINVAL; ++#define READ_DEVID_VAR(devid) *devid = ((struct aaudio_msg_header *) msg->data)->device_id ++#define READ_VAL(type) ({ offset += sizeof(type); *((type *) ((u8 *) msg->data + offset - sizeof(type))); }) ++#define READ_VAR(type, var) *var = READ_VAL(type) ++ ++int aaudio_msg_read_start_io_response(struct aaudio_msg *msg) ++{ ++ READ_START(AAUDIO_MSG_START_IO_RESPONSE); ++ return 0; ++} ++ ++int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg) ++{ ++ READ_START(AAUDIO_MSG_STOP_IO_RESPONSE); ++ return 0; ++} ++ ++int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid, ++ u64 *timestamp, u64 *update_seed) ++{ ++ READ_START(AAUDIO_MSG_UPDATE_TIMESTAMP); ++ READ_DEVID_VAR(devid); ++ READ_VAR(u64, timestamp); ++ READ_VAR(u64, update_seed); ++ return 0; ++} ++ ++int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, ++ struct aaudio_prop_addr *prop, void **data, u64 *data_size) ++{ ++ READ_START(AAUDIO_MSG_GET_PROPERTY_RESPONSE); ++ READ_VAR(aaudio_object_id_t, obj); ++ READ_VAR(u32, &prop->element); ++ READ_VAR(u32, &prop->scope); ++ READ_VAR(u32, &prop->selector); ++ READ_VAR(u64, data_size); ++ *data = ((u8 *) msg->data + offset); ++ /* offset += data_size; */ ++ return 0; ++} ++ ++int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj) ++{ ++ READ_START(AAUDIO_MSG_SET_PROPERTY_RESPONSE); ++ READ_VAR(aaudio_object_id_t, obj); ++ return 0; ++} ++ ++int aaudio_msg_read_property_listener_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, ++ struct aaudio_prop_addr *prop) ++{ ++ 
READ_START(AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE); ++ READ_VAR(aaudio_object_id_t, obj); ++ READ_VAR(u32, &prop->element); ++ READ_VAR(u32, &prop->scope); ++ READ_VAR(u32, &prop->selector); ++ return 0; ++} ++ ++int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj, ++ struct aaudio_prop_addr *prop) ++{ ++ READ_START(AAUDIO_MSG_PROPERTY_CHANGED); ++ READ_DEVID_VAR(devid); ++ READ_VAR(aaudio_object_id_t, obj); ++ READ_VAR(u32, &prop->element); ++ READ_VAR(u32, &prop->scope); ++ READ_VAR(u32, &prop->selector); ++ return 0; ++} ++ ++int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg) ++{ ++ READ_START(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE); ++ return 0; ++} ++ ++int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt) ++{ ++ READ_START(AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE); ++ READ_VAR(u64, str_cnt); ++ *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); ++ /* offset += str_cnt * sizeof(aaudio_object_id_t); */ ++ return 0; ++} ++ ++int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt) ++{ ++ READ_START(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE); ++ READ_VAR(u64, str_cnt); ++ *str_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); ++ /* offset += str_cnt * sizeof(aaudio_object_id_t); */ ++ return 0; ++} ++ ++int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg) ++{ ++ READ_START(AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE); ++ return 0; ++} ++ ++int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt) ++{ ++ READ_START(AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE); ++ READ_VAR(u64, dev_cnt); ++ *dev_l = (aaudio_device_id_t *) ((u8 *) msg->data + offset); ++ /* offset += dev_cnt * sizeof(aaudio_device_id_t); */ ++ return 0; ++} ++ ++#define WRITE_START_OF_TYPE(typev, devid) \ ++ size_t offset = sizeof(struct aaudio_msg_header); (void) offset; \ ++ ((struct aaudio_msg_header *) msg->data)->type = (typev); \ ++ ((struct aaudio_msg_header *) msg->data)->device_id = (devid); ++#define WRITE_START_COMMAND(devid) WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_COMMAND, devid) ++#define WRITE_START_RESPONSE() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_RESPONSE, 0) ++#define WRITE_START_NOTIFICATION() WRITE_START_OF_TYPE(AAUDIO_MSG_TYPE_NOTIFICATION, 0) ++#define WRITE_VAL(type, value) { *((type *) ((u8 *) msg->data + offset)) = value; offset += sizeof(value); } ++#define WRITE_BIN(value, size) { memcpy((u8 *) msg->data + offset, value, size); offset += size; } ++#define WRITE_BASE(type) WRITE_VAL(u32, type) WRITE_VAL(u32, 0) ++#define WRITE_END() { msg->size = offset; } ++ ++void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev) ++{ ++ WRITE_START_COMMAND(dev); ++ WRITE_BASE(AAUDIO_MSG_START_IO); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev) ++{ ++ WRITE_START_COMMAND(dev); ++ WRITE_BASE(AAUDIO_MSG_STOP_IO); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size) ++{ ++ WRITE_START_COMMAND(dev); ++ WRITE_BASE(AAUDIO_MSG_GET_PROPERTY); ++ WRITE_VAL(aaudio_object_id_t, obj); ++ WRITE_VAL(u32, prop.element); ++ WRITE_VAL(u32, prop.scope); ++ WRITE_VAL(u32, prop.selector); ++ WRITE_VAL(u64, 
qualifier_size); ++ WRITE_BIN(qualifier, qualifier_size); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size) ++{ ++ WRITE_START_COMMAND(dev); ++ WRITE_BASE(AAUDIO_MSG_SET_PROPERTY); ++ WRITE_VAL(aaudio_object_id_t, obj); ++ WRITE_VAL(u32, prop.element); ++ WRITE_VAL(u32, prop.scope); ++ WRITE_VAL(u32, prop.selector); ++ WRITE_VAL(u64, data_size); ++ WRITE_BIN(data, data_size); ++ WRITE_VAL(u64, qualifier_size); ++ WRITE_BIN(qualifier, qualifier_size); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop) ++{ ++ WRITE_START_COMMAND(dev); ++ WRITE_BASE(AAUDIO_MSG_PROPERTY_LISTENER); ++ WRITE_VAL(aaudio_object_id_t, obj); ++ WRITE_VAL(u32, prop.element); ++ WRITE_VAL(u32, prop.scope); ++ WRITE_VAL(u32, prop.selector); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid) ++{ ++ WRITE_START_COMMAND(devid); ++ WRITE_BASE(AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid) ++{ ++ WRITE_START_COMMAND(devid); ++ WRITE_BASE(AAUDIO_MSG_GET_INPUT_STREAM_LIST); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid) ++{ ++ WRITE_START_COMMAND(devid); ++ WRITE_BASE(AAUDIO_MSG_GET_OUTPUT_STREAM_LIST); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode) ++{ ++ WRITE_START_COMMAND(0); ++ WRITE_BASE(AAUDIO_MSG_SET_REMOTE_ACCESS); ++ WRITE_VAL(u64, mode); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver) ++{ ++ WRITE_START_NOTIFICATION(); ++ WRITE_BASE(AAUDIO_MSG_NOTIFICATION_ALIVE); ++ WRITE_VAL(u32, proto_ver); ++ WRITE_VAL(u32, msg_ver); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg) ++{ ++ WRITE_START_RESPONSE(); ++ WRITE_BASE(AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE); ++ WRITE_END(); ++} ++ ++void aaudio_msg_write_get_device_list(struct aaudio_msg *msg) ++{ ++ WRITE_START_COMMAND(0); ++ WRITE_BASE(AAUDIO_MSG_GET_DEVICE_LIST); ++ WRITE_END(); ++} ++ ++#define CMD_SHARED_VARS_NO_REPLY \ ++ int status = 0; \ ++ struct aaudio_send_ctx sctx; ++#define CMD_SHARED_VARS \ ++ CMD_SHARED_VARS_NO_REPLY \ ++ struct aaudio_msg reply = aaudio_reply_alloc(); \ ++ struct aaudio_msg *buf = &reply; ++#define CMD_SEND_REQUEST(fn, ...) \ ++ if ((status = aaudio_send_cmd_sync(a, &sctx, buf, 500, fn, ##__VA_ARGS__))) \ ++ return status; ++#define CMD_DEF_SHARED_AND_SEND(fn, ...) \ ++ CMD_SHARED_VARS \ ++ CMD_SEND_REQUEST(fn, ##__VA_ARGS__); ++#define CMD_DEF_SHARED_NO_REPLY_AND_SEND(fn, ...) \ ++ CMD_SHARED_VARS_NO_REPLY \ ++ CMD_SEND_REQUEST(fn, ##__VA_ARGS__); ++#define CMD_HNDL_REPLY_NO_FREE(fn, ...) \ ++ status = fn(buf, ##__VA_ARGS__); \ ++ return status; ++#define CMD_HNDL_REPLY_AND_FREE(fn, ...) 
\ ++ status = fn(buf, ##__VA_ARGS__); \ ++ aaudio_reply_free(&reply); \ ++ return status; ++ ++int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid) ++{ ++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_start_io, devid); ++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_start_io_response); ++} ++int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid) ++{ ++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_stop_io, devid); ++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_stop_io_response); ++} ++int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf, ++ aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size) ++{ ++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_property, devid, obj, prop, qualifier, qualifier_size); ++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_property_response, &obj, &prop, data, data_size); ++} ++int aaudio_cmd_get_primitive_property(struct aaudio_device *a, ++ aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size) ++{ ++ int status; ++ struct aaudio_msg reply = aaudio_reply_alloc(); ++ void *r_data; ++ u64 r_data_size; ++ if ((status = aaudio_cmd_get_property(a, &reply, devid, obj, prop, qualifier, qualifier_size, ++ &r_data, &r_data_size))) ++ goto finish; ++ if (r_data_size != data_size) { ++ status = -EINVAL; ++ goto finish; ++ } ++ memcpy(data, r_data, data_size); ++finish: ++ aaudio_reply_free(&reply); ++ return status; ++} ++int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size) ++{ ++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_property, devid, obj, prop, data, data_size, ++ qualifier, qualifier_size); ++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_property_response, &obj); ++} ++int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop) ++{ ++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_property_listener, devid, obj, prop); ++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_property_listener_response, &obj, &prop); ++} ++int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid) ++{ ++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_input_stream_address_ranges, devid); ++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_input_stream_address_ranges_response); ++} ++int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, ++ aaudio_object_id_t **str_l, u64 *str_cnt) ++{ ++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_input_stream_list, devid); ++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_input_stream_list_response, str_l, str_cnt); ++} ++int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, ++ aaudio_object_id_t **str_l, u64 *str_cnt) ++{ ++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_output_stream_list, devid); ++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_output_stream_list_response, str_l, str_cnt); ++} ++int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode) ++{ ++ CMD_DEF_SHARED_AND_SEND(aaudio_msg_write_set_remote_access, mode); ++ CMD_HNDL_REPLY_AND_FREE(aaudio_msg_read_set_remote_access_response); ++} ++int aaudio_cmd_get_device_list(struct 
aaudio_device *a, struct aaudio_msg *buf, ++ aaudio_device_id_t **dev_l, u64 *dev_cnt) ++{ ++ CMD_DEF_SHARED_NO_REPLY_AND_SEND(aaudio_msg_write_get_device_list); ++ CMD_HNDL_REPLY_NO_FREE(aaudio_msg_read_get_device_list_response, dev_l, dev_cnt); ++} +\ No newline at end of file +diff --git a/drivers/staging/apple-bce/audio/protocol.h b/drivers/staging/apple-bce/audio/protocol.h +new file mode 100644 +index 000000000..3427486f3 +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/protocol.h +@@ -0,0 +1,147 @@ ++#ifndef AAUDIO_PROTOCOL_H ++#define AAUDIO_PROTOCOL_H ++ ++#include ++ ++struct aaudio_device; ++ ++typedef u64 aaudio_device_id_t; ++typedef u64 aaudio_object_id_t; ++ ++struct aaudio_msg { ++ void *data; ++ size_t size; ++}; ++ ++struct __attribute__((packed)) aaudio_msg_header { ++ char tag[4]; ++ u8 type; ++ aaudio_device_id_t device_id; // Idk, use zero for commands? ++}; ++struct __attribute__((packed)) aaudio_msg_base { ++ u32 msg; ++ u32 status; ++}; ++ ++struct aaudio_prop_addr { ++ u32 scope; ++ u32 selector; ++ u32 element; ++}; ++#define AAUDIO_PROP(scope, sel, el) (struct aaudio_prop_addr) { scope, sel, el } ++ ++enum { ++ AAUDIO_MSG_TYPE_COMMAND = 1, ++ AAUDIO_MSG_TYPE_RESPONSE = 2, ++ AAUDIO_MSG_TYPE_NOTIFICATION = 3 ++}; ++ ++enum { ++ AAUDIO_MSG_START_IO = 0, ++ AAUDIO_MSG_START_IO_RESPONSE = 1, ++ AAUDIO_MSG_STOP_IO = 2, ++ AAUDIO_MSG_STOP_IO_RESPONSE = 3, ++ AAUDIO_MSG_UPDATE_TIMESTAMP = 4, ++ AAUDIO_MSG_GET_PROPERTY = 7, ++ AAUDIO_MSG_GET_PROPERTY_RESPONSE = 8, ++ AAUDIO_MSG_SET_PROPERTY = 9, ++ AAUDIO_MSG_SET_PROPERTY_RESPONSE = 10, ++ AAUDIO_MSG_PROPERTY_LISTENER = 11, ++ AAUDIO_MSG_PROPERTY_LISTENER_RESPONSE = 12, ++ AAUDIO_MSG_PROPERTY_CHANGED = 13, ++ AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES = 18, ++ AAUDIO_MSG_SET_INPUT_STREAM_ADDRESS_RANGES_RESPONSE = 19, ++ AAUDIO_MSG_GET_INPUT_STREAM_LIST = 24, ++ AAUDIO_MSG_GET_INPUT_STREAM_LIST_RESPONSE = 25, ++ AAUDIO_MSG_GET_OUTPUT_STREAM_LIST = 26, ++ AAUDIO_MSG_GET_OUTPUT_STREAM_LIST_RESPONSE = 27, ++ AAUDIO_MSG_SET_REMOTE_ACCESS = 32, ++ AAUDIO_MSG_SET_REMOTE_ACCESS_RESPONSE = 33, ++ AAUDIO_MSG_UPDATE_TIMESTAMP_RESPONSE = 34, ++ ++ AAUDIO_MSG_NOTIFICATION_ALIVE = 100, ++ AAUDIO_MSG_GET_DEVICE_LIST = 101, ++ AAUDIO_MSG_GET_DEVICE_LIST_RESPONSE = 102, ++ AAUDIO_MSG_NOTIFICATION_BOOT = 104 ++}; ++ ++enum { ++ AAUDIO_REMOTE_ACCESS_OFF = 0, ++ AAUDIO_REMOTE_ACCESS_ON = 2 ++}; ++ ++enum { ++ AAUDIO_PROP_SCOPE_GLOBAL = 0x676c6f62, // 'glob' ++ AAUDIO_PROP_SCOPE_INPUT = 0x696e7074, // 'inpt' ++ AAUDIO_PROP_SCOPE_OUTPUT = 0x6f757470 // 'outp' ++}; ++ ++enum { ++ AAUDIO_PROP_UID = 0x75696420, // 'uid ' ++ AAUDIO_PROP_BOOL_VALUE = 0x6263766c, // 'bcvl' ++ AAUDIO_PROP_JACK_PLUGGED = 0x6a61636b, // 'jack' ++ AAUDIO_PROP_SEL_VOLUME = 0x64656176, // 'deav' ++ AAUDIO_PROP_LATENCY = 0x6c746e63, // 'ltnc' ++ AAUDIO_PROP_PHYS_FORMAT = 0x70667420 // 'pft ' ++}; ++ ++int aaudio_msg_read_base(struct aaudio_msg *msg, struct aaudio_msg_base *base); ++ ++int aaudio_msg_read_start_io_response(struct aaudio_msg *msg); ++int aaudio_msg_read_stop_io_response(struct aaudio_msg *msg); ++int aaudio_msg_read_update_timestamp(struct aaudio_msg *msg, aaudio_device_id_t *devid, ++ u64 *timestamp, u64 *update_seed); ++int aaudio_msg_read_get_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj, ++ struct aaudio_prop_addr *prop, void **data, u64 *data_size); ++int aaudio_msg_read_set_property_response(struct aaudio_msg *msg, aaudio_object_id_t *obj); ++int aaudio_msg_read_property_listener_response(struct aaudio_msg 
*msg,aaudio_object_id_t *obj, ++ struct aaudio_prop_addr *prop); ++int aaudio_msg_read_property_changed(struct aaudio_msg *msg, aaudio_device_id_t *devid, aaudio_object_id_t *obj, ++ struct aaudio_prop_addr *prop); ++int aaudio_msg_read_set_input_stream_address_ranges_response(struct aaudio_msg *msg); ++int aaudio_msg_read_get_input_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt); ++int aaudio_msg_read_get_output_stream_list_response(struct aaudio_msg *msg, aaudio_object_id_t **str_l, u64 *str_cnt); ++int aaudio_msg_read_set_remote_access_response(struct aaudio_msg *msg); ++int aaudio_msg_read_get_device_list_response(struct aaudio_msg *msg, aaudio_device_id_t **dev_l, u64 *dev_cnt); ++ ++void aaudio_msg_write_start_io(struct aaudio_msg *msg, aaudio_device_id_t dev); ++void aaudio_msg_write_stop_io(struct aaudio_msg *msg, aaudio_device_id_t dev); ++void aaudio_msg_write_get_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size); ++void aaudio_msg_write_set_property(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *data, u64 data_size, void *qualifier, u64 qualifier_size); ++void aaudio_msg_write_property_listener(struct aaudio_msg *msg, aaudio_device_id_t dev, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop); ++void aaudio_msg_write_set_input_stream_address_ranges(struct aaudio_msg *msg, aaudio_device_id_t devid); ++void aaudio_msg_write_get_input_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid); ++void aaudio_msg_write_get_output_stream_list(struct aaudio_msg *msg, aaudio_device_id_t devid); ++void aaudio_msg_write_set_remote_access(struct aaudio_msg *msg, u64 mode); ++void aaudio_msg_write_alive_notification(struct aaudio_msg *msg, u32 proto_ver, u32 msg_ver); ++void aaudio_msg_write_update_timestamp_response(struct aaudio_msg *msg); ++void aaudio_msg_write_get_device_list(struct aaudio_msg *msg); ++ ++ ++int aaudio_cmd_start_io(struct aaudio_device *a, aaudio_device_id_t devid); ++int aaudio_cmd_stop_io(struct aaudio_device *a, aaudio_device_id_t devid); ++int aaudio_cmd_get_property(struct aaudio_device *a, struct aaudio_msg *buf, ++ aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void **data, u64 *data_size); ++int aaudio_cmd_get_primitive_property(struct aaudio_device *a, ++ aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size); ++int aaudio_cmd_set_property(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop, void *qualifier, u64 qualifier_size, void *data, u64 data_size); ++int aaudio_cmd_property_listener(struct aaudio_device *a, aaudio_device_id_t devid, aaudio_object_id_t obj, ++ struct aaudio_prop_addr prop); ++int aaudio_cmd_set_input_stream_address_ranges(struct aaudio_device *a, aaudio_device_id_t devid); ++int aaudio_cmd_get_input_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, ++ aaudio_object_id_t **str_l, u64 *str_cnt); ++int aaudio_cmd_get_output_stream_list(struct aaudio_device *a, struct aaudio_msg *buf, aaudio_device_id_t devid, ++ aaudio_object_id_t **str_l, u64 *str_cnt); ++int aaudio_cmd_set_remote_access(struct aaudio_device *a, u64 mode); ++int aaudio_cmd_get_device_list(struct aaudio_device *a, 
struct aaudio_msg *buf, ++ aaudio_device_id_t **dev_l, u64 *dev_cnt); ++ ++ ++ ++#endif //AAUDIO_PROTOCOL_H +diff --git a/drivers/staging/apple-bce/audio/protocol_bce.c b/drivers/staging/apple-bce/audio/protocol_bce.c +new file mode 100644 +index 000000000..28f2dfd44 +--- /dev/null ++++ b/drivers/staging/apple-bce/audio/protocol_bce.c +@@ -0,0 +1,226 @@ ++#include "protocol_bce.h" ++ ++#include "audio.h" ++ ++static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq); ++static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq); ++static int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, ++ bce_sq_completion cfn); ++void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count); ++ ++int aaudio_bce_init(struct aaudio_device *dev) ++{ ++ int status; ++ struct aaudio_bce *bce = &dev->bcem; ++ bce->cq = bce_create_cq(dev->bce, 0x80); ++ spin_lock_init(&bce->spinlock); ++ if (!bce->cq) ++ return -EINVAL; ++ if ((status = aaudio_bce_queue_init(dev, &bce->qout, "com.apple.BridgeAudio.IntelToARM", DMA_TO_DEVICE, ++ aaudio_bce_out_queue_completion))) { ++ return status; ++ } ++ if ((status = aaudio_bce_queue_init(dev, &bce->qin, "com.apple.BridgeAudio.ARMToIntel", DMA_FROM_DEVICE, ++ aaudio_bce_in_queue_completion))) { ++ return status; ++ } ++ aaudio_bce_in_queue_submit_pending(&bce->qin, bce->qin.el_count); ++ return 0; ++} ++ ++int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, ++ bce_sq_completion cfn) ++{ ++ q->cq = dev->bcem.cq; ++ q->el_size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE; ++ q->el_count = AAUDIO_BCE_QUEUE_ELEMENT_COUNT; ++ /* NOTE: The Apple impl uses 0x80 as the queue size, however we use 21 (in fact 20) to simplify the impl */ ++ q->sq = bce_create_sq(dev->bce, q->cq, name, (u32) (q->el_count + 1), direction, cfn, dev); ++ if (!q->sq) ++ return -EINVAL; ++ ++ q->data = dma_alloc_coherent(&dev->bce->pci->dev, q->el_size * q->el_count, &q->dma_addr, GFP_KERNEL); ++ if (!q->data) { ++ bce_destroy_sq(dev->bce, q->sq); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static void aaudio_send_create_tag(struct aaudio_bce *b, int *tagn, char tag[4]) ++{ ++ char tag_zero[5]; ++ b->tag_num = (b->tag_num + 1) % AAUDIO_BCE_QUEUE_TAG_COUNT; ++ *tagn = b->tag_num; ++ snprintf(tag_zero, 5, "S%03d", b->tag_num); ++ *((u32 *) tag) = *((u32 *) tag_zero); ++} ++ ++int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag) ++{ ++ int status; ++ size_t index; ++ void *dptr; ++ struct aaudio_msg_header *header; ++ if ((status = bce_reserve_submission(b->qout.sq, &ctx->timeout))) ++ return status; ++ spin_lock_irqsave(&b->spinlock, ctx->irq_flags); ++ index = b->qout.data_tail; ++ dptr = (u8 *) b->qout.data + index * b->qout.el_size; ++ ctx->msg.data = dptr; ++ header = dptr; ++ if (tag) ++ *((u32 *) header->tag) = *((u32 *) tag); ++ else ++ aaudio_send_create_tag(b, &ctx->tag_n, header->tag); ++ return 0; ++} ++ ++void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx) ++{ ++ struct bce_qe_submission *s = bce_next_submission(b->qout.sq); ++#ifdef DEBUG ++ pr_debug("aaudio: Sending command data\n"); ++ print_hex_dump(KERN_DEBUG, "aaudio:OUT ", DUMP_PREFIX_NONE, 32, 1, ctx->msg.data, ctx->msg.size, true); ++#endif ++ bce_set_submission_single(s, b->qout.dma_addr + (dma_addr_t) (ctx->msg.data - b->qout.data), ctx->msg.size); ++ bce_submit_to_device(b->qout.sq); ++ b->qout.data_tail = (b->qout.data_tail + 1) % 
b->qout.el_count;
++    spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
++}
++
++int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply)
++{
++    struct aaudio_bce_queue_entry ent;
++    DECLARE_COMPLETION_ONSTACK(cmpl);
++    ent.msg = reply;
++    ent.cmpl = &cmpl;
++    b->pending_entries[ctx->tag_n] = &ent;
++    __aaudio_send(b, ctx); /* unlocks the spinlock */
++    ctx->timeout = wait_for_completion_timeout(&cmpl, ctx->timeout);
++    if (ctx->timeout == 0) {
++        /* Remove the pending queue entry; this would normally be handled by the
++         * completion routine, but on a timeout it won't be */
++        spin_lock_irqsave(&b->spinlock, ctx->irq_flags);
++        if (b->pending_entries[ctx->tag_n] == &ent)
++            b->pending_entries[ctx->tag_n] = NULL;
++        spin_unlock_irqrestore(&b->spinlock, ctx->irq_flags);
++        return -ETIMEDOUT;
++    }
++    return 0;
++}
++
++static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply)
++{
++    const char *tag;
++    int tagn;
++    unsigned long irq_flags;
++    char tag_zero[5];
++    struct aaudio_bce_queue_entry *entry;
++
++    tag = ((struct aaudio_msg_header *) reply->data)->tag;
++    if (tag[0] != 'S') {
++        pr_err("aaudio_handle_reply: Unexpected tag: %.4s\n", tag);
++        return;
++    }
++    *((u32 *) tag_zero) = *((u32 *) tag);
++    tag_zero[4] = 0;
++    if (kstrtoint(&tag_zero[1], 10, &tagn)) {
++        pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag);
++        return;
++    }
++
++    spin_lock_irqsave(&b->spinlock, irq_flags);
++    entry = b->pending_entries[tagn];
++    if (entry) {
++        if (reply->size < entry->msg->size)
++            entry->msg->size = reply->size;
++        memcpy(entry->msg->data, reply->data, entry->msg->size);
++        complete(entry->cmpl);
++
++        b->pending_entries[tagn] = NULL;
++    } else {
++        pr_err("aaudio_handle_reply: No queued item found for tag: %.4s\n", tag);
++    }
++    spin_unlock_irqrestore(&b->spinlock, irq_flags);
++}
++
++static void aaudio_bce_out_queue_completion(struct bce_queue_sq *sq)
++{
++    while (bce_next_completion(sq)) {
++        //pr_info("aaudio: Send confirmed\n");
++        bce_notify_submission_complete(sq);
++    }
++}
++
++static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg);
++
++static void aaudio_bce_in_queue_completion(struct bce_queue_sq *sq)
++{
++    struct aaudio_msg msg;
++    struct aaudio_device *dev = sq->userdata;
++    struct aaudio_bce_queue *q = &dev->bcem.qin;
++    struct bce_sq_completion_data *c;
++    size_t cnt = 0;
++
++    mb();
++    while ((c = bce_next_completion(sq))) {
++        msg.data = (u8 *) q->data + q->data_head * q->el_size;
++        msg.size = c->data_size;
++#ifdef DEBUG
++        pr_debug("aaudio: Received command data %llx\n", c->data_size);
++        print_hex_dump(KERN_DEBUG, "aaudio:IN ", DUMP_PREFIX_NONE, 32, 1, msg.data, min(msg.size, 128UL), true);
++#endif
++        aaudio_bce_in_queue_handle_msg(dev, &msg);
++
++        q->data_head = (q->data_head + 1) % q->el_count;
++
++        bce_notify_submission_complete(sq);
++        ++cnt;
++    }
++    aaudio_bce_in_queue_submit_pending(q, cnt);
++}
++
++static void aaudio_bce_in_queue_handle_msg(struct aaudio_device *a, struct aaudio_msg *msg)
++{
++    struct aaudio_msg_header *header = (struct aaudio_msg_header *) msg->data;
++    if (msg->size < sizeof(struct aaudio_msg_header)) {
++        pr_err("aaudio: Msg size smaller than header (%zx)", msg->size);
++        return;
++    }
++    if (header->type == AAUDIO_MSG_TYPE_RESPONSE) {
++        aaudio_handle_reply(&a->bcem, msg);
++    } else if (header->type == AAUDIO_MSG_TYPE_COMMAND) {
++        aaudio_handle_command(a, msg);
++    } else if (header->type == AAUDIO_MSG_TYPE_NOTIFICATION) {
++        aaudio_handle_notification(a, msg);
++    }
++}
++
++void aaudio_bce_in_queue_submit_pending(struct aaudio_bce_queue *q, size_t count)
++{
++    struct bce_qe_submission *s;
++    while (count--) {
++        if (bce_reserve_submission(q->sq, NULL)) {
++            pr_err("aaudio: Failed to reserve an event queue submission\n");
++            break;
++        }
++        s = bce_next_submission(q->sq);
++        bce_set_submission_single(s, q->dma_addr + (dma_addr_t) (q->data_tail * q->el_size), q->el_size);
++        q->data_tail = (q->data_tail + 1) % q->el_count;
++    }
++    bce_submit_to_device(q->sq);
++}
++
++struct aaudio_msg aaudio_reply_alloc(void)
++{
++    struct aaudio_msg ret;
++    ret.size = AAUDIO_BCE_QUEUE_ELEMENT_SIZE;
++    ret.data = kmalloc(ret.size, GFP_KERNEL);
++    return ret;
++}
++
++void aaudio_reply_free(struct aaudio_msg *reply)
++{
++    kfree(reply->data);
++}
+diff --git a/drivers/staging/apple-bce/audio/protocol_bce.h b/drivers/staging/apple-bce/audio/protocol_bce.h
+new file mode 100644
+index 000000000..14d26c05d
+--- /dev/null
++++ b/drivers/staging/apple-bce/audio/protocol_bce.h
+@@ -0,0 +1,72 @@
++#ifndef AAUDIO_PROTOCOL_BCE_H
++#define AAUDIO_PROTOCOL_BCE_H
++
++#include "protocol.h"
++#include "../queue.h"
++
++#define AAUDIO_BCE_QUEUE_ELEMENT_SIZE 0x1000
++#define AAUDIO_BCE_QUEUE_ELEMENT_COUNT 20
++
++#define AAUDIO_BCE_QUEUE_TAG_COUNT 1000
++
++struct aaudio_device;
++
++struct aaudio_bce_queue_entry {
++    struct aaudio_msg *msg;
++    struct completion *cmpl;
++};
++struct aaudio_bce_queue {
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq;
++    void *data;
++    dma_addr_t dma_addr;
++    size_t data_head, data_tail;
++    size_t el_size, el_count;
++};
++struct aaudio_bce {
++    struct bce_queue_cq *cq;
++    struct aaudio_bce_queue qin;
++    struct aaudio_bce_queue qout;
++    int tag_num;
++    struct aaudio_bce_queue_entry *pending_entries[AAUDIO_BCE_QUEUE_TAG_COUNT];
++    struct spinlock spinlock;
++};
++
++struct aaudio_send_ctx {
++    int status;
++    int tag_n;
++    unsigned long irq_flags;
++    struct aaudio_msg msg;
++    unsigned long timeout;
++};
++
++int aaudio_bce_init(struct aaudio_device *dev);
++int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag);
++void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx);
++int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply);
++
++#define aaudio_send_with_tag(a, ctx, tag, tout, fn, ...) ({ \
++    (ctx)->timeout = msecs_to_jiffies(tout); \
++    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), (tag)); \
++    if (!(ctx)->status) { \
++        fn(&(ctx)->msg, ##__VA_ARGS__); \
++        __aaudio_send(&(a)->bcem, (ctx)); \
++    } \
++    (ctx)->status; \
++})
++#define aaudio_send(a, ctx, tout, fn, ...) aaudio_send_with_tag(a, ctx, NULL, tout, fn, ##__VA_ARGS__)
++
++#define aaudio_send_cmd_sync(a, ctx, reply, tout, fn, ...) ({ \
++    (ctx)->timeout = msecs_to_jiffies(tout); \
++    (ctx)->status = __aaudio_send_prepare(&(a)->bcem, (ctx), NULL); \
++    if (!(ctx)->status) { \
++        fn(&(ctx)->msg, ##__VA_ARGS__); \
++        (ctx)->status = __aaudio_send_cmd_sync(&(a)->bcem, (ctx), (reply)); \
++    } \
++    (ctx)->status; \
++})
++
++struct aaudio_msg aaudio_reply_alloc(void);
++void aaudio_reply_free(struct aaudio_msg *reply);
++
++#endif //AAUDIO_PROTOCOL_BCE_H
+diff --git a/drivers/staging/apple-bce/mailbox.c b/drivers/staging/apple-bce/mailbox.c
+new file mode 100644
+index 000000000..e24bd3521
+--- /dev/null
++++ b/drivers/staging/apple-bce/mailbox.c
+@@ -0,0 +1,151 @@
++#include "mailbox.h"
++#include <linux/atomic.h>
++#include "apple_bce.h"
++
++#define REG_MBOX_OUT_BASE 0x820
++#define REG_MBOX_REPLY_COUNTER 0x108
++#define REG_MBOX_REPLY_BASE 0x810
++#define REG_TIMESTAMP_BASE 0xC000
++
++#define BCE_MBOX_TIMEOUT_MS 200
++
++void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb)
++{
++    mb->reg_mb = reg_mb;
++    init_completion(&mb->mb_completion);
++}
++
++int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv)
++{
++    u32 __iomem *regb;
++
++    if (atomic_cmpxchg(&mb->mb_status, 0, 1) != 0) {
++        return -EEXIST; // We don't support two messages at once
++    }
++    reinit_completion(&mb->mb_completion);
++
++    pr_debug("bce_mailbox_send: %llx\n", msg);
++    regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_OUT_BASE);
++    iowrite32((u32) msg, regb);
++    iowrite32((u32) (msg >> 32), regb + 1);
++    iowrite32(0, regb + 2);
++    iowrite32(0, regb + 3);
++
++    wait_for_completion_timeout(&mb->mb_completion, msecs_to_jiffies(BCE_MBOX_TIMEOUT_MS));
++    if (atomic_read(&mb->mb_status) != 2) { // Didn't get the reply
++        atomic_set(&mb->mb_status, 0);
++        return -ETIMEDOUT;
++    }
++
++    *recv = mb->mb_result;
++    pr_debug("bce_mailbox_send: reply %llx\n", *recv);
++
++    atomic_set(&mb->mb_status, 0);
++    return 0;
++}
++
++static int bce_mailbox_retrieve_response(struct bce_mailbox *mb)
++{
++    u32 __iomem *regb;
++    u32 lo, hi;
++    int count, counter;
++    u32 res = ioread32((u8*) mb->reg_mb + REG_MBOX_REPLY_COUNTER);
++    count = (res >> 20) & 0xf;
++    counter = count;
++    pr_debug("bce_mailbox_retrieve_response count=%i\n", count);
++    while (counter--) {
++        regb = (u32*) ((u8*) mb->reg_mb + REG_MBOX_REPLY_BASE);
++        lo = ioread32(regb);
++        hi = ioread32(regb + 1);
++        ioread32(regb + 2);
++        ioread32(regb + 3);
++        pr_debug("bce_mailbox_retrieve_response %llx\n", ((u64) hi << 32) | lo);
++        mb->mb_result = ((u64) hi << 32) | lo;
++    }
++    return count > 0 ? 0 : -ENODATA;
++}
++
++int bce_mailbox_handle_interrupt(struct bce_mailbox *mb)
++{
++    int status = bce_mailbox_retrieve_response(mb);
++    if (!status) {
++        atomic_set(&mb->mb_status, 2);
++        complete(&mb->mb_completion);
++    }
++    return status;
++}
++
++static void bc_send_timestamp(struct timer_list *tl);
++
++void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg)
++{
++    u32 __iomem *regb;
++
++    spin_lock_init(&ts->stop_sl);
++    ts->stopped = false;
++
++    ts->reg = reg;
++
++    regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++    ioread32(regb);
++    mb();
++
++    timer_setup(&ts->timer, bc_send_timestamp, 0);
++}
++
++void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial)
++{
++    unsigned long flags;
++    u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++    if (is_initial) {
++        iowrite32((u32) -4, regb + 2);
++        iowrite32((u32) -1, regb);
++    } else {
++        iowrite32((u32) -3, regb + 2);
++        iowrite32((u32) -1, regb);
++    }
++
++    spin_lock_irqsave(&ts->stop_sl, flags);
++    ts->stopped = false;
++    spin_unlock_irqrestore(&ts->stop_sl, flags);
++    mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
++}
++
++void bce_timestamp_stop(struct bce_timestamp *ts)
++{
++    unsigned long flags;
++    u32 __iomem *regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++
++    spin_lock_irqsave(&ts->stop_sl, flags);
++    ts->stopped = true;
++    spin_unlock_irqrestore(&ts->stop_sl, flags);
++    del_timer_sync(&ts->timer);
++
++    iowrite32((u32) -2, regb + 2);
++    iowrite32((u32) -1, regb);
++}
++
++static void bc_send_timestamp(struct timer_list *tl)
++{
++    struct bce_timestamp *ts;
++    unsigned long flags;
++    u32 __iomem *regb;
++    ktime_t bt;
++
++    ts = container_of(tl, struct bce_timestamp, timer);
++    regb = (u32*) ((u8*) ts->reg + REG_TIMESTAMP_BASE);
++    local_irq_save(flags);
++    ioread32(regb + 2);
++    mb();
++    bt = ktime_get_boottime();
++    iowrite32((u32) bt, regb + 2);
++    iowrite32((u32) (bt >> 32), regb);
++
++    spin_lock(&ts->stop_sl);
++    if (!ts->stopped)
++        mod_timer(&ts->timer, jiffies + msecs_to_jiffies(150));
++    spin_unlock(&ts->stop_sl);
++    local_irq_restore(flags);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/mailbox.h b/drivers/staging/apple-bce/mailbox.h
+new file mode 100644
+index 000000000..f3323f95b
+--- /dev/null
++++ b/drivers/staging/apple-bce/mailbox.h
+@@ -0,0 +1,53 @@
++#ifndef BCE_MAILBOX_H
++#define BCE_MAILBOX_H
++
++#include <linux/completion.h>
++#include <linux/pci.h>
++#include <linux/timer.h>
++
++struct bce_mailbox {
++    void __iomem *reg_mb;
++
++    atomic_t mb_status; // possible statuses: 0 (no msg), 1 (has active msg), 2 (got reply)
++    struct completion mb_completion;
++    uint64_t mb_result;
++};
++
++enum bce_message_type {
++    BCE_MB_REGISTER_COMMAND_SQ = 0x7,            // to-device
++    BCE_MB_REGISTER_COMMAND_CQ = 0x8,            // to-device
++    BCE_MB_REGISTER_COMMAND_QUEUE_REPLY = 0xB,   // to-host
++    BCE_MB_SET_FW_PROTOCOL_VERSION = 0xC,        // both
++    BCE_MB_SLEEP_NO_STATE = 0x14,                // to-device
++    BCE_MB_RESTORE_NO_STATE = 0x15,              // to-device
++    BCE_MB_SAVE_STATE_AND_SLEEP = 0x17,          // to-device
++    BCE_MB_RESTORE_STATE_AND_WAKE = 0x18,        // to-device
++    BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19,  // from-device
++    BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A,   // from-device
++};
++
++#define BCE_MB_MSG(type, value) (((u64) (type) << 58) | ((value) & 0x3FFFFFFFFFFFFFFLL))
++#define BCE_MB_TYPE(v) ((u32) (v >> 58))
++#define BCE_MB_VALUE(v) (v & 0x3FFFFFFFFFFFFFFLL)
++
++void bce_mailbox_init(struct bce_mailbox *mb, void __iomem *reg_mb);
++
++int bce_mailbox_send(struct bce_mailbox *mb, u64 msg, u64* recv);
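++
++/*
++ * Reply flow, as implemented in mailbox.c above: bce_mailbox_send() claims
++ * the mailbox by moving mb_status from 0 to 1, writes the 64-bit message to
++ * the four words at REG_MBOX_OUT_BASE and sleeps on mb_completion. The
++ * interrupt path calls bce_mailbox_handle_interrupt(), which drains the
++ * reply registers into mb_result, sets mb_status to 2 and wakes the sender;
++ * any other status after the wait is treated as a timeout.
++ */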
++
++int bce_mailbox_handle_interrupt(struct bce_mailbox *mb);
++
++
++struct bce_timestamp {
++    void __iomem *reg;
++    struct timer_list timer;
++    struct spinlock stop_sl;
++    bool stopped;
++};
++
++void bce_timestamp_init(struct bce_timestamp *ts, void __iomem *reg);
++
++void bce_timestamp_start(struct bce_timestamp *ts, bool is_initial);
++
++void bce_timestamp_stop(struct bce_timestamp *ts);
++
++#endif //BCE_MAILBOX_H
+diff --git a/drivers/staging/apple-bce/queue.c b/drivers/staging/apple-bce/queue.c
+new file mode 100644
+index 000000000..bc9cd3bc6
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue.c
+@@ -0,0 +1,390 @@
++#include "queue.h"
++#include "apple_bce.h"
++
++#define REG_DOORBELL_BASE 0x44000
++
++struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count)
++{
++    struct bce_queue_cq *q;
++    q = kzalloc(sizeof(struct bce_queue_cq), GFP_KERNEL);
++    q->qid = qid;
++    q->type = BCE_QUEUE_CQ;
++    q->el_count = el_count;
++    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * sizeof(struct bce_qe_completion),
++            &q->dma_handle, GFP_KERNEL);
++    if (!q->data) {
++        pr_err("DMA queue memory alloc failed\n");
++        kfree(q);
++        return NULL;
++    }
++    return q;
++}
++
++void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
++{
++    cfg->qid = (u16) cq->qid;
++    cfg->el_count = (u16) cq->el_count;
++    cfg->vector_or_cq = 0;
++    cfg->_pad = 0;
++    cfg->addr = cq->dma_handle;
++    cfg->length = cq->el_count * sizeof(struct bce_qe_completion);
++}
++
++void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++    dma_free_coherent(&dev->pci->dev, cq->el_count * sizeof(struct bce_qe_completion), cq->data, cq->dma_handle);
++    kfree(cq);
++}
++
++static void bce_handle_cq_completion(struct apple_bce_device *dev, struct bce_qe_completion *e, size_t *ce)
++{
++    struct bce_queue *target;
++    struct bce_queue_sq *target_sq;
++    struct bce_sq_completion_data *cmpl;
++    if (e->qid >= BCE_MAX_QUEUE_COUNT) {
++        pr_err("Device sent a response for qid (%u) >= BCE_MAX_QUEUE_COUNT\n", e->qid);
++        return;
++    }
++    target = dev->queues[e->qid];
++    if (!target || target->type != BCE_QUEUE_SQ) {
++        pr_err("Device sent a response for qid (%u), which does not exist\n", e->qid);
++        return;
++    }
++    target_sq = (struct bce_queue_sq *) target;
++    if (target_sq->completion_tail != e->completion_index) {
++        pr_err("Completion index mismatch; this is likely going to make this driver unusable\n");
++        return;
++    }
++    if (!target_sq->has_pending_completions) {
++        target_sq->has_pending_completions = true;
++        dev->int_sq_list[(*ce)++] = target_sq;
++    }
++    cmpl = &target_sq->completion_data[e->completion_index];
++    cmpl->status = e->status;
++    cmpl->data_size = e->data_size;
++    cmpl->result = e->result;
++    wmb();
++    target_sq->completion_tail = (target_sq->completion_tail + 1) % target_sq->el_count;
++}
++
++void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++    size_t ce = 0;
++    struct bce_qe_completion *e;
++    struct bce_queue_sq *sq;
++    e = bce_cq_element(cq, cq->index);
++    if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
++        return;
++    mb();
++    while (true) {
++        e = bce_cq_element(cq, cq->index);
++        if (!(e->flags & BCE_COMPLETION_FLAG_PENDING))
++            break;
++        // pr_info("apple-bce: compl: %i: %i %llx %llx", e->qid, e->status, e->data_size, e->result);
++        bce_handle_cq_completion(dev, e, &ce);
++        e->flags = 0;
++        cq->index = (cq->index + 1) % cq->el_count;
++    }
++    mb();
++    iowrite32(cq->index, (u32 *) ((u8 *) dev->reg_mem_dma + REG_DOORBELL_BASE) + cq->qid);
++    while (ce) {
++        --ce;
++        sq = dev->int_sq_list[ce];
++        sq->completion(sq);
++        sq->has_pending_completions = false;
++    }
++}
++
++
++struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
++        bce_sq_completion compl, void *userdata)
++{
++    struct bce_queue_sq *q;
++    q = kzalloc(sizeof(struct bce_queue_sq), GFP_KERNEL);
++    q->qid = qid;
++    q->type = BCE_QUEUE_SQ;
++    q->el_size = el_size;
++    q->el_count = el_count;
++    q->data = dma_alloc_coherent(&dev->pci->dev, el_count * el_size,
++            &q->dma_handle, GFP_KERNEL);
++    q->completion = compl;
++    q->userdata = userdata;
++    q->completion_data = kzalloc(sizeof(struct bce_sq_completion_data) * el_count, GFP_KERNEL);
++    q->reg_mem_dma = dev->reg_mem_dma;
++    atomic_set(&q->available_commands, el_count - 1);
++    init_completion(&q->available_command_completion);
++    atomic_set(&q->available_command_completion_waiting_count, 0);
++    if (!q->data) {
++        pr_err("DMA queue memory alloc failed\n");
++        kfree(q);
++        return NULL;
++    }
++    return q;
++}
++
++void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
++{
++    cfg->qid = (u16) sq->qid;
++    cfg->el_count = (u16) sq->el_count;
++    cfg->vector_or_cq = (u16) cq->qid;
++    cfg->_pad = 0;
++    cfg->addr = sq->dma_handle;
++    cfg->length = sq->el_count * sq->el_size;
++}
++
++void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
++{
++    dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle);
++    kfree(sq);
++}
++
++int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout)
++{
++    while (atomic_dec_if_positive(&sq->available_commands) < 0) {
++        if (!timeout || !*timeout)
++            return -EAGAIN;
++        atomic_inc(&sq->available_command_completion_waiting_count);
++        *timeout = wait_for_completion_timeout(&sq->available_command_completion, *timeout);
++        if (!*timeout) {
++            if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) < 0)
++                try_wait_for_completion(&sq->available_command_completion); /* consume the pending completion */
++        }
++    }
++    return 0;
++}
++
++void bce_cancel_submission_reservation(struct bce_queue_sq *sq)
++{
++    atomic_inc(&sq->available_commands);
++}
++
++void *bce_next_submission(struct bce_queue_sq *sq)
++{
++    void *ret = bce_sq_element(sq, sq->tail);
++    sq->tail = (sq->tail + 1) % sq->el_count;
++    return ret;
++}
++
++void bce_submit_to_device(struct bce_queue_sq *sq)
++{
++    mb();
++    iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma + REG_DOORBELL_BASE) + sq->qid);
++}
++
++void bce_notify_submission_complete(struct bce_queue_sq *sq)
++{
++    sq->head = (sq->head + 1) % sq->el_count;
++    atomic_inc(&sq->available_commands);
++    if (atomic_dec_if_positive(&sq->available_command_completion_waiting_count) >= 0) {
++        complete(&sq->available_command_completion);
++    }
++}
++
++void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size)
++{
++    element->addr = addr;
++    element->length = size;
++    element->segl_addr = element->segl_length = 0;
++}
++
++static void bce_cmdq_completion(struct bce_queue_sq *q);
++
++struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count)
++{
++    struct bce_queue_cmdq *q;
++    q = kzalloc(sizeof(struct bce_queue_cmdq), GFP_KERNEL);
++    q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q);
++    if (!q->sq) {
++        kfree(q);
++        return NULL;
++    }
++    spin_lock_init(&q->lck);
++    q->tres = kzalloc(sizeof(struct bce_queue_cmdq_result_el*) * el_count, GFP_KERNEL);
++    if (!q->tres) {
++        kfree(q);
++        return NULL;
++    }
++    return q;
++}
++
++void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq)
++{
++    bce_free_sq(dev, cmdq->sq);
++    kfree(cmdq->tres);
++    kfree(cmdq);
++}
++
++void bce_cmdq_completion(struct bce_queue_sq *q)
++{
++    struct bce_queue_cmdq_result_el *el;
++    struct bce_queue_cmdq *cmdq = q->userdata;
++    struct bce_sq_completion_data *result;
++
++    spin_lock(&cmdq->lck);
++    while ((result = bce_next_completion(q))) {
++        el = cmdq->tres[cmdq->sq->head];
++        if (el) {
++            el->result = result->result;
++            el->status = result->status;
++            mb();
++            complete(&el->cmpl);
++        } else {
++            pr_err("apple-bce: Unexpected command queue completion\n");
++        }
++        cmdq->tres[cmdq->sq->head] = NULL;
++        bce_notify_submission_complete(q);
++    }
++    spin_unlock(&cmdq->lck);
++}
++
++static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
++{
++    void *ret;
++    unsigned long timeout;
++    init_completion(&res->cmpl);
++    mb();
++
++    timeout = msecs_to_jiffies(1000L * 60 * 5); /* wait for up to ~5 minutes */
++    if (bce_reserve_submission(cmdq->sq, &timeout))
++        return NULL;
++
++    spin_lock(&cmdq->lck);
++    cmdq->tres[cmdq->sq->tail] = res;
++    ret = bce_next_submission(cmdq->sq);
++    return ret;
++}
++
++static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
++{
++    bce_submit_to_device(cmdq->sq);
++    spin_unlock(&cmdq->lck);
++
++    wait_for_completion(&res->cmpl);
++    mb();
++}
++
++u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout)
++{
++    struct bce_queue_cmdq_result_el res;
++    struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++    if (!cmd)
++        return (u32) -1;
++    cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE;
++    cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 1 : 0));
++    cmd->qid = cfg->qid;
++    cmd->el_count = cfg->el_count;
++    cmd->vector_or_cq = cfg->vector_or_cq;
++    memset(cmd->name, 0, sizeof(cmd->name));
++    if (name) {
++        cmd->name_len = (u16) min(strlen(name), (size_t) sizeof(cmd->name));
++        memcpy(cmd->name, name, cmd->name_len);
++    } else {
++        cmd->name_len = 0;
++    }
++    cmd->addr = cfg->addr;
++    cmd->length = cfg->length;
++
++    bce_cmd_finish(cmdq, &res);
++    return res.status;
++}
++
++u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
++{
++    struct bce_queue_cmdq_result_el res;
++    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++    if (!cmd)
++        return (u32) -1;
++    cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE;
++    cmd->flags = 0;
++    cmd->qid = qid;
++    bce_cmd_finish(cmdq, &res);
++    return res.status;
++}
++
++u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
++{
++    struct bce_queue_cmdq_result_el res;
++    struct bce_cmdq_simple_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res);
++    if (!cmd)
++        return (u32) -1;
++    cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE;
++    cmd->flags = 0;
++    cmd->qid = qid;
++    bce_cmd_finish(cmdq, &res);
++    return res.status;
++}
++
++
++struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count)
++{
++    struct bce_queue_cq *cq;
++    struct bce_queue_memcfg cfg;
++    int qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
++    if (qid < 0)
++        return NULL;
++    cq = bce_alloc_cq(dev, qid, el_count);
++    if (!cq)
++        return NULL;
++    bce_get_cq_memcfg(cq, &cfg);
++    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) != 0) {
++        pr_err("apple-bce: CQ registration failed (%i)", qid);
++        bce_free_cq(dev, cq);
++        ida_simple_remove(&dev->queue_ida, (uint) qid);
++        return NULL;
++    }
++    dev->queues[qid] = (struct bce_queue *) cq;
++    return cq;
++}
++
++struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
++        int direction, bce_sq_completion compl, void *userdata)
++{
++    struct bce_queue_sq *sq;
++    struct bce_queue_memcfg cfg;
++    int qid;
++    if (cq == NULL)
++        return NULL; /* cq can not be null */
++    if (name == NULL)
++        return NULL; /* name can not be null */
++    if (direction != DMA_TO_DEVICE && direction != DMA_FROM_DEVICE)
++        return NULL; /* unsupported direction */
++    qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL);
++    if (qid < 0)
++        return NULL;
++    sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata);
++    if (!sq)
++        return NULL;
++    bce_get_sq_memcfg(sq, cq, &cfg);
++    if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) != 0) {
++        pr_err("apple-bce: SQ registration failed (%i)", qid);
++        bce_free_sq(dev, sq);
++        ida_simple_remove(&dev->queue_ida, (uint) qid);
++        return NULL;
++    }
++    spin_lock(&dev->queues_lock);
++    dev->queues[qid] = (struct bce_queue *) sq;
++    spin_unlock(&dev->queues_lock);
++    return sq;
++}
++
++void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq)
++{
++    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid))
++        pr_err("apple-bce: CQ unregister failed");
++    spin_lock(&dev->queues_lock);
++    dev->queues[cq->qid] = NULL;
++    spin_unlock(&dev->queues_lock);
++    ida_simple_remove(&dev->queue_ida, (uint) cq->qid);
++    bce_free_cq(dev, cq);
++}
++
++void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq)
++{
++    if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid))
++        pr_err("apple-bce: SQ unregister failed");
++    spin_lock(&dev->queues_lock);
++    dev->queues[sq->qid] = NULL;
++    spin_unlock(&dev->queues_lock);
++    ida_simple_remove(&dev->queue_ida, (uint) sq->qid);
++    bce_free_sq(dev, sq);
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/queue.h b/drivers/staging/apple-bce/queue.h
+new file mode 100644
+index 000000000..8368ac5df
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue.h
+@@ -0,0 +1,177 @@
++#ifndef BCE_QUEUE_H
++#define BCE_QUEUE_H
++
++#include <linux/completion.h>
++#include <linux/pci.h>
++
++#define BCE_CMD_SIZE 0x40
++
++struct apple_bce_device;
++
++enum bce_queue_type {
++    BCE_QUEUE_CQ, BCE_QUEUE_SQ
++};
++struct bce_queue {
++    int qid;
++    int type;
++};
++struct bce_queue_cq {
++    int qid;
++    int type;
++    u32 el_count;
++    dma_addr_t dma_handle;
++    void *data;
++
++    u32 index;
++};
++struct bce_queue_sq;
++typedef void (*bce_sq_completion)(struct bce_queue_sq *q);
++struct bce_sq_completion_data {
++    u32 status;
++    u64 data_size;
++    u64 result;
++};
++struct bce_queue_sq {
++    int qid;
++    int type;
++    u32 el_size;
++    u32 el_count;
++    dma_addr_t dma_handle;
++    void *data;
++    void *userdata;
++    void __iomem *reg_mem_dma;
++
++    atomic_t available_commands;
++    struct completion available_command_completion;
++    atomic_t available_command_completion_waiting_count;
++    u32 head, tail;
++
++    u32 completion_cidx, completion_tail;
++    struct bce_sq_completion_data *completion_data;
++    bool has_pending_completions;
++    bce_sq_completion completion;
++};
++
++struct bce_queue_cmdq_result_el {
++    struct completion cmpl;
++    u32 status;
++    u64 result;
++};
++struct bce_queue_cmdq {
++    struct bce_queue_sq *sq;
++    struct spinlock lck;
++    struct bce_queue_cmdq_result_el **tres;
++};
++
++struct bce_queue_memcfg {
++    u16 qid;
++    u16 el_count;
++    u16 vector_or_cq;
++    u16 _pad;
++    u64 addr;
++    u64 length;
++};
++
++enum bce_qe_completion_status {
++    BCE_COMPLETION_SUCCESS = 0,
++    BCE_COMPLETION_ERROR = 1,
++    BCE_COMPLETION_ABORTED = 2,
++    BCE_COMPLETION_NO_SPACE = 3,
++    BCE_COMPLETION_OVERRUN = 4
++};
++enum bce_qe_completion_flags {
++    BCE_COMPLETION_FLAG_PENDING = 0x8000
++};
++struct bce_qe_completion {
++    u64 result;
++    u64 data_size;
++    u16 qid;
++    u16 completion_index;
++    u16 status; // bce_qe_completion_status
++    u16 flags; // bce_qe_completion_flags
++};
++
++struct bce_qe_submission {
++    u64 length;
++    u64 addr;
++
++    u64 segl_addr;
++    u64 segl_length;
++};
++
++enum bce_cmdq_command {
++    BCE_CMD_REGISTER_MEMORY_QUEUE = 0x20,
++    BCE_CMD_UNREGISTER_MEMORY_QUEUE = 0x30,
++    BCE_CMD_FLUSH_MEMORY_QUEUE = 0x40,
++    BCE_CMD_SET_MEMORY_QUEUE_PROPERTY = 0x50
++};
++struct bce_cmdq_simple_memory_queue_cmd {
++    u16 cmd; // bce_cmdq_command
++    u16 flags;
++    u16 qid;
++};
++struct bce_cmdq_register_memory_queue_cmd {
++    u16 cmd; // bce_cmdq_command
++    u16 flags;
++    u16 qid;
++    u16 _pad;
++    u16 el_count;
++    u16 vector_or_cq;
++    u16 _pad2;
++    u16 name_len;
++    char name[0x20];
++    u64 addr;
++    u64 length;
++};
++
++static __always_inline void *bce_sq_element(struct bce_queue_sq *q, int i) {
++    return (void *) ((u8 *) q->data + q->el_size * i);
++}
++static __always_inline void *bce_cq_element(struct bce_queue_cq *q, int i) {
++    return (void *) ((struct bce_qe_completion *) q->data + i);
++}
++
++static __always_inline struct bce_sq_completion_data *bce_next_completion(struct bce_queue_sq *sq) {
++    struct bce_sq_completion_data *res;
++    rmb();
++    if (sq->completion_cidx == sq->completion_tail)
++        return NULL;
++    res = &sq->completion_data[sq->completion_cidx];
++    sq->completion_cidx = (sq->completion_cidx + 1) % sq->el_count;
++    return res;
++}
++
++struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_count);
++void bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
++void bce_free_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++void bce_handle_cq_completions(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++
++struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_size, u32 el_count,
++        bce_sq_completion compl, void *userdata);
++void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg);
++void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
++int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout);
++void bce_cancel_submission_reservation(struct bce_queue_sq *sq);
++void *bce_next_submission(struct bce_queue_sq *sq);
++void bce_submit_to_device(struct bce_queue_sq *sq);
++void bce_notify_submission_complete(struct bce_queue_sq *sq);
++
++void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size);
++
++struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count);
++void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq);
++
++u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout);
++u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
++u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid);
++
++
++/* User API - Creates and registers the queue */
++
++struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count);
++struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count,
++        int direction, bce_sq_completion compl, void *userdata);
++void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq);
++void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq);
++
++#endif //BCE_QUEUE_H
+diff --git a/drivers/staging/apple-bce/queue_dma.c b/drivers/staging/apple-bce/queue_dma.c
+new file mode 100644
+index 000000000..b23661328
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue_dma.c
+@@ -0,0 +1,220 @@
++#include "queue_dma.h"
++#include <linux/vmalloc.h>
++#include <linux/mm.h>
++#include "queue.h"
++
++static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len);
++static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
++        struct device *dev, struct scatterlist *pages, int pagen);
++static void bce_unmap_segment_list(struct device *dev, struct bce_segment_list_element_hostinfo *list);
++
++int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
++        enum dma_data_direction dir)
++{
++    int cnt;
++
++    buf->direction = dir;
++    buf->scatterlist = scatterlist;
++    buf->seglist_hostinfo = NULL;
++
++    cnt = dma_map_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++    if (cnt != buf->scatterlist.nents) {
++        pr_err("apple-bce: DMA scatter list mapping returned an unexpected count: %i\n", cnt);
++        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++        return -EIO;
++    }
++    if (cnt == 1)
++        return 0;
++
++    buf->seglist_hostinfo = bce_map_segment_list(dev, buf->scatterlist.sgl, buf->scatterlist.nents);
++    if (!buf->seglist_hostinfo) {
++        pr_err("apple-bce: Creating segment list failed\n");
++        dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, dir);
++        return -EIO;
++    }
++    return 0;
++}
++
++int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir)
++{
++    int status;
++    struct sg_table scatterlist;
++    if ((status = bce_alloc_scatterlist_from_vm(&scatterlist, data, len)))
++        return status;
++    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
++        sg_free_table(&scatterlist);
++        return status;
++    }
++    return 0;
++}
++
++int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir)
++{
++    /* Kernel memory is contiguous, which is great for us. */
++    int status;
++    struct sg_table scatterlist;
++    if ((status = sg_alloc_table(&scatterlist, 1, GFP_KERNEL))) {
++        sg_free_table(&scatterlist);
++        return status;
++    }
++    sg_set_buf(scatterlist.sgl, data, (uint) len);
++    if ((status = bce_map_dma_buffer(dev, buf, scatterlist, dir))) {
++        sg_free_table(&scatterlist);
++        return status;
++    }
++    return 0;
++}
++
++void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf)
++{
++    dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction);
++    bce_unmap_segment_list(dev, buf->seglist_hostinfo);
++}
++
++
++static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len)
++{
++    int status, i;
++    struct page **pages;
++    size_t off, start_page, end_page, page_count;
++    off = (size_t) data % PAGE_SIZE;
++    start_page = (size_t) data / PAGE_SIZE;
++    end_page = ((size_t) data + len - 1) / PAGE_SIZE;
++    page_count = end_page - start_page + 1;
++
++    if (page_count > PAGE_SIZE / sizeof(struct page *))
++        pages = vmalloc(page_count * sizeof(struct page *));
++    else
++        pages = kmalloc(page_count * sizeof(struct page *), GFP_KERNEL);
++
++    for (i = 0; i < page_count; i++)
++        pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE));
++
++    if ((status = sg_alloc_table_from_pages(tbl, pages, page_count, (unsigned int) off, len, GFP_KERNEL))) {
++        sg_free_table(tbl);
++    }
++
++    if (page_count > PAGE_SIZE / sizeof(struct page *))
++        vfree(pages);
++    else
++        kfree(pages);
++    return status;
++}
++
++#define BCE_ELEMENTS_PER_PAGE ((PAGE_SIZE - sizeof(struct bce_segment_list_header)) \
++        / sizeof(struct bce_segment_list_element))
++#define BCE_ELEMENTS_PER_ADDITIONAL_PAGE (PAGE_SIZE / sizeof(struct bce_segment_list_element))
++
++static struct bce_segment_list_element_hostinfo *bce_map_segment_list(
++        struct device *dev, struct scatterlist *pages, int pagen)
++{
++    size_t ptr, pptr = 0;
++    struct bce_segment_list_header theader; /* a temp header, to store the initial seg */
++    struct bce_segment_list_header *header;
++    struct bce_segment_list_element *el, *el_end;
++    struct bce_segment_list_element_hostinfo *out, *pout, *out_root;
++    struct scatterlist *sg;
++    int i;
++    header = &theader;
++    out = out_root = NULL;
++    el = el_end = NULL;
++    for_each_sg(pages, sg, pagen, i) {
++        if (el >= el_end) {
++            /* allocate a new page, this will be also done for the first element */
++            ptr = __get_free_page(GFP_KERNEL);
++            if (pptr && ptr == pptr + PAGE_SIZE) {
++                out->page_count++;
++                header->element_count += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
++                el_end += BCE_ELEMENTS_PER_ADDITIONAL_PAGE;
++            } else {
++                header = (void *) ptr;
++                header->element_count = BCE_ELEMENTS_PER_PAGE;
++                header->data_size = 0;
++                header->next_segl_addr = 0;
++                header->next_segl_length = 0;
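++                /*
++                 * A non-adjacent page starts a new segment: it begins with a
++                 * header, and the previous segment is chained to it via
++                 * next_segl_addr/next_segl_length once its DMA address is
++                 * known (see the mapping loop below).
++                 */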
++                el = (void *) (header + 1);
++                el_end = el + BCE_ELEMENTS_PER_PAGE;
++
++                if (out) {
++                    out->next = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
++                    out = out->next;
++                } else {
++                    out_root = out = kmalloc(sizeof(struct bce_segment_list_element_hostinfo), GFP_KERNEL);
++                }
++                out->page_start = (void *) ptr;
++                out->page_count = 1;
++                out->dma_start = DMA_MAPPING_ERROR;
++                out->next = NULL;
++            }
++            pptr = ptr;
++        }
++        el->addr = sg->dma_address;
++        el->length = sg->length;
++        header->data_size += el->length;
++    }
++
++    /* DMA map */
++    out = out_root;
++    pout = NULL;
++    while (out) {
++        out->dma_start = dma_map_single(dev, out->page_start, out->page_count * PAGE_SIZE, DMA_TO_DEVICE);
++        if (dma_mapping_error(dev, out->dma_start))
++            goto error;
++        if (pout) {
++            header = pout->page_start;
++            header->next_segl_addr = out->dma_start;
++            header->next_segl_length = out->page_count * PAGE_SIZE;
++        }
++        pout = out;
++        out = out->next;
++    }
++    return out_root;
++
++    error:
++    bce_unmap_segment_list(dev, out_root);
++    return NULL;
++}
++
++static void bce_unmap_segment_list(struct device *dev, struct bce_segment_list_element_hostinfo *list)
++{
++    struct bce_segment_list_element_hostinfo *next;
++    while (list) {
++        if (list->dma_start != DMA_MAPPING_ERROR)
++            dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE);
++        next = list->next;
++        kfree(list);
++        list = next;
++    }
++}
++
++int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length)
++{
++    struct bce_segment_list_element_hostinfo *seg;
++    struct bce_segment_list_header *seg_header;
++
++    seg = buf->seglist_hostinfo;
++    if (!seg) {
++        element->addr = buf->scatterlist.sgl->dma_address + offset;
++        element->length = length;
++        element->segl_addr = 0;
++        element->segl_length = 0;
++        return 0;
++    }
++
++    while (seg) {
++        seg_header = seg->page_start;
++        if (offset <= seg_header->data_size)
++            break;
++        offset -= seg_header->data_size;
++        seg = seg->next;
++    }
++    if (!seg)
++        return -EINVAL;
++    element->addr = offset;
++    element->length = buf->scatterlist.sgl->dma_length;
++    element->segl_addr = seg->dma_start;
++    element->segl_length = seg->page_count * PAGE_SIZE;
++    return 0;
++}
+\ No newline at end of file
+diff --git a/drivers/staging/apple-bce/queue_dma.h b/drivers/staging/apple-bce/queue_dma.h
+new file mode 100644
+index 000000000..f8a57e50e
+--- /dev/null
++++ b/drivers/staging/apple-bce/queue_dma.h
+@@ -0,0 +1,50 @@
++#ifndef BCE_QUEUE_DMA_H
++#define BCE_QUEUE_DMA_H
++
++#include <linux/dma-mapping.h>
++
++struct bce_qe_submission;
++
++struct bce_segment_list_header {
++    u64 element_count;
++    u64 data_size;
++
++    u64 next_segl_addr;
++    u64 next_segl_length;
++};
++struct bce_segment_list_element {
++    u64 addr;
++    u64 length;
++};
++
++struct bce_segment_list_element_hostinfo {
++    struct bce_segment_list_element_hostinfo *next;
++    void *page_start;
++    size_t page_count;
++    dma_addr_t dma_start;
++};
++
++
++struct bce_dma_buffer {
++    enum dma_data_direction direction;
++    struct sg_table scatterlist;
++    struct bce_segment_list_element_hostinfo *seglist_hostinfo;
++};
++
++/* NOTE: Takes ownership of the sg_table if it succeeds. Ownership is not transferred on failure. */
++int bce_map_dma_buffer(struct device *dev, struct bce_dma_buffer *buf, struct sg_table scatterlist,
++        enum dma_data_direction dir);
++
++/* Creates a buffer from virtual memory (vmalloc) */
++int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir);
++
++/* Creates a buffer from kernel memory (kmalloc) */
++int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len,
++        enum dma_data_direction dir);
++
++void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf);
++
++int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buffer *buf, size_t offset, size_t length);
++
++#endif //BCE_QUEUE_DMA_H
+diff --git a/drivers/staging/apple-bce/vhci/command.h b/drivers/staging/apple-bce/vhci/command.h
+new file mode 100644
+index 000000000..26619e0bc
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/command.h
+@@ -0,0 +1,204 @@
++#ifndef BCE_VHCI_COMMAND_H
++#define BCE_VHCI_COMMAND_H
++
++#include "queue.h"
++#include <linux/jiffies.h>
++#include <linux/usb.h>
++
++#define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000)
++#define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000)
++
++#define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2
++#define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2)
++
++typedef u8 bce_vhci_port_t;
++typedef u8 bce_vhci_device_t;
++
++enum bce_vhci_command {
++    BCE_VHCI_CMD_CONTROLLER_ENABLE = 1,
++    BCE_VHCI_CMD_CONTROLLER_DISABLE = 2,
++    BCE_VHCI_CMD_CONTROLLER_START = 3,
++    BCE_VHCI_CMD_CONTROLLER_PAUSE = 4,
++
++    BCE_VHCI_CMD_PORT_POWER_ON = 0x10,
++    BCE_VHCI_CMD_PORT_POWER_OFF = 0x11,
++    BCE_VHCI_CMD_PORT_RESUME = 0x12,
++    BCE_VHCI_CMD_PORT_SUSPEND = 0x13,
++    BCE_VHCI_CMD_PORT_RESET = 0x14,
++    BCE_VHCI_CMD_PORT_DISABLE = 0x15,
++    BCE_VHCI_CMD_PORT_STATUS = 0x16,
++
++    BCE_VHCI_CMD_DEVICE_CREATE = 0x30,
++    BCE_VHCI_CMD_DEVICE_DESTROY = 0x31,
++
++    BCE_VHCI_CMD_ENDPOINT_CREATE = 0x40,
++    BCE_VHCI_CMD_ENDPOINT_DESTROY = 0x41,
++    BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42,
++    BCE_VHCI_CMD_ENDPOINT_RESET = 0x44,
++
++    /* Device to host only */
++    BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43,
++    BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000,
++    BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS = 0x1005
++};
++
++enum bce_vhci_endpoint_state {
++    BCE_VHCI_ENDPOINT_ACTIVE = 0,
++    BCE_VHCI_ENDPOINT_PAUSED = 1,
++    BCE_VHCI_ENDPOINT_STALLED = 2
++};
++
++static inline int bce_vhci_cmd_controller_enable(struct bce_vhci_command_queue *q, u8 busNum, u16 *portMask)
++{
++    int status;
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_ENABLE;
++    cmd.param1 = 0x7100u | busNum;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++    if (!status)
++        *portMask = (u16) res.param2;
++    return status;
++}
++static inline int bce_vhci_cmd_controller_disable(struct bce_vhci_command_queue *q)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_DISABLE;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_controller_start(struct bce_vhci_command_queue *q)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_START;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_controller_pause(struct bce_vhci_command_queue *q)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++
++static inline int bce_vhci_cmd_port_power_on(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_POWER_ON;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_power_off(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_POWER_OFF;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_resume(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_RESUME;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_port_suspend(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_SUSPEND;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++static inline int bce_vhci_cmd_port_reset(struct bce_vhci_command_queue *q, bce_vhci_port_t port, u32 timeout)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_RESET;
++    cmd.param1 = port;
++    cmd.param2 = timeout;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_disable(struct bce_vhci_command_queue *q, bce_vhci_port_t port)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_DISABLE;
++    cmd.param1 = port;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_port_status(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
++        u32 clearFlags, u32 *resStatus)
++{
++    int status;
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_PORT_STATUS;
++    cmd.param1 = port;
++    cmd.param2 = clearFlags & 0x560000;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++    if (status >= 0)
++        *resStatus = (u32) res.param2;
++    return status;
++}
++
++static inline int bce_vhci_cmd_device_create(struct bce_vhci_command_queue *q, bce_vhci_port_t port,
++        bce_vhci_device_t *dev)
++{
++    int status;
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_DEVICE_CREATE;
++    cmd.param1 = port;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++    if (!status)
++        *dev = (bce_vhci_device_t) res.param2;
++    return status;
++}
++static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY;
++    cmd.param1 = dev;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG);
++}
++
++static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev,
++        struct usb_endpoint_descriptor *desc)
++{
++    struct bce_vhci_message cmd, res;
++    int endpoint_type = usb_endpoint_type(desc);
++    int maxp = usb_endpoint_maxp(desc);
++    int maxp_burst = usb_endpoint_maxp_mult(desc) * maxp;
++    u8 max_active_requests_pow2 = 0;
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_CREATE;
++    cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8);
++    if (endpoint_type == USB_ENDPOINT_XFER_BULK)
++        max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2;
++    cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) maxp_burst << 32);
++    if (endpoint_type == USB_ENDPOINT_XFER_INT)
++        cmd.param2 |= (desc->bInterval - 1) << 8;
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_endpoint_destroy(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_DESTROY;
++    cmd.param1 = dev | (endpoint << 8);
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++static inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint,
++        enum bce_vhci_endpoint_state newState, enum bce_vhci_endpoint_state *retState)
++{
++    int status;
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_SET_STATE;
++    cmd.param1 = dev | (endpoint << 8);
++    cmd.param2 = (u64) newState;
++    status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++    if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER)
++        *retState = (enum bce_vhci_endpoint_state) res.param2;
++    return status;
++}
++static inline int bce_vhci_cmd_endpoint_reset(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, u8 endpoint)
++{
++    struct bce_vhci_message cmd, res;
++    cmd.cmd = BCE_VHCI_CMD_ENDPOINT_RESET;
++    cmd.param1 = dev | (endpoint << 8);
++    return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT);
++}
++
++
++#endif //BCE_VHCI_COMMAND_H
+diff --git a/drivers/staging/apple-bce/vhci/queue.c b/drivers/staging/apple-bce/vhci/queue.c
+new file mode 100644
+index 000000000..7b0b50271
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/queue.c
+@@ -0,0 +1,268 @@
++#include "queue.h"
++#include "vhci.h"
++#include "../apple_bce.h"
++
++
++static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq);
++
++int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name)
++{
++    int status;
++    ret->cq = bce_create_cq(vhci->dev, VHCI_EVENT_QUEUE_EL_COUNT);
++    if (!ret->cq)
++        return -EINVAL;
++    ret->sq = bce_create_sq(vhci->dev, ret->cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_TO_DEVICE,
++            bce_vhci_message_queue_completion, ret);
++    if (!ret->sq) {
++        status = -EINVAL;
++        goto fail_cq;
++    }
++    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++            &ret->dma_addr, GFP_KERNEL);
++    if (!ret->data) {
++        status = -EINVAL;
++        goto fail_sq;
++    }
++    return 0;
++
++fail_sq:
++    bce_destroy_sq(vhci->dev, ret->sq);
++    ret->sq = NULL;
++fail_cq:
++    bce_destroy_cq(vhci->dev, ret->cq);
++    ret->cq = NULL;
++    return status;
++}
++
++void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q)
++{
++    if (!q->cq)
++        return;
++    dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++            q->data, q->dma_addr);
++    bce_destroy_sq(vhci->dev, q->sq);
++    bce_destroy_cq(vhci->dev, q->cq);
++}
++
++void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req)
++{
++    int sidx;
++    struct bce_qe_submission *s;
++    sidx = q->sq->tail;
++    s = bce_next_submission(q->sq);
++    pr_debug("bce-vhci: Send message: %x s=%x p1=%x p2=%llx\n", req->cmd, req->status, req->param1, req->param2);
++    q->data[sidx] = *req;
++    bce_set_submission_single(s, q->dma_addr + sizeof(struct bce_vhci_message) * sidx,
++            sizeof(struct bce_vhci_message));
++    bce_submit_to_device(q->sq);
++}
++
++static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq)
++{
++    while (bce_next_completion(sq))
++        bce_notify_submission_complete(sq);
++}
++
++
++
++static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq);
++
++int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_sq_completion compl)
++{
++    ret->vhci = vhci;
++
++    ret->sq = bce_create_sq(vhci->dev, vhci->ev_cq, name, VHCI_EVENT_QUEUE_EL_COUNT, DMA_FROM_DEVICE, compl, ret);
++    if (!ret->sq)
++        return -EINVAL;
++    ret->data = dma_alloc_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++            &ret->dma_addr, GFP_KERNEL);
++    if (!ret->data) {
++        bce_destroy_sq(vhci->dev, ret->sq);
++        ret->sq = NULL;
++        return -EINVAL;
++    }
++
++    init_completion(&ret->queue_empty_completion);
++    bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT);
++    return 0;
++}
++
++int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_vhci_event_queue_callback cb)
++{
++    ret->cb = cb;
++    return __bce_vhci_event_queue_create(vhci, ret, name, bce_vhci_event_queue_completion);
++}
++
++void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q)
++{
++    if (!q->sq)
++        return;
++    dma_free_coherent(&vhci->dev->pci->dev, sizeof(struct bce_vhci_message) * VHCI_EVENT_QUEUE_EL_COUNT,
++            q->data, q->dma_addr);
++    bce_destroy_sq(vhci->dev, q->sq);
++}
++
++static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq)
++{
++    struct bce_sq_completion_data *cd;
++    struct bce_vhci_event_queue *ev = sq->userdata;
++    struct bce_vhci_message *msg;
++    size_t cnt = 0;
++
++    while ((cd = bce_next_completion(sq))) {
++        if (cd->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        msg = &ev->data[sq->head];
++        pr_debug("bce-vhci: Got event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
++        ev->cb(ev, msg);
++
++        bce_notify_submission_complete(sq);
++        ++cnt;
++    }
++    bce_vhci_event_queue_submit_pending(ev, cnt);
++    if (atomic_read(&sq->available_commands) == sq->el_count - 1)
++        complete(&ev->queue_empty_completion);
++}
++
++void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count)
++{
++    int idx;
++    struct bce_qe_submission *s;
++    while (count--) {
++        if (bce_reserve_submission(q->sq, NULL)) {
++            pr_err("bce-vhci: Failed to reserve an event queue submission\n");
++            break;
++        }
++        idx = q->sq->tail;
++        s = bce_next_submission(q->sq);
++        bce_set_submission_single(s,
++                q->dma_addr + idx * sizeof(struct bce_vhci_message), sizeof(struct bce_vhci_message));
++    }
++    bce_submit_to_device(q->sq);
++}
++
++void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q)
++{
++    unsigned long timeout;
++    reinit_completion(&q->queue_empty_completion);
++    if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid))
++        pr_warn("bce-vhci: failed to flush event queue\n");
++    timeout = msecs_to_jiffies(5000);
++    while (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) {
++        timeout = wait_for_completion_timeout(&q->queue_empty_completion, timeout);
++        if (timeout == 0) {
++            pr_err("bce-vhci: waiting for queue to be flushed timed out\n");
++            break;
++        }
++    }
++}
++
++void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q)
++{
++    if (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) {
++        pr_err("bce-vhci: resume of a queue with pending submissions\n");
++        return;
++    }
++    bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT);
++}
++
++void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq)
++{
++    ret->mq = mq;
++    ret->completion.result = NULL;
++    init_completion(&ret->completion.completion);
++    spin_lock_init(&ret->completion_lock);
++    mutex_init(&ret->mutex);
++}
++
++void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq)
++{
++    spin_lock(&cq->completion_lock);
++    if (cq->completion.result) {
++        memset(cq->completion.result, 0, sizeof(struct bce_vhci_message));
++        cq->completion.result->status = BCE_VHCI_ABORT;
++        complete(&cq->completion.completion);
++        cq->completion.result = NULL;
++    }
++    spin_unlock(&cq->completion_lock);
++    mutex_lock(&cq->mutex);
++    mutex_unlock(&cq->mutex);
++    mutex_destroy(&cq->mutex);
++}
++
++void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg)
++{
++    struct bce_vhci_command_queue_completion *c = &cq->completion;
++
++    spin_lock(&cq->completion_lock);
++    if (c->result) {
++        *c->result = *msg;
++        complete(&c->completion);
++        c->result = NULL;
++    }
++    spin_unlock(&cq->completion_lock);
++}
++
++static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout)
++{
++    int status;
++    struct bce_vhci_command_queue_completion *c;
++    struct bce_vhci_message creq;
++    c = &cq->completion;
++
++    if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
++        return status;
++
++    spin_lock(&cq->completion_lock);
++    c->result = res;
++    reinit_completion(&c->completion);
++    spin_unlock(&cq->completion_lock);
++
++    bce_vhci_message_queue_write(cq->mq, req);
++
++    if (!wait_for_completion_timeout(&c->completion, timeout)) {
++        /* we ran out of time, send cancellation */
++        pr_debug("bce-vhci: command timed out req=%x\n", req->cmd);
++        if ((status = bce_reserve_submission(cq->mq->sq, &timeout)))
++            return status;
++
++        creq = *req;
++        creq.cmd |= 0x4000;
++        bce_vhci_message_queue_write(cq->mq, &creq);
++
++        if (!wait_for_completion_timeout(&c->completion, 1000)) {
++            pr_err("bce-vhci: Possible desync, cmd cancel timed out\n");
++
++            spin_lock(&cq->completion_lock);
++            c->result = NULL;
++            spin_unlock(&cq->completion_lock);
++            return -ETIMEDOUT;
++        }
++        if ((res->cmd & ~0x8000) == creq.cmd)
++            return -ETIMEDOUT;
++        /* reply for the previous command most likely arrived */
++    }
++
++    if ((res->cmd & ~0x8000) != req->cmd) {
++        pr_err("bce-vhci: Possible desync, cmd reply mismatch req=%x, res=%x\n", req->cmd, res->cmd);
++        return -EIO;
++    }
++    if (res->status == BCE_VHCI_SUCCESS)
++        return 0;
++    return res->status;
++}
++
++int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout)
++{
++    int status;
++    mutex_lock(&cq->mutex);
++    status = __bce_vhci_command_queue_execute(cq, req, res, timeout);
++    mutex_unlock(&cq->mutex);
++    return status;
++}
+diff --git a/drivers/staging/apple-bce/vhci/queue.h b/drivers/staging/apple-bce/vhci/queue.h
+new file mode 100644
+index 000000000..adb705b6b
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/queue.h
+@@ -0,0 +1,76 @@
++#ifndef BCE_VHCI_QUEUE_H
++#define BCE_VHCI_QUEUE_H
++
++#include <linux/completion.h>
++#include "../queue.h"
++
++#define VHCI_EVENT_QUEUE_EL_COUNT 256
++#define VHCI_EVENT_PENDING_COUNT 32
++
++struct bce_vhci;
++struct bce_vhci_event_queue;
++
++enum bce_vhci_message_status {
++    BCE_VHCI_SUCCESS = 1,
++    BCE_VHCI_ERROR = 2,
++    BCE_VHCI_USB_PIPE_STALL = 3,
++    BCE_VHCI_ABORT = 4,
++    BCE_VHCI_BAD_ARGUMENT = 5,
++    BCE_VHCI_OVERRUN = 6,
++    BCE_VHCI_INTERNAL_ERROR = 7,
++    BCE_VHCI_NO_POWER = 8,
++    BCE_VHCI_UNSUPPORTED = 9
++};
++struct bce_vhci_message {
++    u16 cmd;
++    u16 status; // bce_vhci_message_status
++    u32 param1;
++    u64 param2;
++};
++
++struct bce_vhci_message_queue {
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq;
++    struct bce_vhci_message *data;
++    dma_addr_t dma_addr;
++};
++typedef void (*bce_vhci_event_queue_callback)(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg);
++struct bce_vhci_event_queue {
++    struct bce_vhci *vhci;
++    struct bce_queue_sq *sq;
++    struct bce_vhci_message *data;
++    dma_addr_t dma_addr;
++    bce_vhci_event_queue_callback cb;
++    struct completion queue_empty_completion;
++};
++struct bce_vhci_command_queue_completion {
++    struct bce_vhci_message *result;
++    struct completion completion;
++};
++struct bce_vhci_command_queue {
++    struct bce_vhci_message_queue *mq;
++    struct bce_vhci_command_queue_completion completion;
++    struct spinlock completion_lock;
++    struct mutex mutex;
++};
++
++int bce_vhci_message_queue_create(struct bce_vhci *vhci, struct bce_vhci_message_queue *ret, const char *name);
++void bce_vhci_message_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_message_queue *q);
++void bce_vhci_message_queue_write(struct bce_vhci_message_queue *q, struct bce_vhci_message *req);
++
++int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_sq_completion compl);
++int bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_queue *ret, const char *name,
++        bce_vhci_event_queue_callback cb);
++void bce_vhci_event_queue_destroy(struct bce_vhci *vhci, struct bce_vhci_event_queue *q);
++void bce_vhci_event_queue_submit_pending(struct bce_vhci_event_queue *q, size_t count);
++void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q);
++void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q);
++
++void bce_vhci_command_queue_create(struct bce_vhci_command_queue *ret, struct bce_vhci_message_queue *mq);
++void bce_vhci_command_queue_destroy(struct bce_vhci_command_queue *cq);
++int bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, struct bce_vhci_message *req,
++        struct bce_vhci_message *res, unsigned long timeout);
++void bce_vhci_command_queue_deliver_completion(struct bce_vhci_command_queue *cq, struct bce_vhci_message *msg);
++
++#endif //BCE_VHCI_QUEUE_H
+diff --git a/drivers/staging/apple-bce/vhci/transfer.c b/drivers/staging/apple-bce/vhci/transfer.c
+new file mode 100644
+index 000000000..8226363d6
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/transfer.c
+@@ -0,0 +1,661 @@
++#include "transfer.h"
++#include "../queue.h"
++#include "vhci.h"
++#include "../apple_bce.h"
++#include <linux/usb/hcd.h>
++
++static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq);
++static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q);
++static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q);
++
++static int bce_vhci_urb_init(struct bce_vhci_urb *vurb);
++static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg);
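++/*
++ * Ordering note: device events and DMA completions are always matched
++ * against the first URB on the endpoint's urb_list. Events that cannot be
++ * applied yet are parked on q->evq by bce_vhci_transfer_queue_defer_event()
++ * and replayed in order by bce_vhci_transfer_queue_deliver_pending().
++ */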
++static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c);
++
++static void bce_vhci_transfer_queue_reset_w(struct work_struct *work);
++
++void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
++        struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir)
++{
++    char name[0x21];
++    INIT_LIST_HEAD(&q->evq);
++    INIT_LIST_HEAD(&q->giveback_urb_list);
++    spin_lock_init(&q->urb_lock);
++    mutex_init(&q->pause_lock);
++    q->vhci = vhci;
++    q->endp = endp;
++    q->dev_addr = dev_addr;
++    q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F);
++    q->state = BCE_VHCI_ENDPOINT_ACTIVE;
++    q->active = true;
++    q->stalled = false;
++    q->max_active_requests = 1;
++    if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK)
++        q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS;
++    q->remaining_active_requests = q->max_active_requests;
++    q->cq = bce_create_cq(vhci->dev, 0x100);
++    INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w);
++    q->sq_in = NULL;
++    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
++        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc));
++        q->sq_in = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_FROM_DEVICE,
++                bce_vhci_transfer_queue_completion, q);
++    }
++    q->sq_out = NULL;
++    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
++        snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, usb_endpoint_num(&endp->desc));
++        q->sq_out = bce_create_sq(vhci->dev, q->cq, name, 0x100, DMA_TO_DEVICE,
++                bce_vhci_transfer_queue_completion, q);
++    }
++}
++
++void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q)
++{
++    bce_vhci_transfer_queue_giveback(q);
++    bce_vhci_transfer_queue_remove_pending(q);
++    if (q->sq_in)
++        bce_destroy_sq(vhci->dev, q->sq_in);
++    if (q->sq_out)
++        bce_destroy_sq(vhci->dev, q->sq_out);
++    bce_destroy_cq(vhci->dev, q->cq);
++}
++
++static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer_queue *q)
++{
++    return q->remaining_active_requests > 0;
++}
++
++static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
++{
++    struct bce_vhci_list_message *lm;
++    lm = kmalloc(sizeof(struct bce_vhci_list_message), GFP_KERNEL);
++    INIT_LIST_HEAD(&lm->list);
++    lm->msg = *msg;
++    list_add_tail(&lm->list, &q->evq);
++}
++
++static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q)
++{
++    unsigned long flags;
++    struct urb *urb;
++    spin_lock_irqsave(&q->urb_lock, flags);
++    while (!list_empty(&q->giveback_urb_list)) {
++        urb = list_first_entry(&q->giveback_urb_list, struct urb, urb_list);
++        list_del(&urb->urb_list);
++
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        usb_hcd_giveback_urb(q->vhci->hcd, urb, urb->status);
++        spin_lock_irqsave(&q->urb_lock, flags);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++}
++
++static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q);
++
++static void bce_vhci_transfer_queue_deliver_pending(struct bce_vhci_transfer_queue *q)
++{
++    struct urb *urb;
++    struct bce_vhci_list_message *lm;
++
++    while (!list_empty(&q->endp->urb_list) && !list_empty(&q->evq)) {
++        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++
++        lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
++        if (bce_vhci_urb_update(urb->hcpriv, &lm->msg) == -EAGAIN)
++            break;
++        list_del(&lm->list);
++        kfree(lm);
++    }
++
++    /* some of the URBs could have been completed, so initialize more URBs if possible */
++    bce_vhci_transfer_queue_init_pending_urbs(q);
++}
++
++static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q)
++{
++    unsigned long flags;
++    struct bce_vhci_list_message *lm;
++    spin_lock_irqsave(&q->urb_lock, flags);
++    while (!list_empty(&q->evq)) {
++        lm = list_first_entry(&q->evq, struct bce_vhci_list_message, list);
++        list_del(&lm->list);
++        kfree(lm);
++    }
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++}
++
++void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg)
++{
++    unsigned long flags;
++    struct bce_vhci_urb *turb;
++    struct urb *urb;
++    spin_lock_irqsave(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_deliver_pending(q);
++
++    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST &&
++            (!list_empty(&q->evq) || list_empty(&q->endp->urb_list))) {
++        bce_vhci_transfer_queue_defer_event(q, msg);
++        goto complete;
++    }
++    if (list_empty(&q->endp->urb_list)) {
++        pr_err("bce-vhci: [%02x] Unexpected transfer queue event\n", q->endp_addr);
++        goto complete;
++    }
++    urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++    turb = urb->hcpriv;
++    if (bce_vhci_urb_update(turb, msg) == -EAGAIN) {
++        bce_vhci_transfer_queue_defer_event(q, msg);
++    } else {
++        bce_vhci_transfer_queue_init_pending_urbs(q);
++    }
++
++complete:
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_giveback(q);
++}
++
++static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq)
++{
++    unsigned long flags;
++    struct bce_sq_completion_data *c;
++    struct urb *urb;
++    struct bce_vhci_transfer_queue *q = sq->userdata;
++    spin_lock_irqsave(&q->urb_lock, flags);
++    while ((c = bce_next_completion(sq))) {
++        if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
++            pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr);
++            bce_notify_submission_complete(sq);
++            continue;
++        }
++        if (list_empty(&q->endp->urb_list)) {
++            pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr);
++            continue;
++        }
++        pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr);
++        urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
++        bce_vhci_urb_transfer_completion(urb->hcpriv, c);
++        bce_notify_submission_complete(sq);
++    }
++    bce_vhci_transfer_queue_deliver_pending(q);
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    bce_vhci_transfer_queue_giveback(q);
++}
++
++int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q)
++{
++    unsigned long flags;
++    int status;
++    u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
++    spin_lock_irqsave(&q->urb_lock, flags);
++    q->active = false;
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    if (q->sq_out) {
++        pr_err("bce-vhci: Not implemented: wait for pending output requests\n");
++    }
++    bce_vhci_transfer_queue_remove_pending(q);
++    if ((status = bce_vhci_cmd_endpoint_set_state(
++            &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state)))
++        return status;
++    if (q->state != BCE_VHCI_ENDPOINT_PAUSED)
++        return -EINVAL;
++    if (q->sq_in)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
++    if (q->sq_out)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
++    return 0;
++}
++
++static void bce_vhci_urb_resume(struct bce_vhci_urb *urb);
++
++int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q)
++{
++    unsigned long flags;
++    int status;
++    struct urb *urb, *urbt;
++    struct bce_vhci_urb *vurb;
++    u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
++    if ((status = bce_vhci_cmd_endpoint_set_state(
++            &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state)))
++        return status;
++    if (q->state != BCE_VHCI_ENDPOINT_ACTIVE)
++        return -EINVAL;
++    spin_lock_irqsave(&q->urb_lock, flags);
++    q->active = true;
++    list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) {
++        vurb = urb->hcpriv;
++        if (vurb->state == BCE_VHCI_URB_INIT_PENDING) {
++            if (!bce_vhci_transfer_queue_can_init_urb(q))
++                break;
++            bce_vhci_urb_init(vurb);
++        } else {
++            bce_vhci_urb_resume(vurb);
++        }
++    }
++    bce_vhci_transfer_queue_deliver_pending(q);
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    return 0;
++}
++
++int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
++{
++    int ret = 0;
++    mutex_lock(&q->pause_lock);
++    if ((q->paused_by & src) != src) {
++        if (!q->paused_by)
++            ret = bce_vhci_transfer_queue_do_pause(q);
++        if (!ret)
++            q->paused_by |= src;
++    }
++    mutex_unlock(&q->pause_lock);
++    return ret;
++}
++
++int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src)
++{
++    int ret = 0;
++    mutex_lock(&q->pause_lock);
++    if (q->paused_by & src) {
++        if (!(q->paused_by & ~src))
++            ret = bce_vhci_transfer_queue_do_resume(q);
++        if (!ret)
++            q->paused_by &= ~src;
++    }
++    mutex_unlock(&q->pause_lock);
++    return ret;
++}
++
++static void bce_vhci_transfer_queue_reset_w(struct work_struct *work)
++{
++    unsigned long flags;
++    struct bce_vhci_transfer_queue *q = container_of(work, struct bce_vhci_transfer_queue, w_reset);
++
++    mutex_lock(&q->pause_lock);
++    spin_lock_irqsave(&q->urb_lock, flags);
++    if (!q->stalled) {
++        spin_unlock_irqrestore(&q->urb_lock, flags);
++        mutex_unlock(&q->pause_lock);
++        return;
++    }
++    q->active = false;
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    q->paused_by |= BCE_VHCI_PAUSE_INTERNAL_WQ;
++    bce_vhci_transfer_queue_remove_pending(q);
++    if (q->sq_in)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
++    if (q->sq_out)
++        bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
++    bce_vhci_cmd_endpoint_reset(&q->vhci->cq, q->dev_addr, (u8) (q->endp->desc.bEndpointAddress & 0x8F));
++    spin_lock_irqsave(&q->urb_lock, flags);
++    q->stalled = false;
++    spin_unlock_irqrestore(&q->urb_lock, flags);
++    mutex_unlock(&q->pause_lock);
++    bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ);
++}
++
++void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q)
++{
++    queue_work(q->vhci->tq_state_wq, &q->w_reset);
++}
++
++static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q)
++{
++    struct urb *urb, *urbt;
++    struct bce_vhci_urb *vurb;
++    list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) {
++        vurb = urb->hcpriv;
++        if (!bce_vhci_transfer_queue_can_init_urb(q))
++            break;
++        if (vurb->state == BCE_VHCI_URB_INIT_PENDING)
++            bce_vhci_urb_init(vurb);
++    }
++}
++
++
++
++static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout);
++
++int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb)
++{
++    unsigned long flags;
++    int status = 0;
++    struct bce_vhci_urb *vurb;
++    vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL);
++    
urb->hcpriv = vurb; ++ ++ vurb->q = q; ++ vurb->urb = urb; ++ vurb->dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; ++ vurb->is_control = (usb_endpoint_num(&urb->ep->desc) == 0); ++ ++ spin_lock_irqsave(&q->urb_lock, flags); ++ status = usb_hcd_link_urb_to_ep(q->vhci->hcd, urb); ++ if (status) { ++ spin_unlock_irqrestore(&q->urb_lock, flags); ++ urb->hcpriv = NULL; ++ kfree(vurb); ++ return status; ++ } ++ ++ if (q->active) { ++ if (bce_vhci_transfer_queue_can_init_urb(vurb->q)) ++ status = bce_vhci_urb_init(vurb); ++ else ++ vurb->state = BCE_VHCI_URB_INIT_PENDING; ++ } else { ++ if (q->stalled) ++ bce_vhci_transfer_queue_request_reset(q); ++ vurb->state = BCE_VHCI_URB_INIT_PENDING; ++ } ++ if (status) { ++ usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); ++ urb->hcpriv = NULL; ++ kfree(vurb); ++ } else { ++ bce_vhci_transfer_queue_deliver_pending(q); ++ } ++ spin_unlock_irqrestore(&q->urb_lock, flags); ++ pr_debug("bce-vhci: [%02x] URB enqueued (dir = %s, size = %i)\n", q->endp_addr, ++ usb_urb_dir_in(urb) ? "IN" : "OUT", urb->transfer_buffer_length); ++ return status; ++} ++ ++static int bce_vhci_urb_init(struct bce_vhci_urb *vurb) ++{ ++ int status = 0; ++ ++ if (vurb->q->remaining_active_requests == 0) { ++ pr_err("bce-vhci: cannot init request (remaining_active_requests = 0)\n"); ++ return -EINVAL; ++ } ++ ++ if (vurb->is_control) { ++ vurb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST; ++ } else { ++ status = bce_vhci_urb_data_start(vurb, NULL); ++ } ++ ++ if (!status) { ++ --vurb->q->remaining_active_requests; ++ } ++ return status; ++} ++ ++static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status) ++{ ++ struct bce_vhci_transfer_queue *q = urb->q; ++ struct bce_vhci *vhci = q->vhci; ++ struct urb *real_urb = urb->urb; ++ pr_debug("bce-vhci: [%02x] URB complete %i\n", q->endp_addr, status); ++ usb_hcd_unlink_urb_from_ep(vhci->hcd, real_urb); ++ real_urb->hcpriv = NULL; ++ real_urb->status = status; ++ if (urb->state != BCE_VHCI_URB_INIT_PENDING) ++ ++urb->q->remaining_active_requests; ++ kfree(urb); ++ list_add_tail(&real_urb->urb_list, &q->giveback_urb_list); ++} ++ ++int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status) ++{ ++ struct bce_vhci_urb *vurb; ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&q->urb_lock, flags); ++ if ((ret = usb_hcd_check_unlink_urb(q->vhci->hcd, urb, status))) { ++ spin_unlock_irqrestore(&q->urb_lock, flags); ++ return ret; ++ } ++ ++ vurb = urb->hcpriv; ++ /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. 
*/ ++ if (vurb->state != BCE_VHCI_URB_INIT_PENDING) { ++ pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr); ++ ++ spin_unlock_irqrestore(&q->urb_lock, flags); ++ bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ); ++ spin_lock_irqsave(&q->urb_lock, flags); ++ ++ ++q->remaining_active_requests; ++ } ++ ++ usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); ++ ++ spin_unlock_irqrestore(&q->urb_lock, flags); ++ ++ usb_hcd_giveback_urb(q->vhci->hcd, urb, status); ++ ++ if (vurb->state != BCE_VHCI_URB_INIT_PENDING) ++ bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); ++ ++ kfree(vurb); ++ ++ return 0; ++} ++ ++static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout) ++{ ++ struct bce_vhci_message msg; ++ struct bce_qe_submission *s; ++ u32 tr_len; ++ int reservation1, reservation2 = -EFAULT; ++ ++ pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr, ++ (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length); ++ ++ /* Reserve both a message and a submission, so we don't run into issues later. */ ++ reservation1 = bce_reserve_submission(urb->q->vhci->msg_asynchronous.sq, timeout); ++ if (!reservation1) ++ reservation2 = bce_reserve_submission(urb->q->sq_in, timeout); ++ if (reservation1 || reservation2) { ++ pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); ++ if (!reservation1) ++ bce_cancel_submission_reservation(urb->q->vhci->msg_asynchronous.sq); ++ return -ENOMEM; ++ } ++ ++ urb->send_offset = urb->receive_offset; ++ ++ tr_len = urb->urb->transfer_buffer_length - urb->send_offset; ++ ++ spin_lock(&urb->q->vhci->msg_asynchronous_lock); ++ msg.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST; ++ msg.status = 0; ++ msg.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | urb->q->dev_addr; ++ msg.param2 = tr_len; ++ bce_vhci_message_queue_write(&urb->q->vhci->msg_asynchronous, &msg); ++ spin_unlock(&urb->q->vhci->msg_asynchronous_lock); ++ ++ s = bce_next_submission(urb->q->sq_in); ++ bce_set_submission_single(s, urb->urb->transfer_dma + urb->send_offset, tr_len); ++ bce_submit_to_device(urb->q->sq_in); ++ ++ urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION; ++ return 0; ++} ++ ++static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout) ++{ ++ if (urb->dir == DMA_TO_DEVICE) { ++ if (urb->urb->transfer_buffer_length > 0) ++ urb->state = BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST; ++ else ++ urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; ++ return 0; ++ } else { ++ return bce_vhci_urb_data_transfer_in(urb, timeout); ++ } ++} ++ ++static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, size_t size) ++{ ++ struct bce_qe_submission *s; ++ unsigned long timeout = 0; ++ if (bce_reserve_submission(urb->q->sq_out, &timeout)) { ++ pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); ++ return -EPIPE; ++ } ++ ++ pr_debug("bce-vhci: [%02x] DMA to device %llx %lx\n", urb->q->endp_addr, (u64) addr, size); ++ ++ s = bce_next_submission(urb->q->sq_out); ++ bce_set_submission_single(s, addr, size); ++ bce_submit_to_device(urb->q->sq_out); ++ return 0; ++} ++ ++static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) ++{ ++ u32 tr_len; ++ int status; ++ if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST) { ++ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) { ++ tr_len = min(urb->urb->transfer_buffer_length - urb->send_offset, (u32) msg->param2); ++ if ((status = 
bce_vhci_urb_send_out_data(urb, urb->urb->transfer_dma + urb->send_offset, tr_len))) ++ return status; ++ urb->send_offset += tr_len; ++ urb->state = BCE_VHCI_URB_WAITING_FOR_COMPLETION; ++ return 0; ++ } ++ } ++ ++ /* 0x1000 in out queues aren't really unexpected */ ++ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL) ++ return -EAGAIN; ++ pr_err("bce-vhci: [%02x] %s URB unexpected message (state = %x, msg: %x %x %x %llx)\n", ++ urb->q->endp_addr, (urb->is_control ? "Control (data update)" : "Data"), urb->state, ++ msg->cmd, msg->status, msg->param1, msg->param2); ++ return -EAGAIN; ++} ++ ++static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) ++{ ++ if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { ++ urb->receive_offset += c->data_size; ++ if (urb->dir == DMA_FROM_DEVICE || urb->receive_offset >= urb->urb->transfer_buffer_length) { ++ urb->urb->actual_length = (u32) urb->receive_offset; ++ urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; ++ if (!urb->is_control) { ++ bce_vhci_urb_complete(urb, 0); ++ return -ENOENT; ++ } ++ } ++ } else { ++ pr_err("bce-vhci: [%02x] Data URB unexpected completion\n", urb->q->endp_addr); ++ } ++ return 0; ++} ++ ++ ++static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb) ++{ ++ struct bce_vhci_transfer_queue *q = urb->q; ++ if (urb->received_status == 0) ++ return 0; ++ if (urb->state == BCE_VHCI_URB_DATA_TRANSFER_COMPLETE || ++ (urb->received_status != BCE_VHCI_SUCCESS && urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST && ++ urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION)) { ++ urb->state = BCE_VHCI_URB_CONTROL_COMPLETE; ++ if (urb->received_status != BCE_VHCI_SUCCESS) { ++ pr_err("bce-vhci: [%02x] URB failed: %x\n", urb->q->endp_addr, urb->received_status); ++ urb->q->active = false; ++ urb->q->stalled = true; ++ bce_vhci_urb_complete(urb, -EPIPE); ++ if (!list_empty(&q->endp->urb_list)) ++ bce_vhci_transfer_queue_request_reset(q); ++ return -ENOENT; ++ } ++ bce_vhci_urb_complete(urb, 0); ++ return -ENOENT; ++ } ++ return 0; ++} ++ ++static int bce_vhci_urb_control_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg) ++{ ++ int status; ++ if (msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { ++ urb->received_status = msg->status; ++ return bce_vhci_urb_control_check_status(urb); ++ } ++ ++ if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST) { ++ if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST) { ++ if (bce_vhci_urb_send_out_data(urb, urb->urb->setup_dma, sizeof(struct usb_ctrlrequest))) { ++ pr_err("bce-vhci: [%02x] Failed to start URB setup transfer\n", urb->q->endp_addr); ++ return 0; /* TODO: fail the URB? 
*/
++            }
++            urb->state = BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION;
++            pr_debug("bce-vhci: [%02x] Sent setup %llx\n", urb->q->endp_addr, urb->urb->setup_dma);
++            return 0;
++        }
++    } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST ||
++               urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
++        if ((status = bce_vhci_urb_data_update(urb, msg)))
++            return status;
++        return bce_vhci_urb_control_check_status(urb);
++    }
++
++    /* 0x1000 in out queues aren't really unexpected */
++    if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && urb->q->sq_out != NULL)
++        return -EAGAIN;
++    pr_err("bce-vhci: [%02x] Control URB unexpected message (state = %x, msg: %x %x %x %llx)\n", urb->q->endp_addr,
++            urb->state, msg->cmd, msg->status, msg->param1, msg->param2);
++    return -EAGAIN;
++}
++
++static int bce_vhci_urb_control_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    int status;
++    unsigned long timeout;
++
++    if (urb->state == BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION) {
++        if (c->data_size != sizeof(struct usb_ctrlrequest))
++            pr_err("bce-vhci: [%02x] transfer complete data size mismatch for usb_ctrlrequest (%llx instead of %lx)\n",
++                    urb->q->endp_addr, c->data_size, sizeof(struct usb_ctrlrequest));
++
++        timeout = 1000;
++        status = bce_vhci_urb_data_start(urb, &timeout);
++        if (status) {
++            bce_vhci_urb_complete(urb, status);
++            return -ENOENT;
++        }
++        return 0;
++    } else if (urb->state == BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST ||
++               urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
++        if ((status = bce_vhci_urb_data_transfer_completion(urb, c)))
++            return status;
++        return bce_vhci_urb_control_check_status(urb);
++    } else {
++        pr_err("bce-vhci: [%02x] Control URB unexpected completion (state = %x)\n", urb->q->endp_addr, urb->state);
++    }
++    return 0;
++}
++
++static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg)
++{
++    if (urb->state == BCE_VHCI_URB_INIT_PENDING)
++        return -EAGAIN;
++    if (urb->is_control)
++        return bce_vhci_urb_control_update(urb, msg);
++    else
++        return bce_vhci_urb_data_update(urb, msg);
++}
++
++static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c)
++{
++    if (urb->is_control)
++        return bce_vhci_urb_control_transfer_completion(urb, c);
++    else
++        return bce_vhci_urb_data_transfer_completion(urb, c);
++}
++
++static void bce_vhci_urb_resume(struct bce_vhci_urb *urb)
++{
++    int status = 0;
++    if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) {
++        status = bce_vhci_urb_data_transfer_in(urb, NULL);
++    }
++    if (status)
++        bce_vhci_urb_complete(urb, status);
++}
+diff --git a/drivers/staging/apple-bce/vhci/transfer.h b/drivers/staging/apple-bce/vhci/transfer.h
+new file mode 100644
+index 000000000..6a62a00b2
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/transfer.h
+@@ -0,0 +1,71 @@
++#ifndef BCEDRIVER_TRANSFER_H
++#define BCEDRIVER_TRANSFER_H
++
++#include <linux/usb.h>
++#include "queue.h"
++#include "command.h"
++#include "../queue.h"
++
++struct bce_vhci_list_message {
++    struct list_head list;
++    struct bce_vhci_message msg;
++};
++enum bce_vhci_pause_source {
++    BCE_VHCI_PAUSE_INTERNAL_WQ = 1,
++    BCE_VHCI_PAUSE_FIRMWARE = 2,
++    BCE_VHCI_PAUSE_SUSPEND = 4,
++    BCE_VHCI_PAUSE_SHUTDOWN = 8
++};
++struct bce_vhci_transfer_queue {
++    struct bce_vhci *vhci;
++    struct usb_host_endpoint *endp;
++    enum bce_vhci_endpoint_state state;
++    u32 max_active_requests, remaining_active_requests;
++    bool active, stalled;
++    u32 paused_by;
++    
bce_vhci_device_t dev_addr;
++    u8 endp_addr;
++    struct bce_queue_cq *cq;
++    struct bce_queue_sq *sq_in;
++    struct bce_queue_sq *sq_out;
++    struct list_head evq;
++    struct spinlock urb_lock;
++    struct mutex pause_lock;
++    struct list_head giveback_urb_list;
++
++    struct work_struct w_reset;
++};
++enum bce_vhci_urb_state {
++    BCE_VHCI_URB_INIT_PENDING,
++
++    BCE_VHCI_URB_WAITING_FOR_TRANSFER_REQUEST,
++    BCE_VHCI_URB_WAITING_FOR_COMPLETION,
++    BCE_VHCI_URB_DATA_TRANSFER_COMPLETE,
++
++    BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_REQUEST,
++    BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION,
++    BCE_VHCI_URB_CONTROL_COMPLETE
++};
++struct bce_vhci_urb {
++    struct urb *urb;
++    struct bce_vhci_transfer_queue *q;
++    enum dma_data_direction dir;
++    bool is_control;
++    enum bce_vhci_urb_state state;
++    int received_status;
++    u32 send_offset;
++    u32 receive_offset;
++};
++
++void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q,
++        struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir);
++void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q);
++void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg);
++int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
++int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src);
++void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q);
++
++int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb);
++int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status);
++
++#endif //BCEDRIVER_TRANSFER_H
+diff --git a/drivers/staging/apple-bce/vhci/vhci.c b/drivers/staging/apple-bce/vhci/vhci.c
+new file mode 100644
+index 000000000..053a9f39e
+--- /dev/null
++++ b/drivers/staging/apple-bce/vhci/vhci.c
+@@ -0,0 +1,759 @@
++#include "vhci.h"
++#include "../apple_bce.h"
++#include "command.h"
++#include <linux/usb.h>
++#include <linux/usb/hcd.h>
++#include <linux/module.h>
++#include <linux/version.h>
++
++static dev_t bce_vhci_chrdev;
++static struct class *bce_vhci_class;
++static const struct hc_driver bce_vhci_driver;
++static u16 bce_vhci_port_mask = U16_MAX;
++
++static int bce_vhci_create_event_queues(struct bce_vhci *vhci);
++static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci);
++static int bce_vhci_create_message_queues(struct bce_vhci *vhci);
++static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci);
++static void bce_vhci_handle_firmware_events_w(struct work_struct *ws);
++static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq);
++
++int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci)
++{
++    int status;
++
++    spin_lock_init(&vhci->hcd_spinlock);
++
++    vhci->dev = dev;
++
++    vhci->vdevt = bce_vhci_chrdev;
++    vhci->vdev = device_create(bce_vhci_class, dev->dev, vhci->vdevt, NULL, "bce-vhci");
++    if (IS_ERR_OR_NULL(vhci->vdev)) {
++        status = PTR_ERR(vhci->vdev);
++        goto fail_dev;
++    }
++
++    if ((status = bce_vhci_create_message_queues(vhci)))
++        goto fail_mq;
++    if ((status = bce_vhci_create_event_queues(vhci)))
++        goto fail_eq;
++
++    vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0);
++    INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w);
++
++    vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci");
++    if (!vhci->hcd) {
++        status = -ENOMEM;
++        goto fail_hcd;
++    }
++    vhci->hcd->self.sysdev = &dev->pci->dev;
++#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0) ++ vhci->hcd->self.uses_dma = 1; ++#endif ++ *((struct bce_vhci **) vhci->hcd->hcd_priv) = vhci; ++ vhci->hcd->speed = HCD_USB2; ++ ++ if ((status = usb_add_hcd(vhci->hcd, 0, 0))) ++ goto fail_hcd; ++ ++ return 0; ++ ++fail_hcd: ++ bce_vhci_destroy_event_queues(vhci); ++fail_eq: ++ bce_vhci_destroy_message_queues(vhci); ++fail_mq: ++ device_destroy(bce_vhci_class, vhci->vdevt); ++fail_dev: ++ if (!status) ++ status = -EINVAL; ++ return status; ++} ++ ++void bce_vhci_destroy(struct bce_vhci *vhci) ++{ ++ usb_remove_hcd(vhci->hcd); ++ bce_vhci_destroy_event_queues(vhci); ++ bce_vhci_destroy_message_queues(vhci); ++ device_destroy(bce_vhci_class, vhci->vdevt); ++} ++ ++struct bce_vhci *bce_vhci_from_hcd(struct usb_hcd *hcd) ++{ ++ return *((struct bce_vhci **) hcd->hcd_priv); ++} ++ ++int bce_vhci_start(struct usb_hcd *hcd) ++{ ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ int status; ++ u16 port_mask = 0; ++ bce_vhci_port_t port_no = 0; ++ if ((status = bce_vhci_cmd_controller_enable(&vhci->cq, 1, &port_mask))) ++ return status; ++ vhci->port_mask = port_mask; ++ vhci->port_power_mask = 0; ++ if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) ++ return status; ++ port_mask = vhci->port_mask; ++ while (port_mask) { ++ port_no += 1; ++ port_mask >>= 1; ++ } ++ vhci->port_count = port_no; ++ return 0; ++} ++ ++void bce_vhci_stop(struct usb_hcd *hcd) ++{ ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ bce_vhci_cmd_controller_disable(&vhci->cq); ++} ++ ++static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf) ++{ ++ return 0; ++} ++ ++static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout); ++ ++static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength) ++{ ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ int status; ++ struct usb_hub_descriptor *hd; ++ struct usb_hub_status *hs; ++ struct usb_port_status *ps; ++ u32 port_status; ++ // pr_info("bce-vhci: bce_vhci_hub_control %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength); ++ if (typeReq == GetHubDescriptor && wLength >= sizeof(struct usb_hub_descriptor)) { ++ hd = (struct usb_hub_descriptor *) buf; ++ memset(hd, 0, sizeof(*hd)); ++ hd->bDescLength = sizeof(struct usb_hub_descriptor); ++ hd->bDescriptorType = USB_DT_HUB; ++ hd->bNbrPorts = (u8) vhci->port_count; ++ hd->wHubCharacteristics = HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_INDV_PORT_OCPM; ++ hd->bPwrOn2PwrGood = 0; ++ hd->bHubContrCurrent = 0; ++ return 0; ++ } else if (typeReq == GetHubStatus && wLength >= sizeof(struct usb_hub_status)) { ++ hs = (struct usb_hub_status *) buf; ++ memset(hs, 0, sizeof(*hs)); ++ hs->wHubStatus = 0; ++ hs->wHubChange = 0; ++ return 0; ++ } else if (typeReq == GetPortStatus && wLength >= 4 /* usb 2.0 */) { ++ ps = (struct usb_port_status *) buf; ++ ps->wPortStatus = 0; ++ ps->wPortChange = 0; ++ ++ if (vhci->port_power_mask & BIT(wIndex)) ++ ps->wPortStatus |= USB_PORT_STAT_POWER; ++ ++ if (!(bce_vhci_port_mask & BIT(wIndex))) ++ return 0; ++ ++ if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status))) ++ return status; ++ ++ if (port_status & 16) ++ ps->wPortStatus |= USB_PORT_STAT_ENABLE | USB_PORT_STAT_HIGH_SPEED; ++ if (port_status & 4) ++ ps->wPortStatus |= USB_PORT_STAT_CONNECTION; ++ if (port_status & 2) ++ ps->wPortStatus |= USB_PORT_STAT_OVERCURRENT; ++ if (port_status & 8) ++ ps->wPortStatus |= USB_PORT_STAT_RESET; ++ if (port_status & 0x60) ++ ps->wPortStatus 
|= USB_PORT_STAT_SUSPEND; ++ ++ if (port_status & 0x40000) ++ ps->wPortChange |= USB_PORT_STAT_C_CONNECTION; ++ ++ pr_debug("bce-vhci: Translated status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange); ++ return 0; ++ } else if (typeReq == SetPortFeature) { ++ if (wValue == USB_PORT_FEAT_POWER) { ++ status = bce_vhci_cmd_port_power_on(&vhci->cq, (u8) wIndex); ++ /* As far as I am aware, power status is not part of the port status so store it separately */ ++ if (!status) ++ vhci->port_power_mask |= BIT(wIndex); ++ return status; ++ } ++ if (wValue == USB_PORT_FEAT_RESET) { ++ return bce_vhci_reset_device(vhci, wIndex, wValue); ++ } ++ if (wValue == USB_PORT_FEAT_SUSPEND) { ++ /* TODO: Am I supposed to also suspend the endpoints? */ ++ pr_debug("bce-vhci: Suspending port %i\n", wIndex); ++ return bce_vhci_cmd_port_suspend(&vhci->cq, (u8) wIndex); ++ } ++ } else if (typeReq == ClearPortFeature) { ++ if (wValue == USB_PORT_FEAT_ENABLE) ++ return bce_vhci_cmd_port_disable(&vhci->cq, (u8) wIndex); ++ if (wValue == USB_PORT_FEAT_POWER) { ++ status = bce_vhci_cmd_port_power_off(&vhci->cq, (u8) wIndex); ++ if (!status) ++ vhci->port_power_mask &= ~BIT(wIndex); ++ return status; ++ } ++ if (wValue == USB_PORT_FEAT_C_CONNECTION) ++ return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status); ++ if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */ ++ return 0; ++ } ++ if (wValue == USB_PORT_FEAT_SUSPEND) { ++ pr_debug("bce-vhci: Resuming port %i\n", wIndex); ++ return bce_vhci_cmd_port_resume(&vhci->cq, (u8) wIndex); ++ } ++ } ++ pr_err("bce-vhci: bce_vhci_hub_control unhandled request: %x %i %i [bufl=%i]\n", typeReq, wValue, wIndex, wLength); ++ dump_stack(); ++ return -EIO; ++} ++ ++static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) ++{ ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ struct bce_vhci_device *vdev; ++ bce_vhci_device_t devid; ++ pr_info("bce_vhci_enable_device\n"); ++ ++ if (vhci->port_to_device[udev->portnum]) ++ return 0; ++ ++ /* We need to early address the device */ ++ if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid)) ++ return -EIO; ++ ++ pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid); ++ ++ vdev = kzalloc(sizeof(struct bce_vhci_device), GFP_KERNEL); ++ vhci->port_to_device[udev->portnum] = devid; ++ vhci->devices[devid] = vdev; ++ ++ bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], &udev->ep0, devid, DMA_BIDIRECTIONAL); ++ udev->ep0.hcpriv = &vdev->tq[0]; ++ vdev->tq_mask |= BIT(0); ++ ++ bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &udev->ep0.desc); ++ return 0; ++} ++ ++static int bce_vhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) ++{ ++ /* This is the same as enable_device, but instead in the old scheme */ ++ return bce_vhci_enable_device(hcd, udev); ++} ++ ++static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev) ++{ ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ int i; ++ bce_vhci_device_t devid; ++ struct bce_vhci_device *dev; ++ pr_info("bce_vhci_free_device %i\n", udev->portnum); ++ if (!vhci->port_to_device[udev->portnum]) ++ return; ++ devid = vhci->port_to_device[udev->portnum]; ++ dev = vhci->devices[devid]; ++ for (i = 0; i < 32; i++) { ++ if (dev->tq_mask & BIT(i)) { ++ bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); ++ bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); ++ bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); ++ } ++ } ++ 
vhci->devices[devid] = NULL; ++ vhci->port_to_device[udev->portnum] = 0; ++ bce_vhci_cmd_device_destroy(&vhci->cq, devid); ++ kfree(dev); ++} ++ ++static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout) ++{ ++ struct bce_vhci_device *dev = NULL; ++ bce_vhci_device_t devid; ++ int i; ++ int status; ++ enum dma_data_direction dir; ++ pr_info("bce_vhci_reset_device %i\n", index); ++ ++ devid = vhci->port_to_device[index]; ++ if (devid) { ++ dev = vhci->devices[devid]; ++ ++ for (i = 0; i < 32; i++) { ++ if (dev->tq_mask & BIT(i)) { ++ bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); ++ bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); ++ bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); ++ } ++ } ++ vhci->devices[devid] = NULL; ++ vhci->port_to_device[index] = 0; ++ bce_vhci_cmd_device_destroy(&vhci->cq, devid); ++ } ++ status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout); ++ ++ if (dev) { ++ if ((status = bce_vhci_cmd_device_create(&vhci->cq, index, &devid))) ++ return status; ++ vhci->devices[devid] = dev; ++ vhci->port_to_device[index] = devid; ++ ++ for (i = 0; i < 32; i++) { ++ if (dev->tq_mask & BIT(i)) { ++ dir = usb_endpoint_dir_in(&dev->tq[i].endp->desc) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; ++ if (i == 0) ++ dir = DMA_BIDIRECTIONAL; ++ bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir); ++ bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc); ++ } ++ } ++ } ++ ++ return status; ++} ++ ++static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) ++{ ++ return 0; ++} ++ ++static int bce_vhci_get_frame_number(struct usb_hcd *hcd) ++{ ++ return 0; ++} ++ ++static int bce_vhci_bus_suspend(struct usb_hcd *hcd) ++{ ++ int i, j; ++ int status; ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ pr_info("bce_vhci: suspend started\n"); ++ ++ pr_info("bce_vhci: suspend endpoints\n"); ++ for (i = 0; i < 16; i++) { ++ if (!vhci->port_to_device[i]) ++ continue; ++ for (j = 0; j < 32; j++) { ++ if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) ++ continue; ++ bce_vhci_transfer_queue_pause(&vhci->devices[vhci->port_to_device[i]]->tq[j], ++ BCE_VHCI_PAUSE_SUSPEND); ++ } ++ } ++ ++ pr_info("bce_vhci: suspend ports\n"); ++ for (i = 0; i < 16; i++) { ++ if (!vhci->port_to_device[i]) ++ continue; ++ bce_vhci_cmd_port_suspend(&vhci->cq, i); ++ } ++ pr_info("bce_vhci: suspend controller\n"); ++ if ((status = bce_vhci_cmd_controller_pause(&vhci->cq))) ++ return status; ++ ++ bce_vhci_event_queue_pause(&vhci->ev_commands); ++ bce_vhci_event_queue_pause(&vhci->ev_system); ++ bce_vhci_event_queue_pause(&vhci->ev_isochronous); ++ bce_vhci_event_queue_pause(&vhci->ev_interrupt); ++ bce_vhci_event_queue_pause(&vhci->ev_asynchronous); ++ pr_info("bce_vhci: suspend done\n"); ++ return 0; ++} ++ ++static int bce_vhci_bus_resume(struct usb_hcd *hcd) ++{ ++ int i, j; ++ int status; ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ pr_info("bce_vhci: resume started\n"); ++ ++ bce_vhci_event_queue_resume(&vhci->ev_system); ++ bce_vhci_event_queue_resume(&vhci->ev_isochronous); ++ bce_vhci_event_queue_resume(&vhci->ev_interrupt); ++ bce_vhci_event_queue_resume(&vhci->ev_asynchronous); ++ bce_vhci_event_queue_resume(&vhci->ev_commands); ++ ++ pr_info("bce_vhci: resume controller\n"); ++ if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) ++ return status; ++ ++ pr_info("bce_vhci: resume ports\n"); ++ for (i = 0; i < 16; i++) { ++ if (!vhci->port_to_device[i]) ++ 
continue; ++ bce_vhci_cmd_port_resume(&vhci->cq, i); ++ } ++ pr_info("bce_vhci: resume endpoints\n"); ++ for (i = 0; i < 16; i++) { ++ if (!vhci->port_to_device[i]) ++ continue; ++ for (j = 0; j < 32; j++) { ++ if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) ++ continue; ++ bce_vhci_transfer_queue_resume(&vhci->devices[vhci->port_to_device[i]]->tq[j], ++ BCE_VHCI_PAUSE_SUSPEND); ++ } ++ } ++ ++ pr_info("bce_vhci: resume done\n"); ++ return 0; ++} ++ ++static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) ++{ ++ struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; ++ pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress); ++ if (!q) ++ return -ENOENT; ++ return bce_vhci_urb_create(q, urb); ++} ++ ++static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) ++{ ++ struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; ++ pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress); ++ return bce_vhci_urb_request_cancel(q, urb, status); ++} ++ ++static void bce_vhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep) ++{ ++ struct bce_vhci_transfer_queue *q = ep->hcpriv; ++ pr_debug("bce_vhci_endpoint_reset\n"); ++ if (q) ++ bce_vhci_transfer_queue_request_reset(q); ++} ++ ++static u8 bce_vhci_endpoint_index(u8 addr) ++{ ++ if (addr & 0x80) ++ return (u8) (0x10 + (addr & 0xf)); ++ return (u8) (addr & 0xf); ++} ++ ++static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp) ++{ ++ u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress); ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; ++ struct bce_vhci_device *vdev = vhci->devices[devid]; ++ pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, endp_index); ++ ++ if (udev->bus->root_hub == udev) /* The USB hub */ ++ return 0; ++ if (vdev == NULL) ++ return -ENODEV; ++ if (vdev->tq_mask & BIT(endp_index)) { ++ endp->hcpriv = &vdev->tq[endp_index]; ++ return 0; ++ } ++ ++ bce_vhci_create_transfer_queue(vhci, &vdev->tq[endp_index], endp, devid, ++ usb_endpoint_dir_in(&endp->desc) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); ++ endp->hcpriv = &vdev->tq[endp_index]; ++ vdev->tq_mask |= BIT(endp_index); ++ ++ bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &endp->desc); ++ return 0; ++} ++ ++static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *endp) ++{ ++ u8 endp_index = bce_vhci_endpoint_index(endp->desc.bEndpointAddress); ++ struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); ++ bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; ++ struct bce_vhci_transfer_queue *q = endp->hcpriv; ++ struct bce_vhci_device *vdev = vhci->devices[devid]; ++ pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index); ++ if (!q) { ++ if (vdev && vdev->tq_mask & BIT(endp_index)) { ++ pr_err("something deleted the hcpriv?\n"); ++ q = &vdev->tq[endp_index]; ++ } else { ++ return 0; ++ } ++ } ++ ++ bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu)); ++ vhci->devices[devid]->tq_mask &= ~BIT(endp_index); ++ bce_vhci_destroy_transfer_queue(vhci, q); ++ return 0; ++} ++ ++static int bce_vhci_create_message_queues(struct bce_vhci *vhci) ++{ ++ if (bce_vhci_message_queue_create(vhci, &vhci->msg_commands, "VHC1HostCommands") || ++ bce_vhci_message_queue_create(vhci, &vhci->msg_system, "VHC1HostSystemEvents") || ++ bce_vhci_message_queue_create(vhci, &vhci->msg_isochronous, "VHC1HostIsochronousEvents") || ++ bce_vhci_message_queue_create(vhci, &vhci->msg_interrupt, "VHC1HostInterruptEvents") || ++ bce_vhci_message_queue_create(vhci, &vhci->msg_asynchronous, "VHC1HostAsynchronousEvents")) { ++ bce_vhci_destroy_message_queues(vhci); ++ return -EINVAL; ++ } ++ spin_lock_init(&vhci->msg_asynchronous_lock); ++ bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands); ++ return 0; ++} ++ ++static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci) ++{ ++ bce_vhci_command_queue_destroy(&vhci->cq); ++ bce_vhci_message_queue_destroy(vhci, &vhci->msg_commands); ++ bce_vhci_message_queue_destroy(vhci, &vhci->msg_system); ++ bce_vhci_message_queue_destroy(vhci, &vhci->msg_isochronous); ++ bce_vhci_message_queue_destroy(vhci, &vhci->msg_interrupt); ++ bce_vhci_message_queue_destroy(vhci, &vhci->msg_asynchronous); ++} ++ ++static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); ++static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg); ++ ++static int bce_vhci_create_event_queues(struct bce_vhci *vhci) ++{ ++ vhci->ev_cq = bce_create_cq(vhci->dev, 0x100); ++ if (!vhci->ev_cq) ++ return -EINVAL; ++#define CREATE_EVENT_QUEUE(field, name, cb) bce_vhci_event_queue_create(vhci, &vhci->field, name, cb) ++ if (__bce_vhci_event_queue_create(vhci, &vhci->ev_commands, "VHC1FirmwareCommands", ++ bce_vhci_firmware_event_completion) || ++ CREATE_EVENT_QUEUE(ev_system, "VHC1FirmwareSystemEvents", bce_vhci_handle_system_event) || ++ CREATE_EVENT_QUEUE(ev_isochronous, "VHC1FirmwareIsochronousEvents", bce_vhci_handle_usb_event) || ++ CREATE_EVENT_QUEUE(ev_interrupt, "VHC1FirmwareInterruptEvents", bce_vhci_handle_usb_event) || ++ CREATE_EVENT_QUEUE(ev_asynchronous, "VHC1FirmwareAsynchronousEvents", bce_vhci_handle_usb_event)) { ++ bce_vhci_destroy_event_queues(vhci); ++ return -EINVAL; ++ } ++#undef CREATE_EVENT_QUEUE ++ return 0; ++} ++ ++static void bce_vhci_destroy_event_queues(struct bce_vhci *vhci) ++{ ++ bce_vhci_event_queue_destroy(vhci, &vhci->ev_commands); ++ bce_vhci_event_queue_destroy(vhci, &vhci->ev_system); ++ 
bce_vhci_event_queue_destroy(vhci, &vhci->ev_isochronous);
++    bce_vhci_event_queue_destroy(vhci, &vhci->ev_interrupt);
++    bce_vhci_event_queue_destroy(vhci, &vhci->ev_asynchronous);
++    if (vhci->ev_cq)
++        bce_destroy_cq(vhci->dev, vhci->ev_cq);
++}
++
++static void bce_vhci_send_fw_event_response(struct bce_vhci *vhci, struct bce_vhci_message *req, u16 status)
++{
++    unsigned long timeout = 1000;
++    struct bce_vhci_message r = *req;
++    r.cmd = (u16) (req->cmd | 0x8000u);
++    r.status = status;
++    r.param1 = req->param1;
++    r.param2 = 0;
++
++    if (bce_reserve_submission(vhci->msg_system.sq, &timeout)) {
++        pr_err("bce-vhci: Cannot reserve submission for FW event reply\n");
++        return;
++    }
++    bce_vhci_message_queue_write(&vhci->msg_system, &r);
++}
++
++static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci_message *msg)
++{
++    unsigned long flags;
++    bce_vhci_device_t devid;
++    u8 endp;
++    struct bce_vhci_device *dev;
++    struct bce_vhci_transfer_queue *tq;
++    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
++        devid = (bce_vhci_device_t) (msg->param1 & 0xff);
++        endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff));
++        dev = vhci->devices[devid];
++        if (!dev || !(dev->tq_mask & BIT(endp)))
++            return BCE_VHCI_BAD_ARGUMENT;
++        tq = &dev->tq[endp];
++    }
++
++    if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE) {
++        if (msg->param2 == BCE_VHCI_ENDPOINT_ACTIVE) {
++            bce_vhci_transfer_queue_resume(tq, BCE_VHCI_PAUSE_FIRMWARE);
++            return BCE_VHCI_SUCCESS;
++        } else if (msg->param2 == BCE_VHCI_ENDPOINT_PAUSED) {
++            bce_vhci_transfer_queue_pause(tq, BCE_VHCI_PAUSE_FIRMWARE);
++            return BCE_VHCI_SUCCESS;
++        }
++        return BCE_VHCI_BAD_ARGUMENT;
++    } else if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) {
++        if (msg->param2 == BCE_VHCI_ENDPOINT_STALLED) {
++            tq->state = msg->param2;
++            spin_lock_irqsave(&tq->urb_lock, flags);
++            tq->stalled = true;
++            spin_unlock_irqrestore(&tq->urb_lock, flags);
++            return BCE_VHCI_SUCCESS;
++        }
++        return BCE_VHCI_BAD_ARGUMENT;
++    }
++    pr_warn("bce-vhci: Unhandled firmware event: %x s=%x p1=%x p2=%llx\n",
++            msg->cmd, msg->status, msg->param1, msg->param2);
++    return BCE_VHCI_BAD_ARGUMENT;
++}
++
++static void bce_vhci_handle_firmware_events_w(struct work_struct *ws)
++{
++    size_t cnt = 0;
++    int result;
++    struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_fw_events);
++    struct bce_queue_sq *sq = vhci->ev_commands.sq;
++    struct bce_sq_completion_data *cq;
++    struct bce_vhci_message *msg, *msg2 = NULL;
++
++    while (true) {
++        if (msg2) {
++            msg = msg2;
++            msg2 = NULL;
++        } else if ((cq = bce_next_completion(sq))) {
++            if (cq->status == BCE_COMPLETION_ABORTED) {
++                bce_notify_submission_complete(sq);
++                continue;
++            }
++            msg = &vhci->ev_commands.data[sq->head];
++        } else {
++            break;
++        }
++
++        pr_debug("bce-vhci: Got fw event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2);
++        if ((cq = bce_next_completion(sq))) {
++            msg2 = &vhci->ev_commands.data[(sq->head + 1) % sq->el_count];
++            pr_debug("bce-vhci: Got second fw event: %x s=%x p1=%x p2=%llx\n",
++                    msg->cmd, msg->status, msg->param1, msg->param2);
++            if (cq->status != BCE_COMPLETION_ABORTED &&
++                msg2->cmd == (msg->cmd | 0x4000) && msg2->param1 == msg->param1) {
++                /* Take two elements */
++                pr_debug("bce-vhci: Cancelled\n");
++                bce_vhci_send_fw_event_response(vhci, msg, BCE_VHCI_ABORT);
++
++                bce_notify_submission_complete(sq);
++                bce_notify_submission_complete(sq);
++                msg2 = NULL;
++                cnt += 2;
++ continue; ++ } ++ ++ pr_warn("bce-vhci: Handle fw event - unexpected cancellation\n"); ++ } ++ ++ result = bce_vhci_handle_firmware_event(vhci, msg); ++ bce_vhci_send_fw_event_response(vhci, msg, (u16) result); ++ ++ ++ bce_notify_submission_complete(sq); ++ ++cnt; ++ } ++ bce_vhci_event_queue_submit_pending(&vhci->ev_commands, cnt); ++ if (atomic_read(&sq->available_commands) == sq->el_count - 1) { ++ pr_debug("bce-vhci: complete\n"); ++ complete(&vhci->ev_commands.queue_empty_completion); ++ } ++} ++ ++static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq) ++{ ++ struct bce_vhci_event_queue *q = sq->userdata; ++ queue_work(q->vhci->tq_state_wq, &q->vhci->w_fw_events); ++} ++ ++static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) ++{ ++ if (msg->cmd & 0x8000) { ++ bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); ++ } else { ++ pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n", ++ msg->cmd, msg->status, msg->param1, msg->param2); ++ } ++} ++ ++static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) ++{ ++ bce_vhci_device_t devid; ++ u8 endp; ++ struct bce_vhci_device *dev; ++ if (msg->cmd & 0x8000) { ++ bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); ++ } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { ++ devid = (bce_vhci_device_t) (msg->param1 & 0xff); ++ endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); ++ dev = q->vhci->devices[devid]; ++ if (!dev || (dev->tq_mask & BIT(endp)) == 0) { ++ pr_err("bce-vhci: Didn't find destination for transfer queue event\n"); ++ return; ++ } ++ bce_vhci_transfer_queue_event(&dev->tq[endp], msg); ++ } else { ++ pr_warn("bce-vhci: Unhandled USB event: %x s=%x p1=%x p2=%llx\n", ++ msg->cmd, msg->status, msg->param1, msg->param2); ++ } ++} ++ ++ ++ ++static const struct hc_driver bce_vhci_driver = { ++ .description = "bce-vhci", ++ .product_desc = "BCE VHCI Host Controller", ++ .hcd_priv_size = sizeof(struct bce_vhci *), ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5,4,0) ++ .flags = HCD_USB2, ++#else ++ .flags = HCD_USB2 | HCD_DMA, ++#endif ++ ++ .start = bce_vhci_start, ++ .stop = bce_vhci_stop, ++ .hub_status_data = bce_vhci_hub_status_data, ++ .hub_control = bce_vhci_hub_control, ++ .urb_enqueue = bce_vhci_urb_enqueue, ++ .urb_dequeue = bce_vhci_urb_dequeue, ++ .enable_device = bce_vhci_enable_device, ++ .free_dev = bce_vhci_free_device, ++ .address_device = bce_vhci_address_device, ++ .add_endpoint = bce_vhci_add_endpoint, ++ .drop_endpoint = bce_vhci_drop_endpoint, ++ .endpoint_reset = bce_vhci_endpoint_reset, ++ .check_bandwidth = bce_vhci_check_bandwidth, ++ .get_frame_number = bce_vhci_get_frame_number, ++ .bus_suspend = bce_vhci_bus_suspend, ++ .bus_resume = bce_vhci_bus_resume ++}; ++ ++ ++int __init bce_vhci_module_init(void) ++{ ++ int result; ++ if ((result = alloc_chrdev_region(&bce_vhci_chrdev, 0, 1, "bce-vhci"))) ++ goto fail_chrdev; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6,4,0) ++ bce_vhci_class = class_create(THIS_MODULE, "bce-vhci"); ++#else ++ bce_vhci_class = class_create("bce-vhci"); ++#endif ++ if (IS_ERR(bce_vhci_class)) { ++ result = PTR_ERR(bce_vhci_class); ++ goto fail_class; ++ } ++ return 0; ++ ++fail_class: ++ class_destroy(bce_vhci_class); ++fail_chrdev: ++ unregister_chrdev_region(bce_vhci_chrdev, 1); ++ if (!result) ++ result = -EINVAL; ++ return result; ++} ++void __exit 
bce_vhci_module_exit(void) ++{ ++ class_destroy(bce_vhci_class); ++ unregister_chrdev_region(bce_vhci_chrdev, 1); ++} ++ ++module_param_named(vhci_port_mask, bce_vhci_port_mask, ushort, 0444); ++MODULE_PARM_DESC(vhci_port_mask, "Specifies which VHCI ports are enabled"); +diff --git a/drivers/staging/apple-bce/vhci/vhci.h b/drivers/staging/apple-bce/vhci/vhci.h +new file mode 100644 +index 000000000..90641d1ba +--- /dev/null ++++ b/drivers/staging/apple-bce/vhci/vhci.h +@@ -0,0 +1,48 @@ ++#ifndef BCE_VHCI_H ++#define BCE_VHCI_H ++ ++#include "queue.h" ++#include "transfer.h" ++ ++struct usb_hcd; ++struct bce_queue_cq; ++ ++struct bce_vhci_device { ++ struct bce_vhci_transfer_queue tq[32]; ++ u32 tq_mask; ++}; ++struct bce_vhci { ++ struct apple_bce_device *dev; ++ dev_t vdevt; ++ struct device *vdev; ++ struct usb_hcd *hcd; ++ struct spinlock hcd_spinlock; ++ struct bce_vhci_message_queue msg_commands; ++ struct bce_vhci_message_queue msg_system; ++ struct bce_vhci_message_queue msg_isochronous; ++ struct bce_vhci_message_queue msg_interrupt; ++ struct bce_vhci_message_queue msg_asynchronous; ++ struct spinlock msg_asynchronous_lock; ++ struct bce_vhci_command_queue cq; ++ struct bce_queue_cq *ev_cq; ++ struct bce_vhci_event_queue ev_commands; ++ struct bce_vhci_event_queue ev_system; ++ struct bce_vhci_event_queue ev_isochronous; ++ struct bce_vhci_event_queue ev_interrupt; ++ struct bce_vhci_event_queue ev_asynchronous; ++ u16 port_mask; ++ u8 port_count; ++ u16 port_power_mask; ++ bce_vhci_device_t port_to_device[16]; ++ struct bce_vhci_device *devices[16]; ++ struct workqueue_struct *tq_state_wq; ++ struct work_struct w_fw_events; ++}; ++ ++int __init bce_vhci_module_init(void); ++void __exit bce_vhci_module_exit(void); ++ ++int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci); ++void bce_vhci_destroy(struct bce_vhci *vhci); ++ ++#endif //BCE_VHCI_H +-- +2.39.2 + +From dc27d4db5787546ae5eacf3483f3b87f2d4fb1c1 Mon Sep 17 00:00:00 2001 +From: Redecorating <69827514+Redecorating@users.noreply.github.com> +Date: Mon, 7 Nov 2022 14:56:34 +0530 +Subject: [PATCH] Put apple-bce in drivers/staging + +--- + drivers/staging/Kconfig | 2 ++ + drivers/staging/Makefile | 1 + + drivers/staging/apple-bce/Kconfig | 18 ++++++++++++++++++ + drivers/staging/apple-bce/Makefile | 2 +- + 4 files changed, 22 insertions(+), 1 deletion(-) + create mode 100644 drivers/staging/apple-bce/Kconfig + +diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig +index 5cfabd537..3b8e61d26 100644 +--- a/drivers/staging/Kconfig ++++ b/drivers/staging/Kconfig +@@ -80,4 +80,6 @@ source "drivers/staging/qlge/Kconfig" + + source "drivers/staging/vme_user/Kconfig" + ++source "drivers/staging/apple-bce/Kconfig" ++ + endif # STAGING +diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile +index f8c3aa9c2..1e148d6c3 100644 +--- a/drivers/staging/Makefile ++++ b/drivers/staging/Makefile +@@ -29,3 +29,4 @@ obj-$(CONFIG_PI433) += pi433/ + obj-$(CONFIG_PI433) += pi433/ + obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/ + obj-$(CONFIG_FIELDBUS_DEV) += fieldbus/ ++obj-$(CONFIG_APPLE_BCE) += apple-bce/ +diff --git a/drivers/staging/apple-bce/Kconfig b/drivers/staging/apple-bce/Kconfig +new file mode 100644 +index 000000000..fe92bc441 +--- /dev/null ++++ b/drivers/staging/apple-bce/Kconfig +@@ -0,0 +1,18 @@ ++config APPLE_BCE ++ tristate "Apple BCE driver (VHCI and Audio support)" ++ default m ++ depends on X86 ++ select SOUND ++ select SND ++ select SND_PCM ++ select SND_JACK ++ help ++ VHCI and audio support 
on Apple MacBooks with the T2 Chip.
++	  This driver is divided into three components:
++	  - BCE (Buffer Copy Engine): which establishes a basic communication
++	    channel with the T2 chip. This component is required by the other two:
++	  - VHCI (Virtual Host Controller Interface): Access to keyboard, mouse
++	    and other system devices depends on this virtual USB host controller
++	  - Audio: a driver for the T2 audio interface.
++
++	  If "M" is selected, the module will be called apple-bce.
+diff --git a/drivers/staging/apple-bce/Makefile b/drivers/staging/apple-bce/Makefile
+index a6a656f06..8cfbd3f64 100644
+--- a/drivers/staging/apple-bce/Makefile
++++ b/drivers/staging/apple-bce/Makefile
+@@ -1,5 +1,5 @@
+ modname := apple-bce
+-obj-m += $(modname).o
++obj-$(CONFIG_APPLE_BCE) += $(modname).o
+
+ apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o
+
+-- 
+2.34.1
+From 153b587ed53135eaf244144f6f8bdd5a0fe6b69e Mon Sep 17 00:00:00 2001
+From: Redecorating <69827514+Redecorating@users.noreply.github.com>
+Date: Fri, 24 Dec 2021 18:12:25 +1100
+Subject: [PATCH 1/1] add modalias to apple-bce
+
+---
+ drivers/staging/apple-bce/apple_bce.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/staging/apple-bce/apple_bce.c b/drivers/staging/apple-bce/apple_bce.c
+index ad89632df..5e2f2f3b9 100644
+--- a/drivers/staging/apple-bce/apple_bce.c
++++ b/drivers/staging/apple-bce/apple_bce.c
+@@ -439,5 +439,6 @@ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("MrARM");
+ MODULE_DESCRIPTION("Apple BCE Driver");
+ MODULE_VERSION("0.01");
++MODULE_ALIAS("pci:v0000106Bd00001801sv*sd*bc*sc*i*");
+ module_init(apple_bce_module_init);
+ module_exit(apple_bce_module_exit);
+-- 
+2.43.0
+
+From 75ca57b64ce6846622d8aefac5a76fc638a2123d Mon Sep 17 00:00:00 2001
+From: Kerem Karabay <kekrby@gmail.com>
+Date: Sun, 5 Mar 2023 19:12:53 +0300
+Subject: [PATCH 01/12] HID: core: add helper for finding a field with a
+ certain usage
+
+This helper will allow HID drivers to easily determine if they should
+bind to a hid_device by checking for the presence of a certain field
+when its ID is not enough, which can be the case on USB devices with
+multiple interfaces and/or configurations.
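+
+For illustration only (not part of the patch), a driver that should bind only
+when a particular usage is present could call the new helper from its probe
+path roughly like the sketch below; example_probe is a hypothetical name, and
+the usages are the same ones the hammer driver checks:
+
+	static int example_probe(struct hid_device *hdev,
+				 const struct hid_device_id *id)
+	{
+		int ret = hid_parse(hdev);
+
+		if (ret)
+			return ret;
+
+		/* Bind only if a keyboard input report carries the usage. */
+		if (!hid_find_field(hdev, HID_INPUT_REPORT,
+				    HID_GD_KEYBOARD, HID_USAGE_KBD_FOLDED))
+			return -ENODEV;
+
+		return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+	}
+
+The hammer driver conversion below performs exactly this kind of presence
+check, just wrapped in small helper functions.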
+ +Signed-off-by: Kerem Karabay +--- + drivers/hid/hid-core.c | 25 +++++++++++++++++++++++++ + drivers/hid/hid-google-hammer.c | 27 ++------------------------- + include/linux/hid.h | 2 ++ + 3 files changed, 29 insertions(+), 25 deletions(-) + +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index 8992e3c1e..6395bdc2e 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -1906,6 +1906,31 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) + } + EXPORT_SYMBOL_GPL(hid_set_field); + ++struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, ++ unsigned int application, unsigned int usage) ++{ ++ struct list_head *report_list = &hdev->report_enum[report_type].report_list; ++ struct hid_report *report; ++ int i, j; ++ ++ list_for_each_entry(report, report_list, list) { ++ if (report->application != application) ++ continue; ++ ++ for (i = 0; i < report->maxfield; i++) { ++ struct hid_field *field = report->field[i]; ++ ++ for (j = 0; j < field->maxusage; j++) { ++ if (field->usage[j].hid == usage) ++ return field; ++ } ++ } ++ } ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(hid_find_field); ++ + static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, + const u8 *data) + { +diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c +index c6bdb9c4e..fba3652aa 100644 +--- a/drivers/hid/hid-google-hammer.c ++++ b/drivers/hid/hid-google-hammer.c +@@ -419,38 +419,15 @@ static int hammer_event(struct hid_device *hid, struct hid_field *field, + return 0; + } + +-static bool hammer_has_usage(struct hid_device *hdev, unsigned int report_type, +- unsigned application, unsigned usage) +-{ +- struct hid_report_enum *re = &hdev->report_enum[report_type]; +- struct hid_report *report; +- int i, j; +- +- list_for_each_entry(report, &re->report_list, list) { +- if (report->application != application) +- continue; +- +- for (i = 0; i < report->maxfield; i++) { +- struct hid_field *field = report->field[i]; +- +- for (j = 0; j < field->maxusage; j++) +- if (field->usage[j].hid == usage) +- return true; +- } +- } +- +- return false; +-} +- + static bool hammer_has_folded_event(struct hid_device *hdev) + { +- return hammer_has_usage(hdev, HID_INPUT_REPORT, ++ return !!hid_find_field(hdev, HID_INPUT_REPORT, + HID_GD_KEYBOARD, HID_USAGE_KBD_FOLDED); + } + + static bool hammer_has_backlight_control(struct hid_device *hdev) + { +- return hammer_has_usage(hdev, HID_OUTPUT_REPORT, ++ return !!hid_find_field(hdev, HID_OUTPUT_REPORT, + HID_GD_KEYBOARD, HID_AD_BRIGHTNESS); + } + +diff --git a/include/linux/hid.h b/include/linux/hid.h +index 39e21e381..9520fdfdd 100644 +--- a/include/linux/hid.h ++++ b/include/linux/hid.h +@@ -913,6 +913,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep + extern int hidinput_connect(struct hid_device *hid, unsigned int force); + extern void hidinput_disconnect(struct hid_device *); + ++struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, ++ unsigned int application, unsigned int usage); + int hid_set_field(struct hid_field *, unsigned, __s32); + int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, + int interrupt); +-- +2.42.0 + +From 05cd738ce1c0e1a930a1dab02528fd9f1c702c38 Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Sun, 5 Mar 2023 18:52:43 +0300 +Subject: [PATCH 02/12] HID: hid-appletb-bl: add driver for the backlight of + Apple Touch Bars +MIME-Version: 
1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This commit adds a driver for the backlight of Apple Touch Bars on x86
+Macs. Note that currently only T2 Macs are supported.
+
+This driver is based on previous work done by Ronald Tschalär
+<ronald@innovation.ch>.
+
+Signed-off-by: Kerem Karabay <kekrby@gmail.com>
+---
+ MAINTAINERS                  |   6 ++
+ drivers/hid/Kconfig          |  10 ++
+ drivers/hid/Makefile         |   1 +
+ drivers/hid/hid-appletb-bl.c | 193 +++++++++++++++++++++++++++++++++++
+ drivers/hid/hid-quirks.c     |   4 +-
+ 5 files changed, 213 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/hid/hid-appletb-bl.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 4cc6bf79f..519b3b736 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9157,6 +9157,12 @@ F:	include/linux/pm.h
+ F:	include/linux/suspend.h
+ F:	kernel/power/
+
++HID APPLE TOUCH BAR DRIVERS
++M:	Kerem Karabay <kekrby@gmail.com>
++L:	linux-input@vger.kernel.org
++S:	Maintained
++F:	drivers/hid/hid-appletb-*
++
+ HID CORE LAYER
+ M:	Jiri Kosina <jikos@kernel.org>
+ M:	Benjamin Tissoires <benjamin.tissoires@redhat.com>
+diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
+index e11c1c803..cf19a3b33 100644
+--- a/drivers/hid/Kconfig
++++ b/drivers/hid/Kconfig
+@@ -148,6 +148,16 @@ config HID_APPLEIR
+
+ 	  Say Y here if you want support for Apple infrared remote control.
+
++config HID_APPLETB_BL
++	tristate "Apple Touch Bar Backlight"
++	depends on BACKLIGHT_CLASS_DEVICE
++	help
++	  Say Y here if you want support for the backlight of Touch Bars on x86
++	  MacBook Pros.
++
++	  To compile this driver as a module, choose M here: the
++	  module will be called hid-appletb-bl.
++
+ config HID_ASUS
+ 	tristate "Asus"
+ 	depends on USB_HID
+diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
+index 7a9e16015..bc86e38b2 100644
+--- a/drivers/hid/Makefile
++++ b/drivers/hid/Makefile
+@@ -29,6 +29,7 @@ obj-$(CONFIG_HID_ALPS) += hid-alps.o
+ obj-$(CONFIG_HID_ACRUX) += hid-axff.o
+ obj-$(CONFIG_HID_APPLE) += hid-apple.o
+ obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o
++obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o
+ obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o
+ obj-$(CONFIG_HID_ASUS) += hid-asus.o
+ obj-$(CONFIG_HID_AUREAL) += hid-aureal.o
+diff --git a/drivers/hid/hid-appletb-bl.c b/drivers/hid/hid-appletb-bl.c
+new file mode 100644
+index 000000000..0c5e4b776
+--- /dev/null
++++ b/drivers/hid/hid-appletb-bl.c
+@@ -0,0 +1,193 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Apple Touch Bar Backlight Driver
++ *
++ * Copyright (c) 2017-2018 Ronald Tschalär
++ * Copyright (c) 2022-2023 Kerem Karabay
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/hid.h>
++#include <linux/backlight.h>
++
++#include "hid-ids.h"
++
++#define APPLETB_BL_ON 1
++#define APPLETB_BL_DIM 3
++#define APPLETB_BL_OFF 4
++
++#define HID_UP_APPLEVENDOR_TB_BL 0xff120000
++
++#define HID_VD_APPLE_TB_BRIGHTNESS 0xff120001
++#define HID_USAGE_AUX1 0xff120020
++#define HID_USAGE_BRIGHTNESS 0xff120021
++
++struct appletb_bl {
++	struct hid_field *aux1_field, *brightness_field;
++	struct backlight_device *bdev;
++
++	bool full_on;
++};
++
++const u8 appletb_bl_brightness_map[] = {
++	APPLETB_BL_OFF,
++	APPLETB_BL_DIM,
++	APPLETB_BL_ON
++};
++
++static int appletb_bl_set_brightness(struct appletb_bl *bl, u8 brightness)
++{
++	struct hid_report *report = bl->brightness_field->report;
++	struct hid_device *hdev = report->device;
++	int ret;
++
++	ret = hid_set_field(bl->aux1_field, 0, 1);
++	if (ret) {
++		hid_err(hdev, "Failed to set auxiliary field (%pe)\n", ERR_PTR(ret));
++		return ret;
++	}
++
++	ret = hid_set_field(bl->brightness_field, 0, brightness);
++	if
(ret) { ++ hid_err(hdev, "Failed to set brightness field (%pe)\n", ERR_PTR(ret)); ++ return ret; ++ } ++ ++ if (!bl->full_on) { ++ ret = hid_hw_power(hdev, PM_HINT_FULLON); ++ if (ret < 0) { ++ hid_err(hdev, "Device didn't power on (%pe)\n", ERR_PTR(ret)); ++ return ret; ++ } ++ ++ bl->full_on = true; ++ } ++ ++ hid_hw_request(hdev, report, HID_REQ_SET_REPORT); ++ ++ if (brightness == APPLETB_BL_OFF) { ++ hid_hw_power(hdev, PM_HINT_NORMAL); ++ bl->full_on = false; ++ } ++ ++ return 0; ++} ++ ++static int appletb_bl_update_status(struct backlight_device *bdev) ++{ ++ struct appletb_bl *bl = bl_get_data(bdev); ++ u16 brightness; ++ ++ if (bdev->props.state & BL_CORE_SUSPENDED) ++ brightness = 0; ++ else ++ brightness = backlight_get_brightness(bdev); ++ ++ return appletb_bl_set_brightness(bl, appletb_bl_brightness_map[brightness]); ++} ++ ++static const struct backlight_ops appletb_bl_backlight_ops = { ++ .options = BL_CORE_SUSPENDRESUME, ++ .update_status = appletb_bl_update_status, ++}; ++ ++static int appletb_bl_probe(struct hid_device *hdev, const struct hid_device_id *id) ++{ ++ struct hid_field *aux1_field, *brightness_field; ++ struct backlight_properties bl_props = { 0 }; ++ struct device *dev = &hdev->dev; ++ struct appletb_bl *bl; ++ int ret; ++ ++ ret = hid_parse(hdev); ++ if (ret) ++ return dev_err_probe(dev, ret, "HID parse failed\n"); ++ ++ aux1_field = hid_find_field(hdev, HID_FEATURE_REPORT, ++ HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_AUX1); ++ ++ brightness_field = hid_find_field(hdev, HID_FEATURE_REPORT, ++ HID_VD_APPLE_TB_BRIGHTNESS, HID_USAGE_BRIGHTNESS); ++ ++ if (!aux1_field || !brightness_field) ++ return -ENODEV; ++ ++ if (aux1_field->report != brightness_field->report) ++ return dev_err_probe(dev, -ENODEV, "Encountered unexpected report structure\n"); ++ ++ bl = devm_kzalloc(dev, sizeof(*bl), GFP_KERNEL); ++ if (!bl) ++ return -ENOMEM; ++ ++ ret = hid_hw_start(hdev, HID_CONNECT_DRIVER); ++ if (ret) ++ return dev_err_probe(dev, ret, "HID hardware start failed\n"); ++ ++ ret = hid_hw_open(hdev); ++ if (ret) { ++ dev_err_probe(dev, ret, "HID hardware open failed\n"); ++ goto stop_hw; ++ } ++ ++ bl->aux1_field = aux1_field; ++ bl->brightness_field = brightness_field; ++ ++ ret = appletb_bl_set_brightness(bl, APPLETB_BL_OFF); ++ if (ret) { ++ dev_err_probe(dev, ret, "Failed to set touch bar brightness to off\n"); ++ goto close_hw; ++ } ++ ++ bl_props.type = BACKLIGHT_RAW; ++ bl_props.max_brightness = ARRAY_SIZE(appletb_bl_brightness_map) - 1; ++ ++ bl->bdev = devm_backlight_device_register(dev, "appletb_backlight", dev, bl, ++ &appletb_bl_backlight_ops, &bl_props); ++ if (IS_ERR(bl->bdev)) { ++ ret = PTR_ERR(bl->bdev); ++ dev_err_probe(dev, ret, "Failed to register backlight device\n"); ++ goto close_hw; ++ } ++ ++ hid_set_drvdata(hdev, bl); ++ ++ return 0; ++ ++close_hw: ++ hid_hw_close(hdev); ++stop_hw: ++ hid_hw_stop(hdev); ++ ++ return ret; ++} ++ ++static void appletb_bl_remove(struct hid_device *hdev) ++{ ++ struct appletb_bl *bl = hid_get_drvdata(hdev); ++ ++ appletb_bl_set_brightness(bl, APPLETB_BL_OFF); ++ ++ hid_hw_close(hdev); ++ hid_hw_stop(hdev); ++} ++ ++static const struct hid_device_id appletb_bl_hid_ids[] = { ++ /* MacBook Pro's 2018, 2019, with T2 chip: iBridge DFR Brightness */ ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, ++ { } ++}; ++MODULE_DEVICE_TABLE(hid, appletb_bl_hid_ids); ++ ++static struct hid_driver appletb_bl_hid_driver = { ++ .name = "hid-appletb-bl", ++ .id_table = appletb_bl_hid_ids, ++ .probe = 
appletb_bl_probe, ++ .remove = appletb_bl_remove, ++}; ++module_hid_driver(appletb_bl_hid_driver); ++ ++MODULE_AUTHOR("Ronald Tschalär"); ++MODULE_AUTHOR("Kerem Karabay "); ++MODULE_DESCRIPTION("MacBookPro Touch Bar Backlight Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c +index 3983b4f28..82e7a80c9 100644 +--- a/drivers/hid/hid-quirks.c ++++ b/drivers/hid/hid-quirks.c +@@ -325,7 +325,6 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) }, +- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, + #endif + #if IS_ENABLED(CONFIG_HID_APPLEIR) +@@ -335,6 +334,9 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) }, + #endif ++#if IS_ENABLED(CONFIG_HID_APPLETB_BL) ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, ++#endif + #if IS_ENABLED(CONFIG_HID_ASUS) + { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) }, +-- +2.42.0 + +From 514b4f088b7ed916c634ca6f61de72c5f86268dd Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Sun, 5 Mar 2023 18:17:23 +0300 +Subject: [PATCH 03/12] HID: hid-appletb-kbd: add driver for the keyboard mode + of Apple Touch Bars +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The Touch Bars found on x86 Macs support two USB configurations: one +where the device presents itself as a HID keyboard and can display +predefined sets of keys, and one where the operating system has full +control over what is displayed. This commit adds a driver for the +display functionality of the first configuration. + +Note that currently only T2 Macs are supported. + +This driver is based on previous work done by Ronald Tschalär +. + +Signed-off-by: Kerem Karabay +--- + .../ABI/testing/sysfs-driver-hid-appletb-kbd | 13 + + drivers/hid/Kconfig | 11 + + drivers/hid/Makefile | 1 + + drivers/hid/hid-appletb-kbd.c | 289 ++++++++++++++++++ + drivers/hid/hid-quirks.c | 4 +- + 5 files changed, 317 insertions(+), 1 deletion(-) + create mode 100644 Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd + create mode 100644 drivers/hid/hid-appletb-kbd.c + +diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd +new file mode 100644 +index 000000000..2a19584d0 +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd +@@ -0,0 +1,13 @@ ++What: /sys/bus/hid/drivers/hid-appletb-kbd//mode ++Date: September, 2023 ++KernelVersion: 6.5 ++Contact: linux-input@vger.kernel.org ++Description: ++ The set of keys displayed on the Touch Bar. 
++ Valid values are: ++ == ================= ++ 0 Escape key only ++ 1 Function keys ++ 2 Media/brightness keys ++ 3 None ++ == ================= +diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig +index cf19a3b33..852de13aa 100644 +--- a/drivers/hid/Kconfig ++++ b/drivers/hid/Kconfig +@@ -158,6 +158,17 @@ config HID_APPLETB_BL + To compile this driver as a module, choose M here: the + module will be called hid-appletb-bl. + ++config HID_APPLETB_KBD ++ tristate "Apple Touch Bar Keyboard Mode" ++ depends on USB_HID ++ help ++ Say Y here if you want support for the keyboard mode (escape, ++ function, media and brightness keys) of Touch Bars on x86 MacBook ++ Pros. ++ ++ To compile this driver as a module, choose M here: the ++ module will be called hid-appletb-kbd. ++ + config HID_ASUS + tristate "Asus" + depends on USB_HID +diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile +index bc86e38b2..5b60015fd 100644 +--- a/drivers/hid/Makefile ++++ b/drivers/hid/Makefile +@@ -30,6 +30,7 @@ obj-$(CONFIG_HID_ACRUX) += hid-axff.o + obj-$(CONFIG_HID_APPLE) += hid-apple.o + obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o + obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o ++obj-$(CONFIG_HID_APPLETB_KBD) += hid-appletb-kbd.o + obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o + obj-$(CONFIG_HID_ASUS) += hid-asus.o + obj-$(CONFIG_HID_AUREAL) += hid-aureal.o +diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c +new file mode 100644 +index 000000000..bc004c408 +--- /dev/null ++++ b/drivers/hid/hid-appletb-kbd.c +@@ -0,0 +1,289 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Apple Touch Bar Keyboard Mode Driver ++ * ++ * Copyright (c) 2017-2018 Ronald Tschalär ++ * Copyright (c) 2022-2023 Kerem Karabay ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "hid-ids.h" ++ ++#define APPLETB_KBD_MODE_ESC 0 ++#define APPLETB_KBD_MODE_FN 1 ++#define APPLETB_KBD_MODE_SPCL 2 ++#define APPLETB_KBD_MODE_OFF 3 ++#define APPLETB_KBD_MODE_MAX APPLETB_KBD_MODE_OFF ++ ++#define HID_USAGE_MODE 0x00ff0004 ++ ++struct appletb_kbd { ++ struct hid_field *mode_field; ++ ++ u8 saved_mode; ++ u8 current_mode; ++}; ++ ++static const struct key_entry appletb_kbd_keymap[] = { ++ { KE_KEY, KEY_ESC, { KEY_ESC } }, ++ { KE_KEY, KEY_F1, { KEY_BRIGHTNESSDOWN } }, ++ { KE_KEY, KEY_F2, { KEY_BRIGHTNESSUP } }, ++ { KE_KEY, KEY_F3, { KEY_RESERVED } }, ++ { KE_KEY, KEY_F4, { KEY_RESERVED } }, ++ { KE_KEY, KEY_F5, { KEY_KBDILLUMDOWN } }, ++ { KE_KEY, KEY_F6, { KEY_KBDILLUMUP } }, ++ { KE_KEY, KEY_F7, { KEY_PREVIOUSSONG } }, ++ { KE_KEY, KEY_F8, { KEY_PLAYPAUSE } }, ++ { KE_KEY, KEY_F9, { KEY_NEXTSONG } }, ++ { KE_KEY, KEY_F10, { KEY_MUTE } }, ++ { KE_KEY, KEY_F11, { KEY_VOLUMEDOWN } }, ++ { KE_KEY, KEY_F12, { KEY_VOLUMEUP } }, ++ { KE_END, 0 } ++}; ++ ++static int appletb_kbd_set_mode(struct appletb_kbd *kbd, u8 mode) ++{ ++ struct hid_report *report = kbd->mode_field->report; ++ struct hid_device *hdev = report->device; ++ int ret; ++ ++ ret = hid_hw_power(hdev, PM_HINT_FULLON); ++ if (ret) { ++ hid_err(hdev, "Device didn't resume (%pe)\n", ERR_PTR(ret)); ++ return ret; ++ } ++ ++ ret = hid_set_field(kbd->mode_field, 0, mode); ++ if (ret) { ++ hid_err(hdev, "Failed to set mode field to %u (%pe)\n", mode, ERR_PTR(ret)); ++ goto power_normal; ++ } ++ ++ hid_hw_request(hdev, report, HID_REQ_SET_REPORT); ++ ++ kbd->current_mode = mode; ++ ++power_normal: ++ hid_hw_power(hdev, PM_HINT_NORMAL); ++ ++ 
return ret; ++} ++ ++static ssize_t mode_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct appletb_kbd *kbd = dev_get_drvdata(dev); ++ ++ return sysfs_emit(buf, "%d\n", kbd->current_mode); ++} ++ ++static ssize_t mode_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct appletb_kbd *kbd = dev_get_drvdata(dev); ++ u8 mode; ++ int ret; ++ ++ ret = kstrtou8(buf, 0, &mode); ++ if (ret) ++ return ret; ++ ++ if (mode > APPLETB_KBD_MODE_MAX) ++ return -EINVAL; ++ ++ ret = appletb_kbd_set_mode(kbd, mode); ++ ++ return ret < 0 ? ret : size; ++} ++static DEVICE_ATTR_RW(mode); ++ ++struct attribute *appletb_kbd_attrs[] = { ++ &dev_attr_mode.attr, ++ NULL ++}; ++ATTRIBUTE_GROUPS(appletb_kbd); ++ ++static int appletb_tb_key_to_slot(unsigned int code) ++{ ++ switch (code) { ++ case KEY_ESC: ++ return 0; ++ case KEY_F1 ... KEY_F10: ++ return code - KEY_F1 + 1; ++ case KEY_F11 ... KEY_F12: ++ return code - KEY_F11 + 11; ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int appletb_kbd_hid_event(struct hid_device *hdev, struct hid_field *field, ++ struct hid_usage *usage, __s32 value) ++{ ++ struct appletb_kbd *kbd = hid_get_drvdata(hdev); ++ struct key_entry *translation; ++ struct input_dev *input; ++ int slot; ++ ++ if ((usage->hid & HID_USAGE_PAGE) != HID_UP_KEYBOARD || usage->type != EV_KEY) ++ return 0; ++ ++ input = field->hidinput->input; ++ ++ /* ++ * Skip non-touch-bar keys. ++ * ++ * Either the touch bar itself or usbhid generate a slew of key-down ++ * events for all the meta keys. None of which we're at all interested ++ * in. ++ */ ++ slot = appletb_tb_key_to_slot(usage->code); ++ if (slot < 0) ++ return 0; ++ ++ translation = sparse_keymap_entry_from_scancode(input, usage->code); ++ ++ if (translation && kbd->current_mode == APPLETB_KBD_MODE_SPCL) { ++ input_event(input, usage->type, translation->keycode, value); ++ ++ return 1; ++ } ++ ++ return kbd->current_mode == APPLETB_KBD_MODE_OFF; ++} ++ ++static int appletb_kbd_input_configured(struct hid_device *hdev, struct hid_input *hidinput) ++{ ++ struct input_dev *input = hidinput->input; ++ ++ /* ++ * Clear various input capabilities that are blindly set by the hid ++ * driver (usbkbd.c) ++ */ ++ memset(input->evbit, 0, sizeof(input->evbit)); ++ memset(input->keybit, 0, sizeof(input->keybit)); ++ memset(input->ledbit, 0, sizeof(input->ledbit)); ++ ++ __set_bit(EV_REP, input->evbit); ++ ++ return sparse_keymap_setup(input, appletb_kbd_keymap, NULL); ++} ++ ++static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) ++{ ++ struct appletb_kbd *kbd; ++ struct device *dev = &hdev->dev; ++ struct hid_field *mode_field; ++ int ret; ++ ++ ret = hid_parse(hdev); ++ if (ret) ++ return dev_err_probe(dev, ret, "HID parse failed\n"); ++ ++ mode_field = hid_find_field(hdev, HID_OUTPUT_REPORT, ++ HID_GD_KEYBOARD, HID_USAGE_MODE); ++ if (!mode_field) ++ return -ENODEV; ++ ++ kbd = devm_kzalloc(dev, sizeof(*kbd), GFP_KERNEL); ++ if (!kbd) ++ return -ENOMEM; ++ ++ kbd->mode_field = mode_field; ++ ++ ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT); ++ if (ret) ++ return dev_err_probe(dev, ret, "HID hw start failed\n"); ++ ++ ret = hid_hw_open(hdev); ++ if (ret) { ++ dev_err_probe(dev, ret, "HID hw open failed\n"); ++ goto stop_hw; ++ } ++ ++ ret = appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); ++ if (ret) { ++ dev_err_probe(dev, ret, "Failed to set touchbar mode\n"); ++ goto close_hw; ++ } ++ ++ hid_set_drvdata(hdev, kbd); ++ ++ return 0; 
++ ++close_hw: ++ hid_hw_close(hdev); ++stop_hw: ++ hid_hw_stop(hdev); ++ return ret; ++} ++ ++static void appletb_kbd_remove(struct hid_device *hdev) ++{ ++ struct appletb_kbd *kbd = hid_get_drvdata(hdev); ++ ++ appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); ++ ++ hid_hw_close(hdev); ++ hid_hw_stop(hdev); ++} ++ ++#ifdef CONFIG_PM ++static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg) ++{ ++ struct appletb_kbd *kbd = hid_get_drvdata(hdev); ++ ++ kbd->saved_mode = kbd->current_mode; ++ appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); ++ ++ return 0; ++} ++ ++static int appletb_kbd_reset_resume(struct hid_device *hdev) ++{ ++ struct appletb_kbd *kbd = hid_get_drvdata(hdev); ++ ++ appletb_kbd_set_mode(kbd, kbd->saved_mode); ++ ++ return 0; ++} ++#endif ++ ++static const struct hid_device_id appletb_kbd_hid_ids[] = { ++ /* MacBook Pro's 2018, 2019, with T2 chip: iBridge Display */ ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, ++ { } ++}; ++MODULE_DEVICE_TABLE(hid, appletb_kbd_hid_ids); ++ ++static struct hid_driver appletb_kbd_hid_driver = { ++ .name = "hid-appletb-kbd", ++ .id_table = appletb_kbd_hid_ids, ++ .probe = appletb_kbd_probe, ++ .remove = appletb_kbd_remove, ++ .event = appletb_kbd_hid_event, ++ .input_configured = appletb_kbd_input_configured, ++#ifdef CONFIG_PM ++ .suspend = appletb_kbd_suspend, ++ .reset_resume = appletb_kbd_reset_resume, ++#endif ++ .driver.dev_groups = appletb_kbd_groups, ++}; ++module_hid_driver(appletb_kbd_hid_driver); ++ ++MODULE_AUTHOR("Ronald Tschalär"); ++MODULE_AUTHOR("Kerem Karabay "); ++MODULE_DESCRIPTION("MacBookPro Touch Bar Keyboard Mode Driver"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c +index 82e7a80c9..82be9dfaf 100644 +--- a/drivers/hid/hid-quirks.c ++++ b/drivers/hid/hid-quirks.c +@@ -325,7 +325,6 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021) }, +- { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, + #endif + #if IS_ENABLED(CONFIG_HID_APPLEIR) + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) }, +@@ -337,6 +336,9 @@ static const struct hid_device_id hid_have_special_driver[] = { + #if IS_ENABLED(CONFIG_HID_APPLETB_BL) + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, + #endif ++#if IS_ENABLED(CONFIG_HID_APPLETB_KBD) ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, ++#endif + #if IS_ENABLED(CONFIG_HID_ASUS) + { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) }, +-- +2.42.0 + +From 2f9be28549307b4ac51e8d66bf3b8d5e0621466d Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Wed, 19 Jul 2023 19:37:14 +0300 +Subject: [PATCH 04/12] HID: multitouch: support getting the contact ID from + HID_DG_TRANSDUCER_INDEX fields + +This is needed to support Apple Touch Bars, where the contact ID is +contained in fields with the HID_DG_TRANSDUCER_INDEX usage. 
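For illustration, a minimal userspace sketch of the check this patch relaxes (not the driver code itself; the constants are the standard HID digitizer-page usage values, and the sample report contents are hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    #define HID_DG_TRANSDUCER_INDEX 0x000d0038
    #define HID_DG_CONTACTID        0x000d0051

    /* A field now provides the contact ID if it carries either usage. */
    static bool usage_is_contact_id(unsigned int hid)
    {
            return hid == HID_DG_CONTACTID || hid == HID_DG_TRANSDUCER_INDEX;
    }

    int main(void)
    {
            /* Usages as a Touch Bar finger collection might expose them */
            const unsigned int usages[] = {
                    0x000d0042,              /* Tip Switch */
                    HID_DG_TRANSDUCER_INDEX, /* stands in for Contact ID */
            };

            for (unsigned int i = 0; i < sizeof(usages) / sizeof(usages[0]); i++)
                    printf("usage %#010x -> %s\n", usages[i],
                           usage_is_contact_id(usages[i]) ? "contact ID" : "other");
            return 0;
    }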
+ +Signed-off-by: Kerem Karabay +--- + drivers/hid/hid-multitouch.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c +index e31be0cb8..902a59928 100644 +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -636,7 +636,9 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, + + if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) { + for (n = 0; n < field->report_count; n++) { +- if (field->usage[n].hid == HID_DG_CONTACTID) { ++ unsigned int hid = field->usage[n].hid; ++ ++ if (hid == HID_DG_CONTACTID || hid == HID_DG_TRANSDUCER_INDEX) { + rdata->is_mt_collection = true; + break; + } +@@ -815,6 +817,7 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, + MT_STORE_FIELD(tip_state); + return 1; + case HID_DG_CONTACTID: ++ case HID_DG_TRANSDUCER_INDEX: + MT_STORE_FIELD(contactid); + app->touches_by_report++; + return 1; +-- +2.42.0 + +From 6162d328fe7b2cf5a3ee8c29bdb229e9528c7a6c Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Wed, 19 Jul 2023 19:44:10 +0300 +Subject: [PATCH 05/12] HID: multitouch: support getting the tip state from + HID_DG_TOUCH fields + +This is necessary on Apple Touch Bars, where the tip state is contained +in fields with the HID_DG_TOUCH usage. This feature is gated by a quirk +in order to prevent breaking other devices, see commit c2ef8f21ea8f +("HID: multitouch: add support for trackpads"). + +Signed-off-by: Kerem Karabay +--- + drivers/hid/hid-multitouch.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c +index 902a59928..dd5509eeb 100644 +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -78,6 +78,8 @@ MODULE_LICENSE("GPL"); + #define MT_QUIRK_ORIENTATION_INVERT BIT(22) + #define MT_QUIRK_HAS_TYPE_COVER_BACKLIGHT BIT(23) + #define MT_QUIRK_HAS_TYPE_COVER_TABLET_MODE_SWITCH BIT(24) ++#define MT_QUIRK_TOUCH_IS_TIPSTATE BIT(25) ++ + + #define MT_INPUTMODE_TOUCHSCREEN 0x02 + #define MT_INPUTMODE_TOUCHPAD 0x03 +@@ -810,6 +811,15 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, + + MT_STORE_FIELD(confidence_state); + return 1; ++ case HID_DG_TOUCH: ++ /* ++ * Legacy devices use TIPSWITCH and not TOUCH. ++ * Let's just ignore this field unless the quirk is set. ++ */ ++ if (!(cls->quirks & MT_QUIRK_TOUCH_IS_TIPSTATE)) ++ return -1; ++ ++ fallthrough; + case HID_DG_TIPSWITCH: + if (field->application != HID_GD_SYSTEM_MULTIAXIS) + input_set_capability(hi->input, +@@ -873,10 +883,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, + case HID_DG_CONTACTMAX: + /* contact max are global to the report */ + return -1; +- case HID_DG_TOUCH: +- /* Legacy devices use TIPSWITCH and not TOUCH. +- * Let's just ignore this field. */ +- return -1; + } + /* let hid-input decide for the others */ + return 0; +-- +2.42.0 + +From e923c6e1a5a508e341851ae020cdb3e7333ccd18 Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Wed, 19 Jul 2023 19:26:57 +0300 +Subject: [PATCH 06/12] HID: multitouch: take cls->maxcontacts into account for + devices without a HID_DG_CONTACTMAX field too + +This is needed for Apple Touch Bars, where no HID_DG_CONTACTMAX field is +present and the maximum contact count is greater than the default. 
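A standalone sketch of the precedence this change establishes (plain C rather than the driver itself): a class-supplied limit wins, then a value parsed from a HID_DG_CONTACTMAX feature field, then the driver default.

    #include <stdio.h>

    #define MT_DEFAULT_MAXCONTACT 10

    static int resolve_maxcontacts(int class_max, int feature_max)
    {
            if (class_max)          /* fixed by the mt_class entry */
                    return class_max;
            if (feature_max)        /* reported via HID_DG_CONTACTMAX */
                    return feature_max;
            return MT_DEFAULT_MAXCONTACT;
    }

    int main(void)
    {
            /* Touch Bar: no CONTACTMAX field, the class supplies 11 */
            printf("%d\n", resolve_maxcontacts(11, 0));
            /* Typical touchscreen: field reports 5, no class override */
            printf("%d\n", resolve_maxcontacts(0, 5));
            /* Neither source: fall back to the default */
            printf("%d\n", resolve_maxcontacts(0, 0));
            return 0;
    }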
+
+Signed-off-by: Kerem Karabay 
+---
+ drivers/hid/hid-multitouch.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index dd5509eeb..624c1d3cc 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -491,9 +491,6 @@ static void mt_feature_mapping(struct hid_device *hdev,
+ if (!td->maxcontacts &&
+ field->logical_maximum <= MT_MAX_MAXCONTACT)
+ td->maxcontacts = field->logical_maximum;
+- if (td->mtclass.maxcontacts)
+- /* check if the maxcontacts is given by the class */
+- td->maxcontacts = td->mtclass.maxcontacts;
+
+ break;
+ case HID_DG_BUTTONTYPE:
+@@ -1310,6 +1307,10 @@ static int mt_touch_input_configured(struct hid_device *hdev,
+ struct input_dev *input = hi->input;
+ int ret;
+
++ /* check if the maxcontacts is given by the class */
++ if (cls->maxcontacts)
++ td->maxcontacts = cls->maxcontacts;
++
+ if (!td->maxcontacts)
+ td->maxcontacts = MT_DEFAULT_MAXCONTACT;
+
+--
+2.42.0
+
+From b9f7232d2696b91ae98fadd7b14c531aa8edceb5 Mon Sep 17 00:00:00 2001
+From: Kerem Karabay 
+Date: Wed, 19 Jul 2023 19:39:53 +0300
+Subject: [PATCH 07/12] HID: multitouch: allow specifying if a device is direct
+ in a class
+
+Currently the driver determines the device type based on the
+application, but this value is not reliable on Apple Touch Bars, where
+the application is HID_DG_TOUCHPAD even though the devices are direct,
+so allow setting it in classes.
+
+Signed-off-by: Kerem Karabay 
+---
+ drivers/hid/hid-multitouch.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
+index 624c1d3cc..f98fb36ff 100644
+--- a/drivers/hid/hid-multitouch.c
++++ b/drivers/hid/hid-multitouch.c
+@@ -147,6 +147,7 @@ struct mt_class {
+ __s32 sn_height; /* Signal/noise ratio for height events */
+ __s32 sn_pressure; /* Signal/noise ratio for pressure events */
+ __u8 maxcontacts;
++ bool is_direct; /* true for touchscreens */
+ bool is_indirect; /* true for touchpads */
+ bool export_all_inputs; /* do not ignore mouse, keyboards, etc... */
+ };
+@@ -564,13 +565,13 @@ static struct mt_application *mt_allocate_application(struct mt_device *td,
+ mt_application->application = application;
+ INIT_LIST_HEAD(&mt_application->mt_usages);
+
+- if (application == HID_DG_TOUCHSCREEN)
++ if (application == HID_DG_TOUCHSCREEN && !td->mtclass.is_indirect)
+ mt_application->mt_flags |= INPUT_MT_DIRECT;
+
+ /*
+ * Model touchscreens providing buttons as touchpads.
+ */
+- if (application == HID_DG_TOUCHPAD) {
++ if (application == HID_DG_TOUCHPAD && !td->mtclass.is_direct) {
+ mt_application->mt_flags |= INPUT_MT_POINTER;
+ td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
+ }
+@@ -1318,6 +1319,9 @@ static int mt_touch_input_configured(struct hid_device *hdev,
+ if (td->serial_maybe)
+ mt_post_parse_default_settings(td, app);
+
++ if (cls->is_direct)
++ app->mt_flags |= INPUT_MT_DIRECT;
++
+ if (cls->is_indirect)
+ app->mt_flags |= INPUT_MT_POINTER;
+
+--
+2.42.0
+
+From a74de0b6f2e1b79d54e84dbeab1b310232275d6c Mon Sep 17 00:00:00 2001
+From: Kerem Karabay 
+Date: Wed, 19 Jul 2023 19:46:02 +0300
+Subject: [PATCH 08/12] HID: multitouch: add device ID for Apple Touch Bars
+
+Note that this device ID is for T2 Macs. Testing on T1 Macs would be
+appreciated.
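To see how the new entry ties together, here is a simplified userspace model of the id-table lookup (not the real hid-multitouch tables; the 0x05ac:0x8302 pair is the iBridge display device used elsewhere in this series):

    #include <stddef.h>
    #include <stdio.h>

    #define MT_CLS_APPLE_TOUCHBAR 0x0115

    struct id_entry { unsigned short vendor, product; unsigned long driver_data; };
    struct class_entry { unsigned long name; int maxcontacts, is_direct; };

    static const struct class_entry mt_classes[] = {
            { .name = MT_CLS_APPLE_TOUCHBAR, .maxcontacts = 11, .is_direct = 1 },
    };

    /* probe() resolves id->driver_data to the matching class */
    static const struct class_entry *find_class(unsigned long driver_data)
    {
            for (size_t i = 0; i < sizeof(mt_classes) / sizeof(mt_classes[0]); i++)
                    if (mt_classes[i].name == driver_data)
                            return &mt_classes[i];
            return NULL;
    }

    int main(void)
    {
            const struct id_entry touchbar = { 0x05ac, 0x8302, MT_CLS_APPLE_TOUCHBAR };
            const struct class_entry *cls = find_class(touchbar.driver_data);

            if (cls)
                    printf("%04x:%04x -> maxcontacts=%d direct=%d\n",
                           touchbar.vendor, touchbar.product,
                           cls->maxcontacts, cls->is_direct);
            return 0;
    }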
+ +Signed-off-by: Kerem Karabay +--- + drivers/hid/Kconfig | 1 + + drivers/hid/hid-multitouch.c | 26 ++++++++++++++++++++++---- + 2 files changed, 23 insertions(+), 4 deletions(-) + +diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig +index 852de13aa..4e238df87 100644 +--- a/drivers/hid/Kconfig ++++ b/drivers/hid/Kconfig +@@ -737,6 +737,7 @@ config HID_MULTITOUCH + Say Y here if you have one of the following devices: + - 3M PCT touch screens + - ActionStar dual touch panels ++ - Touch Bars on x86 MacBook Pros + - Atmel panels + - Cando dual touch panels + - Chunghwa panels +diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c +index f98fb36ff..f881b19db 100644 +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -226,6 +229,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); + #define MT_CLS_RAZER_BLADE_STEALTH 0x0112 + #define MT_CLS_SMART_TECH 0x0113 + #define MT_CLS_WIN_8_MS_SURFACE_TYPE_COVER 0x0114 ++#define MT_CLS_APPLE_TOUCHBAR 0x0115 + + #define MT_DEFAULT_MAXCONTACT 10 + #define MT_MAX_MAXCONTACT 250 +@@ -421,6 +421,13 @@ + MT_QUIRK_WIN8_PTP_BUTTONS, + .export_all_inputs = true + }, ++ { .name = MT_CLS_APPLE_TOUCHBAR, ++ .quirks = MT_QUIRK_HOVERING | ++ MT_QUIRK_TOUCH_IS_TIPSTATE | ++ MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE, ++ .is_direct = true, ++ .maxcontacts = 11, ++ }, + { } + }; + +@@ -1883,6 +1906,17 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) + } + } + ++ ret = hid_parse(hdev); ++ if (ret != 0) { ++ unregister_pm_notifier(&td->pm_notifier); ++ return ret; ++ } ++ ++ if (mtclass->name == MT_CLS_APPLE_TOUCHBAR && ++ !hid_find_field(hdev, HID_INPUT_REPORT, ++ HID_DG_TOUCHPAD, HID_DG_TRANSDUCER_INDEX)) ++ return -ENODEV; ++ + td = devm_kzalloc(&hdev->dev, sizeof(struct mt_device), GFP_KERNEL); + if (!td) { + dev_err(&hdev->dev, "cannot allocate multitouch data\n"); +@@ -1933,12 +1967,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) + + timer_setup(&td->release_timer, mt_expired_timeout, 0); + +- ret = hid_parse(hdev); +- if (ret != 0) { +- unregister_pm_notifier(&td->pm_notifier); +- return ret; +- } +- + if (mtclass->quirks & MT_QUIRK_FIX_CONST_CONTACT_ID) + mt_fix_const_fields(hdev, HID_DG_CONTACTID); + +@@ -2418,6 +2418,11 @@ + HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, + USB_VENDOR_ID_MICROSOFT, 0x09c0) }, + ++ /* Apple Touch Bars */ ++ { .driver_data = MT_CLS_APPLE_TOUCHBAR, ++ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, ++ USB_DEVICE_ID_APPLE_TOUCHBAR_DISPLAY) }, ++ + /* Google MT devices */ + { .driver_data = MT_CLS_GOOGLE, + HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, +-- +2.42.0 + +From f6ab7e4580962c9d82e7dc40dd074d47b2bce034 Mon Sep 17 00:00:00 2001 +From: Hector Martin +Date: Tue, 1 Feb 2022 00:40:51 +0900 +Subject: [PATCH 09/12] lib/vsprintf: Add support for generic FOURCCs by + extending %p4cc + +%p4cc is designed for DRM/V4L2 FOURCCs with their specific quirks, but +it's useful to be able to print generic 4-character codes formatted as +an integer. Extend it to add format specifiers for printing generic +32-bit FOURCCs with various endian semantics: + +%p4ch Host-endian +%p4cl Little-endian +%p4cb Big-endian +%p4cr Reverse-endian + +The endianness determines how bytes are interpreted as a u32, and the +FOURCC is then always printed MSByte-first (this is the opposite of +V4L/DRM FOURCCs). This covers most practical cases, e.g. 
%p4cr would +allow printing LSByte-first FOURCCs stored in host endian order +(other than the hex form being in character order, not the integer +value). + +Signed-off-by: Hector Martin +Signed-off-by: Kerem Karabay +--- + Documentation/core-api/printk-formats.rst | 32 ++++++++++++++++++++ + lib/test_printf.c | 20 +++++++++---- + lib/vsprintf.c | 36 +++++++++++++++++++---- + scripts/checkpatch.pl | 2 +- + 4 files changed, 77 insertions(+), 13 deletions(-) + +diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst +index dfe7e75a7..0ccef63e6 100644 +--- a/Documentation/core-api/printk-formats.rst ++++ b/Documentation/core-api/printk-formats.rst +@@ -631,6 +631,38 @@ Examples:: + %p4cc Y10 little-endian (0x20303159) + %p4cc NV12 big-endian (0xb231564e) + ++Generic FourCC code ++------------------- ++ ++:: ++ %p4c[hnbl] gP00 (0x67503030) ++ ++Print a generic FourCC code, as both ASCII characters and its numerical ++value as hexadecimal. ++ ++The additional ``h``, ``r``, ``b``, and ``l`` specifiers are used to specify ++host, reversed, big or little endian order data respectively. Host endian ++order means the data is interpreted as a 32-bit integer and the most ++significant byte is printed first; that is, the character code as printed ++matches the byte order stored in memory on big-endian systems, and is reversed ++on little-endian systems. ++ ++Passed by reference. ++ ++Examples for a little-endian machine, given &(u32)0x67503030:: ++ ++ %p4ch gP00 (0x67503030) ++ %p4cl gP00 (0x67503030) ++ %p4cb 00Pg (0x30305067) ++ %p4cr 00Pg (0x30305067) ++ ++Examples for a big-endian machine, given &(u32)0x67503030:: ++ ++ %p4ch gP00 (0x67503030) ++ %p4cl 00Pg (0x30305067) ++ %p4cb gP00 (0x67503030) ++ %p4cr 00Pg (0x30305067) ++ + Rust + ---- + +diff --git a/lib/test_printf.c b/lib/test_printf.c +index 7677ebccf..2355be36f 100644 +--- a/lib/test_printf.c ++++ b/lib/test_printf.c +@@ -746,18 +746,26 @@ static void __init fwnode_pointer(void) + static void __init fourcc_pointer(void) + { + struct { ++ char type; + u32 code; + char *str; + } const try[] = { +- { 0x3231564e, "NV12 little-endian (0x3231564e)", }, +- { 0xb231564e, "NV12 big-endian (0xb231564e)", }, +- { 0x10111213, ".... little-endian (0x10111213)", }, +- { 0x20303159, "Y10 little-endian (0x20303159)", }, ++ { 'c', 0x3231564e, "NV12 little-endian (0x3231564e)", }, ++ { 'c', 0xb231564e, "NV12 big-endian (0xb231564e)", }, ++ { 'c', 0x10111213, ".... 
little-endian (0x10111213)", }, ++ { 'c', 0x20303159, "Y10 little-endian (0x20303159)", }, ++ { 'h', 0x67503030, "gP00 (0x67503030)", }, ++ { 'r', 0x30305067, "gP00 (0x67503030)", }, ++ { 'l', cpu_to_le32(0x67503030), "gP00 (0x67503030)", }, ++ { 'b', cpu_to_be32(0x67503030), "gP00 (0x67503030)", }, + }; + unsigned int i; + +- for (i = 0; i < ARRAY_SIZE(try); i++) +- test(try[i].str, "%p4cc", &try[i].code); ++ for (i = 0; i < ARRAY_SIZE(try); i++) { ++ char fmt[] = { '%', 'p', '4', 'c', try[i].type, '\0' }; ++ ++ test(try[i].str, fmt, &try[i].code); ++ } + } + + static void __init +diff --git a/lib/vsprintf.c b/lib/vsprintf.c +index 40f560959..bd9af783c 100644 +--- a/lib/vsprintf.c ++++ b/lib/vsprintf.c +@@ -1758,27 +1758,50 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, + char output[sizeof("0123 little-endian (0x01234567)")]; + char *p = output; + unsigned int i; ++ bool pix_fmt = false; + u32 orig, val; + +- if (fmt[1] != 'c' || fmt[2] != 'c') ++ if (fmt[1] != 'c') + return error_string(buf, end, "(%p4?)", spec); + + if (check_pointer(&buf, end, fourcc, spec)) + return buf; + + orig = get_unaligned(fourcc); +- val = orig & ~BIT(31); ++ switch (fmt[2]) { ++ case 'h': ++ val = orig; ++ break; ++ case 'r': ++ val = orig = swab32(orig); ++ break; ++ case 'l': ++ val = orig = le32_to_cpu(orig); ++ break; ++ case 'b': ++ val = orig = be32_to_cpu(orig); ++ break; ++ case 'c': ++ /* Pixel formats are printed LSB-first */ ++ val = swab32(orig & ~BIT(31)); ++ pix_fmt = true; ++ break; ++ default: ++ return error_string(buf, end, "(%p4?)", spec); ++ } + + for (i = 0; i < sizeof(u32); i++) { +- unsigned char c = val >> (i * 8); ++ unsigned char c = val >> ((3 - i) * 8); + + /* Print non-control ASCII characters as-is, dot otherwise */ + *p++ = isascii(c) && isprint(c) ? c : '.'; + } + +- *p++ = ' '; +- strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian"); +- p += strlen(p); ++ if (pix_fmt) { ++ *p++ = ' '; ++ strcpy(p, orig & BIT(31) ? "big-endian" : "little-endian"); ++ p += strlen(p); ++ } + + *p++ = ' '; + *p++ = '('; +@@ -2348,6 +2371,7 @@ char *rust_fmt_argument(char *buf, char *end, void *ptr); + * read the documentation (path below) first. + * - 'NF' For a netdev_features_t + * - '4cc' V4L2 or DRM FourCC code, with endianness and raw numerical value. ++ * - '4c[hlbr]' Generic FourCC code. + * - 'h[CDN]' For a variable-length buffer, it prints it as a hex string with + * a certain separator (' ' by default): + * C colon +diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl +index 880fde13d..f080e33a4 100755 +--- a/scripts/checkpatch.pl ++++ b/scripts/checkpatch.pl +@@ -6906,7 +6906,7 @@ sub process { + ($extension eq "f" && + defined $qualifier && $qualifier !~ /^w/) || + ($extension eq "4" && +- defined $qualifier && $qualifier !~ /^cc/)) { ++ defined $qualifier && $qualifier !~ /^c[chlbr]/)) { + $bad_specifier = $specifier; + last; + } +-- +2.42.0 + +From f893444f7c842f97f3707897ba29f2c8dd77c8df Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Mon, 7 Aug 2023 20:29:27 +0300 +Subject: [PATCH 10/12] USB: core: add 'shutdown' callback to usb_driver + +This simplifies running code on shutdown for USB drivers. 
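A sketch of what a consumer looks like after this change (a hypothetical module, not part of the series): the driver fills in the new .shutdown member directly and receives the bound usb_interface, instead of reaching into drvwrap.driver.shutdown the way uas did.

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/module.h>
    #include <linux/usb.h>

    static int demo_probe(struct usb_interface *intf,
                          const struct usb_device_id *id)
    {
            return 0;
    }

    static void demo_disconnect(struct usb_interface *intf)
    {
    }

    /* Invoked via usb_shutdown_interface() at reboot/poweroff */
    static void demo_shutdown(struct usb_interface *intf)
    {
            dev_info(&intf->dev, "quiescing device before shutdown\n");
    }

    static const struct usb_device_id demo_ids[] = {
            { USB_DEVICE(0x1234, 0x5678) },  /* placeholder VID/PID */
            { }
    };
    MODULE_DEVICE_TABLE(usb, demo_ids);

    static struct usb_driver demo_driver = {
            .name       = "usb-shutdown-demo",
            .probe      = demo_probe,
            .disconnect = demo_disconnect,
            .shutdown   = demo_shutdown,  /* the callback added by this patch */
            .id_table   = demo_ids,
    };
    module_usb_driver(demo_driver);

    MODULE_LICENSE("GPL");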
+ +Signed-off-by: Kerem Karabay +--- + drivers/usb/core/driver.c | 14 ++++++++++++++ + drivers/usb/storage/uas.c | 5 ++--- + include/linux/usb.h | 3 +++ + 3 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c +index f58a0299f..dc0f86376 100644 +--- a/drivers/usb/core/driver.c ++++ b/drivers/usb/core/driver.c +@@ -514,6 +514,19 @@ static int usb_unbind_interface(struct device *dev) + return 0; + } + ++static void usb_shutdown_interface(struct device *dev) ++{ ++ struct usb_interface *intf = to_usb_interface(dev); ++ struct usb_driver *driver; ++ ++ if (!dev->driver) ++ return; ++ ++ driver = to_usb_driver(dev->driver); ++ if (driver->shutdown) ++ driver->shutdown(intf); ++} ++ + /** + * usb_driver_claim_interface - bind a driver to an interface + * @driver: the driver to be bound +@@ -1053,6 +1066,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner, + new_driver->drvwrap.driver.bus = &usb_bus_type; + new_driver->drvwrap.driver.probe = usb_probe_interface; + new_driver->drvwrap.driver.remove = usb_unbind_interface; ++ new_driver->drvwrap.driver.shutdown = usb_shutdown_interface; + new_driver->drvwrap.driver.owner = owner; + new_driver->drvwrap.driver.mod_name = mod_name; + new_driver->drvwrap.driver.dev_groups = new_driver->dev_groups; +diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c +index 2583ee981..591fa0379 100644 +--- a/drivers/usb/storage/uas.c ++++ b/drivers/usb/storage/uas.c +@@ -1221,9 +1221,8 @@ static void uas_disconnect(struct usb_interface *intf) + * hang on reboot when the device is still in uas mode. Note the reset is + * necessary as some devices won't revert to usb-storage mode without it. + */ +-static void uas_shutdown(struct device *dev) ++static void uas_shutdown(struct usb_interface *intf) + { +- struct usb_interface *intf = to_usb_interface(dev); + struct usb_device *udev = interface_to_usbdev(intf); + struct Scsi_Host *shost = usb_get_intfdata(intf); + struct uas_dev_info *devinfo = (struct uas_dev_info *)shost->hostdata; +@@ -1246,7 +1245,7 @@ static struct usb_driver uas_driver = { + .suspend = uas_suspend, + .resume = uas_resume, + .reset_resume = uas_reset_resume, +- .drvwrap.driver.shutdown = uas_shutdown, ++ .shutdown = uas_shutdown, + .id_table = uas_usb_ids, + }; + +diff --git a/include/linux/usb.h b/include/linux/usb.h +index 25f8e62a3..5f3ae2186 100644 +--- a/include/linux/usb.h ++++ b/include/linux/usb.h +@@ -1194,6 +1194,7 @@ struct usbdrv_wrap { + * post_reset method is called. + * @post_reset: Called by usb_reset_device() after the device + * has been reset ++ * @shutdown: Called at shut-down time to quiesce the device. + * @id_table: USB drivers use ID table to support hotplugging. + * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set + * or your driver's probe function will never get called. +@@ -1245,6 +1246,8 @@ struct usb_driver { + int (*pre_reset)(struct usb_interface *intf); + int (*post_reset)(struct usb_interface *intf); + ++ void (*shutdown)(struct usb_interface *intf); ++ + const struct usb_device_id *id_table; + const struct attribute_group **dev_groups; + +-- +2.42.0 + +From 337d6f6e34daaa786a0fb70d0dbd553288cd5ecd Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Fri, 4 Aug 2023 17:49:25 +0300 +Subject: [PATCH 11/12] drm/format-helper: add helper for BGR888 to XRGB8888 + conversion + +Add XRGB8888 emulation helper for devices that only support BGR888. 
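The per-pixel shuffle is simple enough to demonstrate standalone; here is a sketch that assumes a little-endian host (the kernel helper instead reads __le32 values explicitly, so it is endian-safe):

    #include <stdint.h>
    #include <stdio.h>

    /* Unpack each XRGB8888 word and emit R, G, B bytes in memory order,
     * which is the layout DRM_FORMAT_BGR888 stores. */
    static void xrgb8888_to_bgr888_line(uint8_t *dst, const uint32_t *src,
                                        unsigned int pixels)
    {
            for (unsigned int x = 0; x < pixels; x++) {
                    uint32_t pix = src[x];

                    *dst++ = (pix >> 16) & 0xff;  /* red */
                    *dst++ = (pix >> 8) & 0xff;   /* green */
                    *dst++ = (pix >> 0) & 0xff;   /* blue */
            }
    }

    int main(void)
    {
            const uint32_t red = 0x00ff0000;  /* XRGB8888 red */
            uint8_t out[3];

            xrgb8888_to_bgr888_line(out, &red, 1);
            printf("%02X %02X %02X\n", out[0], out[1], out[2]);  /* FF 00 00 */
            return 0;
    }

The FF 00 00 byte sequence matches the BGR888 expectation this patch adds to drm_format_helper_test for a pure-red pixel.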
+ +Signed-off-by: Kerem Karabay +--- + drivers/gpu/drm/drm_format_helper.c | 53 ++++++++++++++ + .../gpu/drm/tests/drm_format_helper_test.c | 69 +++++++++++++++++++ + include/drm/drm_format_helper.h | 3 + + 3 files changed, 125 insertions(+) + +diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c +index f93a4efce..5508fbde1 100644 +--- a/drivers/gpu/drm/drm_format_helper.c ++++ b/drivers/gpu/drm/drm_format_helper.c +@@ -601,6 +601,56 @@ void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pi + } + EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); + ++static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels) ++{ ++ u8 *dbuf8 = dbuf; ++ const __le32 *sbuf32 = sbuf; ++ unsigned int x; ++ u32 pix; ++ ++ for (x = 0; x < pixels; x++) { ++ pix = le32_to_cpu(sbuf32[x]); ++ /* write red-green-blue to output in little endianness */ ++ *dbuf8++ = (pix & 0x00FF0000) >> 16; ++ *dbuf8++ = (pix & 0x0000FF00) >> 8; ++ *dbuf8++ = (pix & 0x000000FF) >> 0; ++ } ++} ++ ++/** ++ * drm_fb_xrgb8888_to_bgr888 - Convert XRGB8888 to BGR888 clip buffer ++ * @dst: Array of BGR888 destination buffers ++ * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines ++ * within @dst; can be NULL if scanlines are stored next to each other. ++ * @src: Array of XRGB8888 source buffers ++ * @fb: DRM framebuffer ++ * @clip: Clip rectangle area to copy ++ * ++ * This function copies parts of a framebuffer to display memory and converts the ++ * color format during the process. Destination and framebuffer formats must match. The ++ * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at ++ * least as many entries as there are planes in @fb's format. Each entry stores the ++ * value for the format's respective color plane at the same index. ++ * ++ * This function does not apply clipping on @dst (i.e. the destination is at the ++ * top-left corner). ++ * ++ * Drivers can use this function for BGR888 devices that don't natively ++ * support XRGB8888. 
++ */ ++void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch, ++ const struct iosys_map *src, const struct drm_framebuffer *fb, ++ const struct drm_rect *clip) ++{ ++ static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { ++ 3, ++ }; ++ ++ drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, ++ drm_fb_xrgb8888_to_bgr888_line); ++} ++EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888); ++ + static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels) + { + __le32 *dbuf32 = dbuf; +@@ -925,6 +975,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d + } else if (dst_format == DRM_FORMAT_RGB888) { + drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip); + return 0; ++ } else if (dst_format == DRM_FORMAT_BGR888) { ++ drm_fb_xrgb8888_to_bgr888(dst, dst_pitch, src, fb, clip); ++ return 0; + } else if (dst_format == DRM_FORMAT_ARGB8888) { + drm_fb_xrgb8888_to_argb8888(dst, dst_pitch, src, fb, clip); + return 0; +diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c +index 474bb7a1c..dff7fabd9 100644 +--- a/drivers/gpu/drm/tests/drm_format_helper_test.c ++++ b/drivers/gpu/drm/tests/drm_format_helper_test.c +@@ -52,6 +52,11 @@ struct convert_to_rgb888_result { + const u8 expected[TEST_BUF_SIZE]; + }; + ++struct convert_to_bgr888_result { ++ unsigned int dst_pitch; ++ const u8 expected[TEST_BUF_SIZE]; ++}; ++ + struct convert_to_argb8888_result { + unsigned int dst_pitch; + const u32 expected[TEST_BUF_SIZE]; +@@ -84,6 +89,7 @@ struct convert_xrgb8888_case { + struct convert_to_argb1555_result argb1555_result; + struct convert_to_rgba5551_result rgba5551_result; + struct convert_to_rgb888_result rgb888_result; ++ struct convert_to_bgr888_result bgr888_result; + struct convert_to_argb8888_result argb8888_result; + struct convert_to_xrgb2101010_result xrgb2101010_result; + struct convert_to_argb2101010_result argb2101010_result; +@@ -125,6 +131,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { + .dst_pitch = TEST_USE_DEFAULT_PITCH, + .expected = { 0x00, 0x00, 0xFF }, + }, ++ .bgr888_result = { ++ .dst_pitch = TEST_USE_DEFAULT_PITCH, ++ .expected = { 0xFF, 0x00, 0x00 }, ++ }, + .argb8888_result = { + .dst_pitch = TEST_USE_DEFAULT_PITCH, + .expected = { 0xFFFF0000 }, +@@ -179,6 +189,10 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { + .dst_pitch = TEST_USE_DEFAULT_PITCH, + .expected = { 0x00, 0x00, 0xFF }, + }, ++ .bgr888_result = { ++ .dst_pitch = TEST_USE_DEFAULT_PITCH, ++ .expected = { 0xFF, 0x00, 0x00 }, ++ }, + .argb8888_result = { + .dst_pitch = TEST_USE_DEFAULT_PITCH, + .expected = { 0xFFFF0000 }, +@@ -280,6 +294,15 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { + 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, + }, + }, ++ .bgr888_result = { ++ .dst_pitch = TEST_USE_DEFAULT_PITCH, ++ .expected = { ++ 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, ++ 0xFF, 0x00, 0x00, 0x00, 0xFF, 0x00, ++ 0x00, 0x00, 0xFF, 0xFF, 0x00, 0xFF, ++ 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, ++ }, ++ }, + .argb8888_result = { + .dst_pitch = TEST_USE_DEFAULT_PITCH, + .expected = { +@@ -391,6 +414,17 @@ static struct convert_xrgb8888_case convert_xrgb8888_cases[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + }, ++ .bgr888_result = { ++ .dst_pitch = 15, ++ .expected = { ++ 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, 0xA8, 0xF3, 0x03, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, 0x11, 0x4D, 0x05, ++ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, ++ 0xA8, 0x03, 0x03, 0x6C, 0xF0, 0x73, 0x0E, 0x44, 0x9C, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ }, ++ }, + .argb8888_result = { + .dst_pitch = 20, + .expected = { +@@ -727,6 +761,40 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test) + KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); + } + ++static void drm_test_fb_xrgb8888_to_bgr888(struct kunit *test) ++{ ++ const struct convert_xrgb8888_case *params = test->param_value; ++ const struct convert_to_bgr888_result *result = ¶ms->bgr888_result; ++ size_t dst_size; ++ u8 *buf = NULL; ++ __le32 *xrgb8888 = NULL; ++ struct iosys_map dst, src; ++ ++ struct drm_framebuffer fb = { ++ .format = drm_format_info(DRM_FORMAT_XRGB8888), ++ .pitches = { params->pitch, 0, 0 }, ++ }; ++ ++ dst_size = conversion_buf_size(DRM_FORMAT_BGR888, result->dst_pitch, ++ ¶ms->clip, 0); ++ KUNIT_ASSERT_GT(test, dst_size, 0); ++ ++ buf = kunit_kzalloc(test, dst_size, GFP_KERNEL); ++ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf); ++ iosys_map_set_vaddr(&dst, buf); ++ ++ xrgb8888 = cpubuf_to_le32(test, params->xrgb8888, TEST_BUF_SIZE); ++ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xrgb8888); ++ iosys_map_set_vaddr(&src, xrgb8888); ++ ++ /* ++ * BGR888 expected results are already in little-endian ++ * order, so there's no need to convert the test output. ++ */ ++ drm_fb_xrgb8888_to_bgr888(&dst, &result->dst_pitch, &src, &fb, ¶ms->clip); ++ KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); ++} ++ + static void drm_test_fb_xrgb8888_to_argb8888(struct kunit *test) + { + const struct convert_xrgb8888_case *params = test->param_value; +@@ -858,6 +926,7 @@ static struct kunit_case drm_format_helper_test_cases[] = { + KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb1555, convert_xrgb8888_gen_params), + KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgba5551, convert_xrgb8888_gen_params), + KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_rgb888, convert_xrgb8888_gen_params), ++ KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_bgr888, convert_xrgb8888_gen_params), + KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb8888, convert_xrgb8888_gen_params), + KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_xrgb2101010, convert_xrgb8888_gen_params), + KUNIT_CASE_PARAM(drm_test_fb_xrgb8888_to_argb2101010, convert_xrgb8888_gen_params), +diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h +index 291deb094..7fc553318 100644 +--- a/include/drm/drm_format_helper.h ++++ b/include/drm/drm_format_helper.h +@@ -42,6 +42,9 @@ void drm_fb_xrgb8888_to_rgba5551(struct iosys_map *dst, const unsigned int *dst_ + void drm_fb_xrgb8888_to_rgb888(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip); ++void drm_fb_xrgb8888_to_bgr888(struct iosys_map *dst, const unsigned int *dst_pitch, ++ const struct iosys_map *src, const struct drm_framebuffer *fb, ++ const struct drm_rect *clip); + void drm_fb_xrgb8888_to_argb8888(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip); +-- +2.42.0 + +From 1f0b6c21c4d56f5be74c4d7d0665525862e307c3 Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Sat, 6 May 2023 17:30:09 +0300 +Subject: [PATCH 12/12] drm/tiny: add driver for Apple Touch Bars in x86 Macs + +The Touch Bars found on x86 Macs support two USB configurations: one +where the device presents itself as a HID keyboard and can display +predefined sets of keys, and one where the operating 
system has full +control over what is displayed. This commit adds support for the display +functionality of the second configuration. + +Note that this driver has only been tested on T2 Macs, and only includes +the USB device ID for these devices. Testing on T1 Macs would be +appreciated. + +Credit goes to @imbushuo on GitHub for reverse engineering most of the +protocol. + +Signed-off-by: Kerem Karabay +--- + MAINTAINERS | 6 + + drivers/gpu/drm/tiny/Kconfig | 12 + + drivers/gpu/drm/tiny/Makefile | 1 + + drivers/gpu/drm/tiny/appletbdrm.c | 624 ++++++++++++++++++++++++++++++ + 4 files changed, 643 insertions(+) + create mode 100644 drivers/gpu/drm/tiny/appletbdrm.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 519b3b736..dfc63d257 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -6372,6 +6372,12 @@ S: Supported + T: git git://anongit.freedesktop.org/drm/drm-misc + F: drivers/gpu/drm/sun4i/sun8i* + ++DRM DRIVER FOR APPLE TOUCH BARS ++M: Kerem Karabay ++L: dri-devel@lists.freedesktop.org ++S: Maintained ++F: drivers/gpu/drm/tiny/appletbdrm.c ++ + DRM DRIVER FOR ARM PL111 CLCD + M: Emma Anholt + S: Supported +diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig +index f6889f649..559a97bce 100644 +--- a/drivers/gpu/drm/tiny/Kconfig ++++ b/drivers/gpu/drm/tiny/Kconfig +@@ -1,5 +1,17 @@ + # SPDX-License-Identifier: GPL-2.0-only + ++config DRM_APPLETBDRM ++ tristate "DRM support for Apple Touch Bars" ++ depends on DRM && USB && MMU ++ select DRM_KMS_HELPER ++ select DRM_GEM_SHMEM_HELPER ++ help ++ Say Y here if you want support for the display of Touch Bars on x86 ++ MacBook Pros. ++ ++ To compile this driver as a module, choose M here: the ++ module will be called appletbdrm. ++ + config DRM_ARCPGU + tristate "ARC PGU" + depends on DRM && OF +diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile +index 76dde89a0..9a1b412e7 100644 +--- a/drivers/gpu/drm/tiny/Makefile ++++ b/drivers/gpu/drm/tiny/Makefile +@@ -1,5 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0-only + ++obj-$(CONFIG_DRM_APPLETBDRM) += appletbdrm.o + obj-$(CONFIG_DRM_ARCPGU) += arcpgu.o + obj-$(CONFIG_DRM_BOCHS) += bochs.o + obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o +diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c +new file mode 100644 +index 000000000..33a99436b +--- /dev/null ++++ b/drivers/gpu/drm/tiny/appletbdrm.c +@@ -0,0 +1,624 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Apple Touch Bar DRM Driver ++ * ++ * Copyright (c) 2023 Kerem Karabay ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define _APPLETBDRM_FOURCC(s) (((s)[0] << 24) | ((s)[1] << 16) | ((s)[2] << 8) | (s)[3]) ++#define APPLETBDRM_FOURCC(s) _APPLETBDRM_FOURCC(#s) ++ ++#define APPLETBDRM_PIXEL_FORMAT APPLETBDRM_FOURCC(RGBA) /* The actual format is BGR888 */ ++#define APPLETBDRM_BITS_PER_PIXEL 24 ++ ++#define APPLETBDRM_MSG_CLEAR_DISPLAY APPLETBDRM_FOURCC(CLRD) ++#define APPLETBDRM_MSG_GET_INFORMATION APPLETBDRM_FOURCC(GINF) ++#define APPLETBDRM_MSG_UPDATE_COMPLETE APPLETBDRM_FOURCC(UDCL) ++#define APPLETBDRM_MSG_SIGNAL_READINESS APPLETBDRM_FOURCC(REDY) ++ ++#define APPLETBDRM_BULK_MSG_TIMEOUT 1000 ++ ++#define drm_to_adev(_drm) container_of(_drm, struct appletbdrm_device, drm) ++#define adev_to_udev(adev) interface_to_usbdev(to_usb_interface(adev->dev)) ++ ++struct appletbdrm_device { ++ struct device *dev; ++ ++ u8 in_ep; 
++ u8 out_ep; ++ ++ u32 width; ++ u32 height; ++ ++ struct drm_device drm; ++ struct drm_display_mode mode; ++ struct drm_connector connector; ++ struct drm_simple_display_pipe pipe; ++ ++ bool readiness_signal_received; ++}; ++ ++struct appletbdrm_request_header { ++ __le16 unk_00; ++ __le16 unk_02; ++ __le32 unk_04; ++ __le32 unk_08; ++ __le32 size; ++} __packed; ++ ++struct appletbdrm_response_header { ++ u8 unk_00[16]; ++ u32 msg; ++} __packed; ++ ++struct appletbdrm_simple_request { ++ struct appletbdrm_request_header header; ++ u32 msg; ++ u8 unk_14[8]; ++ __le32 size; ++} __packed; ++ ++struct appletbdrm_information { ++ struct appletbdrm_response_header header; ++ u8 unk_14[12]; ++ __le32 width; ++ __le32 height; ++ u8 bits_per_pixel; ++ __le32 bytes_per_row; ++ __le32 orientation; ++ __le32 bitmap_info; ++ u32 pixel_format; ++ __le32 width_inches; /* floating point */ ++ __le32 height_inches; /* floating point */ ++} __packed; ++ ++struct appletbdrm_frame { ++ __le16 begin_x; ++ __le16 begin_y; ++ __le16 width; ++ __le16 height; ++ __le32 buf_size; ++ u8 buf[]; ++} __packed; ++ ++struct appletbdrm_fb_request_footer { ++ u8 unk_00[12]; ++ __le32 unk_0c; ++ u8 unk_10[12]; ++ __le32 unk_1c; ++ __le64 timestamp; ++ u8 unk_28[12]; ++ __le32 unk_34; ++ u8 unk_38[20]; ++ __le32 unk_4c; ++} __packed; ++ ++struct appletbdrm_fb_request { ++ struct appletbdrm_request_header header; ++ __le16 unk_10; ++ u8 msg_id; ++ u8 unk_13[29]; ++ /* ++ * Contents of `data`: ++ * - struct appletbdrm_frame frames[]; ++ * - struct appletbdrm_fb_request_footer footer; ++ * - padding to make the total size a multiple of 16 ++ */ ++ u8 data[]; ++} __packed; ++ ++struct appletbdrm_fb_request_response { ++ struct appletbdrm_response_header header; ++ u8 unk_14[12]; ++ __le64 timestamp; ++} __packed; ++ ++static int appletbdrm_send_request(struct appletbdrm_device *adev, ++ struct appletbdrm_request_header *request, size_t size) ++{ ++ struct usb_device *udev = adev_to_udev(adev); ++ struct drm_device *drm = &adev->drm; ++ int ret, actual_size; ++ ++ ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, adev->out_ep), ++ request, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT); ++ if (ret) { ++ drm_err(drm, "Failed to send message (%pe)\n", ERR_PTR(ret)); ++ return ret; ++ } ++ ++ if (actual_size != size) { ++ drm_err(drm, "Actual size (%d) doesn't match expected size (%lu)\n", ++ actual_size, size); ++ return -EIO; ++ } ++ ++ return ret; ++} ++ ++static int appletbdrm_read_response(struct appletbdrm_device *adev, ++ struct appletbdrm_response_header *response, ++ size_t size, u32 expected_response) ++{ ++ struct usb_device *udev = adev_to_udev(adev); ++ struct drm_device *drm = &adev->drm; ++ int ret, actual_size; ++ ++retry: ++ ret = usb_bulk_msg(udev, usb_rcvbulkpipe(udev, adev->in_ep), ++ response, size, &actual_size, APPLETBDRM_BULK_MSG_TIMEOUT); ++ if (ret) { ++ drm_err(drm, "Failed to read response (%pe)\n", ERR_PTR(ret)); ++ return ret; ++ } ++ ++ /* ++ * The device responds to the first request sent in a particular ++ * timeframe after the USB device configuration is set with a readiness ++ * signal, in which case the response should be read again ++ */ ++ if (response->msg == APPLETBDRM_MSG_SIGNAL_READINESS) { ++ if (!adev->readiness_signal_received) { ++ adev->readiness_signal_received = true; ++ goto retry; ++ } ++ ++ drm_err(drm, "Encountered unexpected readiness signal\n"); ++ return -EIO; ++ } ++ ++ if (actual_size != size) { ++ drm_err(drm, "Actual size (%d) doesn't match expected size (%lu)\n", ++ 
actual_size, size); ++ return -EIO; ++ } ++ ++ if (response->msg != expected_response) { ++ drm_err(drm, "Unexpected response from device (expected %p4ch found %p4ch)\n", ++ &expected_response, &response->msg); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ ++static int appletbdrm_send_msg(struct appletbdrm_device *adev, u32 msg) ++{ ++ struct appletbdrm_simple_request *request; ++ int ret; ++ ++ request = kzalloc(sizeof(*request), GFP_KERNEL); ++ if (!request) ++ return -ENOMEM; ++ ++ request->header.unk_00 = cpu_to_le16(2); ++ request->header.unk_02 = cpu_to_le16(0x1512); ++ request->header.size = cpu_to_le32(sizeof(*request) - sizeof(request->header)); ++ request->msg = msg; ++ request->size = request->header.size; ++ ++ ret = appletbdrm_send_request(adev, &request->header, sizeof(*request)); ++ ++ kfree(request); ++ ++ return ret; ++} ++ ++static int appletbdrm_clear_display(struct appletbdrm_device *adev) ++{ ++ return appletbdrm_send_msg(adev, APPLETBDRM_MSG_CLEAR_DISPLAY); ++} ++ ++static int appletbdrm_signal_readiness(struct appletbdrm_device *adev) ++{ ++ return appletbdrm_send_msg(adev, APPLETBDRM_MSG_SIGNAL_READINESS); ++} ++ ++static int appletbdrm_get_information(struct appletbdrm_device *adev) ++{ ++ struct appletbdrm_information *info; ++ struct drm_device *drm = &adev->drm; ++ u8 bits_per_pixel; ++ u32 pixel_format; ++ int ret; ++ ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ return -ENOMEM; ++ ++ ret = appletbdrm_send_msg(adev, APPLETBDRM_MSG_GET_INFORMATION); ++ if (ret) ++ return ret; ++ ++ ret = appletbdrm_read_response(adev, &info->header, sizeof(*info), ++ APPLETBDRM_MSG_GET_INFORMATION); ++ if (ret) ++ goto free_info; ++ ++ bits_per_pixel = info->bits_per_pixel; ++ pixel_format = get_unaligned(&info->pixel_format); ++ ++ adev->width = get_unaligned_le32(&info->width); ++ adev->height = get_unaligned_le32(&info->height); ++ ++ if (bits_per_pixel != APPLETBDRM_BITS_PER_PIXEL) { ++ drm_err(drm, "Encountered unexpected bits per pixel value (%d)\n", bits_per_pixel); ++ ret = -EINVAL; ++ goto free_info; ++ } ++ ++ if (pixel_format != APPLETBDRM_PIXEL_FORMAT) { ++ drm_err(drm, "Encountered unknown pixel format (%p4ch)\n", &pixel_format); ++ ret = -EINVAL; ++ goto free_info; ++ } ++ ++free_info: ++ kfree(info); ++ ++ return ret; ++} ++ ++static u32 rect_size(struct drm_rect *rect) ++{ ++ return drm_rect_width(rect) * drm_rect_height(rect) * (APPLETBDRM_BITS_PER_PIXEL / 8); ++} ++ ++static int appletbdrm_flush_damage(struct appletbdrm_device *adev, ++ struct drm_plane_state *old_state, ++ struct drm_plane_state *state) ++{ ++ struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); ++ struct appletbdrm_fb_request_response *response; ++ struct appletbdrm_fb_request_footer *footer; ++ struct drm_atomic_helper_damage_iter iter; ++ struct drm_framebuffer *fb = state->fb; ++ struct appletbdrm_fb_request *request; ++ struct drm_device *drm = &adev->drm; ++ struct appletbdrm_frame *frame; ++ u64 timestamp = ktime_get_ns(); ++ struct drm_rect damage; ++ size_t frames_size = 0; ++ size_t request_size; ++ int ret; ++ ++ drm_atomic_helper_damage_iter_init(&iter, old_state, state); ++ drm_atomic_for_each_plane_damage(&iter, &damage) { ++ frames_size += struct_size(frame, buf, rect_size(&damage)); ++ } ++ ++ if (!frames_size) ++ return 0; ++ ++ request_size = ALIGN(sizeof(*request) + frames_size + sizeof(*footer), 16); ++ ++ request = kzalloc(request_size, GFP_KERNEL); ++ if (!request) ++ return -ENOMEM; ++ ++ response = 
kzalloc(sizeof(*response), GFP_KERNEL); ++ if (!response) { ++ ret = -ENOMEM; ++ goto free_request; ++ } ++ ++ ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); ++ if (ret) { ++ drm_err(drm, "Failed to start CPU framebuffer access (%pe)\n", ERR_PTR(ret)); ++ goto free_response; ++ } ++ ++ request->header.unk_00 = cpu_to_le16(2); ++ request->header.unk_02 = cpu_to_le16(0x12); ++ request->header.unk_04 = cpu_to_le32(9); ++ request->header.size = cpu_to_le32(request_size - sizeof(request->header)); ++ request->unk_10 = cpu_to_le16(1); ++ request->msg_id = timestamp & 0xff; ++ ++ frame = (struct appletbdrm_frame *)request->data; ++ ++ drm_atomic_helper_damage_iter_init(&iter, old_state, state); ++ drm_atomic_for_each_plane_damage(&iter, &damage) { ++ struct iosys_map dst = IOSYS_MAP_INIT_VADDR(frame->buf); ++ u32 buf_size = rect_size(&damage); ++ ++ /* ++ * The coordinates need to be translated to the coordinate ++ * system the device expects, see the comment in ++ * appletbdrm_setup_mode_config ++ */ ++ frame->begin_x = cpu_to_le16(damage.y1); ++ frame->begin_y = cpu_to_le16(adev->height - damage.x2); ++ frame->width = cpu_to_le16(drm_rect_height(&damage)); ++ frame->height = cpu_to_le16(drm_rect_width(&damage)); ++ frame->buf_size = cpu_to_le32(buf_size); ++ ++ ret = drm_fb_blit(&dst, NULL, DRM_FORMAT_BGR888, ++ &shadow_plane_state->data[0], fb, &damage); ++ if (ret) { ++ drm_err(drm, "Failed to copy damage clip (%pe)\n", ERR_PTR(ret)); ++ goto end_fb_cpu_access; ++ } ++ ++ frame = (void *)frame + struct_size(frame, buf, buf_size); ++ } ++ ++ footer = (struct appletbdrm_fb_request_footer *)&request->data[frames_size]; ++ ++ footer->unk_0c = cpu_to_le32(0xfffe); ++ footer->unk_1c = cpu_to_le32(0x80001); ++ footer->unk_34 = cpu_to_le32(0x80002); ++ footer->unk_4c = cpu_to_le32(0xffff); ++ footer->timestamp = cpu_to_le64(timestamp); ++ ++ ret = appletbdrm_send_request(adev, &request->header, request_size); ++ if (ret) ++ goto end_fb_cpu_access; ++ ++ ret = appletbdrm_read_response(adev, &response->header, sizeof(*response), ++ APPLETBDRM_MSG_UPDATE_COMPLETE); ++ if (ret) ++ goto end_fb_cpu_access; ++ ++ if (response->timestamp != footer->timestamp) { ++ drm_err(drm, "Response timestamp (%llu) doesn't match request timestamp (%llu)\n", ++ le64_to_cpu(response->timestamp), timestamp); ++ goto end_fb_cpu_access; ++ } ++ ++end_fb_cpu_access: ++ drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); ++free_response: ++ kfree(response); ++free_request: ++ kfree(request); ++ ++ return ret; ++} ++ ++static int appletbdrm_connector_helper_get_modes(struct drm_connector *connector) ++{ ++ struct appletbdrm_device *adev = drm_to_adev(connector->dev); ++ ++ return drm_connector_helper_get_modes_fixed(connector, &adev->mode); ++} ++ ++static enum drm_mode_status appletbdrm_pipe_mode_valid(struct drm_simple_display_pipe *pipe, ++ const struct drm_display_mode *mode) ++{ ++ struct drm_crtc *crtc = &pipe->crtc; ++ struct appletbdrm_device *adev = drm_to_adev(crtc->dev); ++ ++ return drm_crtc_helper_mode_valid_fixed(crtc, mode, &adev->mode); ++} ++ ++static void appletbdrm_pipe_disable(struct drm_simple_display_pipe *pipe) ++{ ++ struct appletbdrm_device *adev = drm_to_adev(pipe->crtc.dev); ++ int idx; ++ ++ if (!drm_dev_enter(&adev->drm, &idx)) ++ return; ++ ++ appletbdrm_clear_display(adev); ++ ++ drm_dev_exit(idx); ++} ++ ++static void appletbdrm_pipe_update(struct drm_simple_display_pipe *pipe, ++ struct drm_plane_state *old_state) ++{ ++ struct drm_crtc *crtc = &pipe->crtc; ++ struct appletbdrm_device *adev 
= drm_to_adev(crtc->dev); ++ int idx; ++ ++ if (!crtc->state->active || !drm_dev_enter(&adev->drm, &idx)) ++ return; ++ ++ appletbdrm_flush_damage(adev, old_state, pipe->plane.state); ++ ++ drm_dev_exit(idx); ++} ++ ++static const u32 appletbdrm_formats[] = { ++ DRM_FORMAT_BGR888, ++ DRM_FORMAT_XRGB8888, /* emulated */ ++}; ++ ++static const struct drm_mode_config_funcs appletbdrm_mode_config_funcs = { ++ .fb_create = drm_gem_fb_create_with_dirty, ++ .atomic_check = drm_atomic_helper_check, ++ .atomic_commit = drm_atomic_helper_commit, ++}; ++ ++static const struct drm_connector_funcs appletbdrm_connector_funcs = { ++ .reset = drm_atomic_helper_connector_reset, ++ .destroy = drm_connector_cleanup, ++ .fill_modes = drm_helper_probe_single_connector_modes, ++ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, ++ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, ++}; ++ ++static const struct drm_connector_helper_funcs appletbdrm_connector_helper_funcs = { ++ .get_modes = appletbdrm_connector_helper_get_modes, ++}; ++ ++static const struct drm_simple_display_pipe_funcs appletbdrm_pipe_funcs = { ++ DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, ++ .update = appletbdrm_pipe_update, ++ .disable = appletbdrm_pipe_disable, ++ .mode_valid = appletbdrm_pipe_mode_valid, ++}; ++ ++DEFINE_DRM_GEM_FOPS(appletbdrm_drm_fops); ++ ++static const struct drm_driver appletbdrm_drm_driver = { ++ DRM_GEM_SHMEM_DRIVER_OPS, ++ .name = "appletbdrm", ++ .desc = "Apple Touch Bar DRM Driver", ++ .date = "20230910", ++ .major = 1, ++ .minor = 0, ++ .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, ++ .fops = &appletbdrm_drm_fops, ++}; ++ ++static int appletbdrm_setup_mode_config(struct appletbdrm_device *adev) ++{ ++ struct drm_connector *connector = &adev->connector; ++ struct drm_device *drm = &adev->drm; ++ struct device *dev = adev->dev; ++ int ret; ++ ++ ret = drmm_mode_config_init(drm); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to initialize mode configuration\n"); ++ ++ /* ++ * The coordinate system used by the device is different from the ++ * coordinate system of the framebuffer in that the x and y axes are ++ * swapped, and that the y axis is inverted; so what the device reports ++ * as the height is actually the width of the framebuffer and vice ++ * versa ++ */ ++ drm->mode_config.min_width = 0; ++ drm->mode_config.min_height = 0; ++ drm->mode_config.max_width = max(adev->height, DRM_SHADOW_PLANE_MAX_WIDTH); ++ drm->mode_config.max_height = max(adev->width, DRM_SHADOW_PLANE_MAX_HEIGHT); ++ drm->mode_config.preferred_depth = APPLETBDRM_BITS_PER_PIXEL; ++ drm->mode_config.funcs = &appletbdrm_mode_config_funcs; ++ ++ adev->mode = (struct drm_display_mode) { ++ DRM_MODE_INIT(60, adev->height, adev->width, ++ DRM_MODE_RES_MM(adev->height, 218), ++ DRM_MODE_RES_MM(adev->width, 218)) ++ }; ++ ++ ret = drm_connector_init(drm, connector, ++ &appletbdrm_connector_funcs, DRM_MODE_CONNECTOR_USB); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to initialize connector\n"); ++ ++ drm_connector_helper_add(connector, &appletbdrm_connector_helper_funcs); ++ ++ ret = drm_connector_set_panel_orientation(connector, ++ DRM_MODE_PANEL_ORIENTATION_RIGHT_UP); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to set panel orientation\n"); ++ ++ connector->display_info.non_desktop = true; ++ ret = drm_object_property_set_value(&connector->base, ++ drm->mode_config.non_desktop_property, true); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to set 
non-desktop property\n"); ++ ++ ret = drm_simple_display_pipe_init(drm, &adev->pipe, &appletbdrm_pipe_funcs, ++ appletbdrm_formats, ARRAY_SIZE(appletbdrm_formats), ++ NULL, &adev->connector); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to initialize simple display pipe\n"); ++ ++ drm_plane_enable_fb_damage_clips(&adev->pipe.plane); ++ ++ drm_mode_config_reset(drm); ++ ++ ret = drm_dev_register(drm, 0); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to register DRM device\n"); ++ ++ return 0; ++} ++ ++static int appletbdrm_probe(struct usb_interface *intf, ++ const struct usb_device_id *id) ++{ ++ struct usb_endpoint_descriptor *bulk_in, *bulk_out; ++ struct device *dev = &intf->dev; ++ struct appletbdrm_device *adev; ++ int ret; ++ ++ ret = usb_find_common_endpoints(intf->cur_altsetting, &bulk_in, &bulk_out, NULL, NULL); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to find bulk endpoints\n"); ++ ++ adev = devm_drm_dev_alloc(dev, &appletbdrm_drm_driver, struct appletbdrm_device, drm); ++ if (IS_ERR(adev)) ++ return PTR_ERR(adev); ++ ++ adev->dev = dev; ++ adev->in_ep = bulk_in->bEndpointAddress; ++ adev->out_ep = bulk_out->bEndpointAddress; ++ ++ usb_set_intfdata(intf, adev); ++ ++ ret = appletbdrm_get_information(adev); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to get display information\n"); ++ ++ ret = appletbdrm_signal_readiness(adev); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to signal readiness\n"); ++ ++ ret = appletbdrm_clear_display(adev); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to clear display\n"); ++ ++ return appletbdrm_setup_mode_config(adev); ++} ++ ++static void appletbdrm_disconnect(struct usb_interface *intf) ++{ ++ struct appletbdrm_device *adev = usb_get_intfdata(intf); ++ struct drm_device *drm = &adev->drm; ++ ++ drm_dev_unplug(drm); ++ drm_atomic_helper_shutdown(drm); ++} ++ ++static void appletbdrm_shutdown(struct usb_interface *intf) ++{ ++ struct appletbdrm_device *adev = usb_get_intfdata(intf); ++ ++ /* ++ * The framebuffer needs to be cleared on shutdown since its content ++ * persists across boots ++ */ ++ drm_atomic_helper_shutdown(&adev->drm); ++} ++ ++static const struct usb_device_id appletbdrm_usb_id_table[] = { ++ { USB_DEVICE_INTERFACE_CLASS(0x05ac, 0x8302, USB_CLASS_AUDIO_VIDEO) }, ++ {} ++}; ++MODULE_DEVICE_TABLE(usb, appletbdrm_usb_id_table); ++ ++static struct usb_driver appletbdrm_usb_driver = { ++ .name = "appletbdrm", ++ .probe = appletbdrm_probe, ++ .disconnect = appletbdrm_disconnect, ++ .shutdown = appletbdrm_shutdown, ++ .id_table = appletbdrm_usb_id_table, ++}; ++module_usb_driver(appletbdrm_usb_driver); ++ ++MODULE_AUTHOR("Kerem Karabay "); ++MODULE_DESCRIPTION("Apple Touch Bar DRM Driver"); ++MODULE_LICENSE("GPL"); +-- +2.42.0 + +From e34c6d09241ba826a6e9b2b0e50e306b273b7bda Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Thu, 16 Feb 2023 12:32:34 +1100 +Subject: [PATCH 5/8] Documentation: leds: standardise keyboard backlight led + names + +Advise use of either "input*:*:kbd_backlight" or ":*:kbd_backlight". We +don't want people using vendor or product names (e.g. "smc", "apple", +"asus") as this information is available from sysfs anyway, and it makes the +folder names inconsistent.
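
For illustration (not part of the patch itself): a driver following the recommended scheme registers its keyboard-backlight LED with no vendor or product prefix. A minimal sketch, assuming a hypothetical driver with a white backlight; the helper name is made up, but the LED class API and LED_FUNCTION_KBD_BACKLIGHT are the standard kernel ones:

#include <linux/leds.h>
#include <dt-bindings/leds/common.h>

/*
 * Hypothetical registration following the recommended naming scheme:
 * no vendor/product field, an optional color field, and the standard
 * function suffix. This expands to the LED name ":white:kbd_backlight".
 */
static int example_register_kbd_backlight(struct device *dev,
					  struct led_classdev *cdev)
{
	cdev->name = ":white:" LED_FUNCTION_KBD_BACKLIGHT;
	cdev->max_brightness = 255; /* device-specific in practice */
	/* a brightness_set_blocking() callback would be wired up here */
	return devm_led_classdev_register(dev, cdev);
}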
+ +Signed-off-by: Orlando Chamberlain +--- + Documentation/leds/well-known-leds.txt | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt +index 2160382c86be..4e5429fce4d8 100644 +--- a/Documentation/leds/well-known-leds.txt ++++ b/Documentation/leds/well-known-leds.txt +@@ -44,6 +44,14 @@ Legacy: "lp5523:kb{1,2,3,4,5,6}" (Nokia N900) + + Frontlight/backlight of main keyboard. + ++Good: ":*:kbd_backlight" ++Good: "input*:*:kbd_backlight" ++Legacy: "*:*:kbd_backlight" ++ ++Many drivers have the vendor or product name as the first field of the led name; ++this makes names inconsistent and is redundant, as that information is already in ++sysfs. ++ + Legacy: "button-backlight" (Motorola Droid 4) + + Some phones have touch buttons below screen; it is different from main +-- +2.39.1 + +From c124f5401040d02abd6d349979be29acd1e88545 Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Fri, 10 Feb 2023 23:14:31 +1100 +Subject: [PATCH 6/8] HID: hid-apple-magic-backlight: Add driver for keyboard + backlight on internal Magic Keyboards +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This driver adds support for the keyboard backlight on Intel T2 Macs +with internal Magic Keyboards (MacBookPro16,x and MacBookAir9,1). + +Co-developed-by: Kerem Karabay +Signed-off-by: Kerem Karabay +Signed-off-by: Orlando Chamberlain +Reviewed-by: Andy Shevchenko +Reviewed-by: Thomas Weißschuh +--- + MAINTAINERS | 6 ++ + drivers/hid/Kconfig | 13 +++ + drivers/hid/Makefile | 1 + + drivers/hid/hid-apple-magic-backlight.c | 120 ++++++++++++++++++++++++ + 4 files changed, 140 insertions(+) + create mode 100644 drivers/hid/hid-apple-magic-backlight.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index dfc63d257..9148bda0a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -9169,6 +9169,12 @@ L: linux-input@vger.kernel.org + S: Maintained + F: drivers/hid/hid-appletb-* + ++HID APPLE MAGIC BACKLIGHT DRIVER ++M: Orlando Chamberlain ++L: linux-input@vger.kernel.org ++S: Maintained ++F: drivers/hid/hid-apple-magic-backlight.c ++ + HID CORE LAYER + M: Jiri Kosina + M: Benjamin Tissoires +diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig +index 4e238df87..83fbab6d4 100644 +--- a/drivers/hid/Kconfig ++++ b/drivers/hid/Kconfig +@@ -169,6 +169,19 @@ config HID_APPLETB_KBD + To compile this driver as a module, choose M here: the + module will be called hid-appletb-kbd. + ++config HID_APPLE_MAGIC_BACKLIGHT ++ tristate "Apple Magic Keyboard Backlight" ++ depends on USB_HID ++ depends on LEDS_CLASS ++ depends on NEW_LEDS ++ help ++ Say Y here if you want support for the keyboard backlight on Macs with ++ the magic keyboard (MacBookPro16,x and MacBookAir9,1). Note that this ++ driver is not for external magic keyboards. ++ ++ To compile this driver as a module, choose M here: the ++ module will be called hid-apple-magic-backlight.
++ + config HID_ASUS + tristate "Asus" + depends on USB_HID +diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile +index 5b60015fd..581f5e720 100644 +--- a/drivers/hid/Makefile ++++ b/drivers/hid/Makefile +@@ -31,6 +31,7 @@ obj-$(CONFIG_HID_APPLE) += hid-apple.o + obj-$(CONFIG_HID_APPLEIR) += hid-appleir.o + obj-$(CONFIG_HID_APPLETB_BL) += hid-appletb-bl.o + obj-$(CONFIG_HID_APPLETB_KBD) += hid-appletb-kbd.o ++obj-$(CONFIG_HID_APPLE_MAGIC_BACKLIGHT) += hid-apple-magic-backlight.o + obj-$(CONFIG_HID_CREATIVE_SB0540) += hid-creative-sb0540.o + obj-$(CONFIG_HID_ASUS) += hid-asus.o + obj-$(CONFIG_HID_AUREAL) += hid-aureal.o +diff --git a/drivers/hid/hid-apple-magic-backlight.c b/drivers/hid/hid-apple-magic-backlight.c +new file mode 100644 +index 000000000..f0fc02ff3 +--- /dev/null ++++ b/drivers/hid/hid-apple-magic-backlight.c +@@ -0,0 +1,120 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Apple Magic Keyboard Backlight Driver ++ * ++ * For Intel Macs with internal Magic Keyboard (MacBookPro16,1-4 and MacBookAir9,1) ++ * ++ * Copyright (c) 2022 Kerem Karabay ++ * Copyright (c) 2023 Orlando Chamberlain ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "hid-ids.h" ++ ++#define HID_USAGE_MAGIC_BL 0xff00000f ++ ++#define APPLE_MAGIC_REPORT_ID_POWER 3 ++#define APPLE_MAGIC_REPORT_ID_BRIGHTNESS 1 ++ ++struct apple_magic_backlight { ++ struct led_classdev cdev; ++ struct hid_report *brightness; ++ struct hid_report *power; ++}; ++ ++static void apple_magic_backlight_report_set(struct hid_report *rep, s32 value, u8 rate) ++{ ++ rep->field[0]->value[0] = value; ++ rep->field[1]->value[0] = 0x5e; /* Mimic Windows */ ++ rep->field[1]->value[0] |= rate << 8; ++ ++ hid_hw_request(rep->device, rep, HID_REQ_SET_REPORT); ++} ++ ++static void apple_magic_backlight_set(struct apple_magic_backlight *backlight, ++ int brightness, char rate) ++{ ++ apple_magic_backlight_report_set(backlight->power, brightness ? 1 : 0, rate); ++ if (brightness) ++ apple_magic_backlight_report_set(backlight->brightness, brightness, rate); ++} ++ ++static int apple_magic_backlight_led_set(struct led_classdev *led_cdev, ++ enum led_brightness brightness) ++{ ++ struct apple_magic_backlight *backlight = container_of(led_cdev, ++ struct apple_magic_backlight, cdev); ++ ++ apple_magic_backlight_set(backlight, brightness, 1); ++ return 0; ++} ++ ++static int apple_magic_backlight_probe(struct hid_device *hdev, ++ const struct hid_device_id *id) ++{ ++ struct apple_magic_backlight *backlight; ++ int rc; ++ ++ rc = hid_parse(hdev); ++ if (rc) ++ return rc; ++ ++ /* ++ * Ensure this usb endpoint is for the keyboard backlight, not touchbar ++ * backlight. 
++ */ ++ if (hdev->collection[0].usage != HID_USAGE_MAGIC_BL) ++ return -ENODEV; ++ ++ backlight = devm_kzalloc(&hdev->dev, sizeof(*backlight), GFP_KERNEL); ++ if (!backlight) ++ return -ENOMEM; ++ ++ rc = hid_hw_start(hdev, HID_CONNECT_DEFAULT); ++ if (rc) ++ return rc; ++ ++ backlight->brightness = hid_register_report(hdev, HID_FEATURE_REPORT, ++ APPLE_MAGIC_REPORT_ID_BRIGHTNESS, 0); ++ backlight->power = hid_register_report(hdev, HID_FEATURE_REPORT, ++ APPLE_MAGIC_REPORT_ID_POWER, 0); ++ ++ if (!backlight->brightness || !backlight->power) { ++ rc = -ENODEV; ++ goto hw_stop; ++ } ++ ++ backlight->cdev.name = ":white:" LED_FUNCTION_KBD_BACKLIGHT; ++ backlight->cdev.max_brightness = backlight->brightness->field[0]->logical_maximum; ++ backlight->cdev.brightness_set_blocking = apple_magic_backlight_led_set; ++ ++ apple_magic_backlight_set(backlight, 0, 0); ++ ++ return devm_led_classdev_register(&hdev->dev, &backlight->cdev); ++ ++hw_stop: ++ hid_hw_stop(hdev); ++ return rc; ++} ++ ++static const struct hid_device_id apple_magic_backlight_hid_ids[] = { ++ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT) }, ++ { } ++}; ++MODULE_DEVICE_TABLE(hid, apple_magic_backlight_hid_ids); ++ ++static struct hid_driver apple_magic_backlight_hid_driver = { ++ .name = "hid-apple-magic-backlight", ++ .id_table = apple_magic_backlight_hid_ids, ++ .probe = apple_magic_backlight_probe, ++}; ++module_hid_driver(apple_magic_backlight_hid_driver); ++ ++MODULE_DESCRIPTION("MacBook Magic Keyboard Backlight"); ++MODULE_AUTHOR("Orlando Chamberlain "); ++MODULE_LICENSE("GPL"); +-- +2.39.2 + +From 12c7a3306a631a651464ef56318a218dc4cdb157 Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Sat, 18 Feb 2023 23:05:05 +1100 +Subject: [PATCH 8/9] i915: 4 lane quirk for mbp15,1 + +Needed to use iGPU when dGPU was boot GPU + +Patch written by Kerem Karabay +--- + drivers/gpu/drm/i915/display/intel_ddi.c | 3 +++ + drivers/gpu/drm/i915/display/intel_quirks.c | 15 +++++++++++++++ + drivers/gpu/drm/i915/display/intel_quirks.h | 1 + + 3 files changed, 19 insertions(+) + +diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c +index 0f1ec2a98cc8..1ec67390f623 100644 +--- a/drivers/gpu/drm/i915/display/intel_ddi.c ++++ b/drivers/gpu/drm/i915/display/intel_ddi.c +@@ -4097,6 +4097,9 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) + if (dig_port->saved_port_bits & DDI_A_4_LANES) + return false; + ++ if (intel_has_quirk(dev_priv, QUIRK_DDI_A_FORCE_4_LANES)) ++ return true; ++ + /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only + * supported configuration + */ +diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c +index 6e48d3bcdfec..a8c55e165b46 100644 +--- a/drivers/gpu/drm/i915/display/intel_quirks.c ++++ b/drivers/gpu/drm/i915/display/intel_quirks.c +@@ -59,6 +59,18 @@ static void quirk_increase_ddi_disabled_time(struct drm_i915_private *i915) + drm_info(&i915->drm, "Applying Increase DDI Disabled quirk\n"); + } + ++/* ++ * In some cases, the firmware might not set the lane count to 4 (for example, ++ * when booting in some dual GPU Macs with the dGPU as the default GPU), this ++ * quirk is used to force it as otherwise it might not be possible to compute a ++ * valid link configuration. 
++ */ ++static void quirk_ddi_a_force_4_lanes(struct drm_i915_private *i915) ++{ ++ intel_set_quirk(i915, QUIRK_DDI_A_FORCE_4_LANES); ++ drm_info(&i915->drm, "Applying DDI A Forced 4 Lanes quirk\n"); ++} ++ + static void quirk_no_pps_backlight_power_hook(struct drm_i915_private *i915) + { + intel_set_quirk(i915, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); +@@ -199,6 +211,9 @@ static struct intel_quirk intel_quirks[] = { + { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, + /* HP Notebook - 14-r206nv */ + { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, ++ ++ /* Apple MacBookPro15,1 */ ++ { 0x3e9b, 0x106b, 0x0176, quirk_ddi_a_force_4_lanes }, + }; + + void intel_init_quirks(struct drm_i915_private *i915) +diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h +index 10a4d163149f..78aacf1f6f5c 100644 +--- a/drivers/gpu/drm/i915/display/intel_quirks.h ++++ b/drivers/gpu/drm/i915/display/intel_quirks.h +@@ -17,6 +17,7 @@ enum intel_quirk_id { + QUIRK_INVERT_BRIGHTNESS, + QUIRK_LVDS_SSC_DISABLE, + QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK, ++ QUIRK_DDI_A_FORCE_4_LANES, + }; + + void intel_init_quirks(struct drm_i915_private *i915); +-- +2.39.1 + +From bd8e785c74e22978648ced004552eb9c137f1eb6 Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Fri, 10 Feb 2023 22:45:00 +1100 +Subject: [PATCH 9/9] apple-gmux: allow switching to igpu at probe + +This means users don't need to set the gpu-power-prefs efivar to use the +iGPU while runtime switching isn't working, so macOS will be unaffected. + +This isn't really upstreamable; what we want upstream is the ability to +switch at runtime (so both GPUs need to be able to probe the eDP panel). + +Based off of work by Kerem Karabay +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +++ + drivers/gpu/vga/vga_switcheroo.c | 7 +------ + drivers/pci/vgaarb.c | 1 + + drivers/platform/x86/apple-gmux.c | 18 ++++++++++++++++++ + 4 files changed, 23 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 81edf66dbea8..8f3daf28665b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -2051,6 +2051,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + int ret, retry = 0, i; + bool supports_atomic = false; + ++ if (vga_switcheroo_client_probe_defer(pdev)) ++ return -EPROBE_DEFER; ++ + /* skip devices which are owned by radeon */ + for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { + if (amdgpu_unsupported_pciidlist[i] == pdev->device) +diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c +index 365e6ddbe90f..cf357cd3389d 100644 +--- a/drivers/gpu/vga/vga_switcheroo.c ++++ b/drivers/gpu/vga/vga_switcheroo.c +@@ -438,12 +438,7 @@ find_active_client(struct list_head *head) + bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev) + { + if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { +- /* +- * apple-gmux is needed on pre-retina MacBook Pro +- * to probe the panel if pdev is the inactive GPU.
+- */ +- if (apple_gmux_present() && pdev != vga_default_device() && +- !vgasr_priv.handler_flags) ++ if (apple_gmux_present() && !vgasr_priv.handler_flags) + return true; + } + +diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c +index 5e6b1eb54c64..1f11701d37d1 100644 +--- a/drivers/pci/vgaarb.c ++++ b/drivers/pci/vgaarb.c +@@ -143,6 +143,7 @@ void vga_set_default_device(struct pci_dev *pdev) + pci_dev_put(vga_default); + vga_default = pci_dev_get(pdev); + } ++EXPORT_SYMBOL_GPL(vga_set_default_device); + + /** + * vga_remove_vgacon - deactivate VGA console +diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c +index 1417e230edbd..e69785af8e1d 100644 +--- a/drivers/platform/x86/apple-gmux.c ++++ b/drivers/platform/x86/apple-gmux.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -107,6 +108,10 @@ struct apple_gmux_config { + + # define MMIO_GMUX_MAX_BRIGHTNESS 0xffff + ++static bool force_igd; ++module_param(force_igd, bool, 0); ++MODULE_PARM_DESC(force_igd, "Switch to the iGPU on module load. Make sure that you have apple-set-os set up and the iGPU is in `lspci -s 00:02.0`. (default: false) (bool)"); ++ + static u8 gmux_pio_read8(struct apple_gmux_data *gmux_data, int port) + { + return inb(gmux_data->iostart + port); +@@ -945,6 +950,19 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) + gmux_enable_interrupts(gmux_data); + gmux_read_switch_state(gmux_data); + ++ if (force_igd) { ++ struct pci_dev *pdev; ++ ++ pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(2, 0)); ++ if (pdev) { ++ pr_info("Switching to IGD\n"); ++ gmux_switchto(VGA_SWITCHEROO_IGD); ++ vga_set_default_device(pdev); ++ } else { ++ pr_err("force_igd is true, but couldn't find iGPU at 00:02.0! Is apple-set-os working?\n"); ++ } ++ } ++ + /* + * Retina MacBook Pros cannot switch the panel's AUX separately + * and need eDP pre-calibration. They are distinguishable from +-- +2.43.0 + +From 6adb501c697cd0e3246e75237ee8e43eb5a92cc3 Mon Sep 17 00:00:00 2001 +From: Kerem Karabay +Date: Thu, 23 Nov 2023 18:58:51 +0530 +Subject: [PATCH] efi: libstub: add support for the apple_set_os protocol + +On dual GPU EFI Macs, the EFI stub needs to report that it is booting +macOS in order to prevent the firmware from disabling the iGPU. + +See also this patch for GRUB by Andreas Heider : +https://lists.gnu.org/archive/html/grub-devel/2013-12/msg00442.html +--- + .../admin-guide/kernel-parameters.txt | 2 ++ + .../firmware/efi/libstub/efi-stub-helper.c | 3 +++ + drivers/firmware/efi/libstub/efistub.h | 14 ++++++++++ + drivers/firmware/efi/libstub/x86-stub.c | 27 +++++++++++++++++++ + include/linux/efi.h | 1 + + 5 files changed, 47 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 41644336e..cbd4697a5 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -399,6 +399,8 @@ + useful so that a dump capture kernel won't be + shot down by NMI + ++ apple_set_os [KNL] Report that macOS is being booted to the firmware ++ + autoconf= [IPV6] + See Documentation/networking/ipv6.rst.
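
The probe-deferral hunk added to amdgpu above is worth calling out, since the pattern is generic: any GPU driver that may probe before apple-gmux has registered its handler can bail out early and get re-probed later. A minimal sketch with a hypothetical driver name; the vga_switcheroo helper is the real API used in the amdgpu hunk:

#include <linux/pci.h>
#include <linux/vga_switcheroo.h>

static int exgpu_pci_probe(struct pci_dev *pdev,
			   const struct pci_device_id *id)
{
	/*
	 * Defer until the apple-gmux handler has registered, so the
	 * eDP panel is only probed once GPU switching is available.
	 */
	if (vga_switcheroo_client_probe_defer(pdev))
		return -EPROBE_DEFER;

	/* ... normal PCI/DRM device setup would follow ... */
	return 0;
}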
+ +diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c +index bfa30625f..3d99acc1a 100644 +--- a/drivers/firmware/efi/libstub/efi-stub-helper.c ++++ b/drivers/firmware/efi/libstub/efi-stub-helper.c +@@ -19,6 +19,7 @@ + bool efi_nochunk; + bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE); + bool efi_novamap; ++bool efi_apple_set_os; + + static bool efi_noinitrd; + static bool efi_nosoftreserve; +@@ -73,6 +74,8 @@ efi_status_t efi_parse_options(char const *cmdline) + efi_loglevel = CONSOLE_LOGLEVEL_QUIET; + } else if (!strcmp(param, "noinitrd")) { + efi_noinitrd = true; ++ } else if (!strcmp(param, "apple_set_os")) { ++ efi_apple_set_os = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; + } else if (!strcmp(param, "efi") && val) { +diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h +index 212687c30..21b414d09 100644 +--- a/drivers/firmware/efi/libstub/efistub.h ++++ b/drivers/firmware/efi/libstub/efistub.h +@@ -38,6 +38,7 @@ extern bool efi_nochunk; + extern bool efi_nokaslr; + extern int efi_loglevel; + extern bool efi_novamap; ++extern bool efi_apple_set_os; + + extern const efi_system_table_t *efi_system_table; + +@@ -825,6 +826,19 @@ union apple_properties_protocol { + } mixed_mode; + }; + ++typedef struct apple_set_os_protocol apple_set_os_protocol_t; ++ ++struct apple_set_os_protocol { ++ u64 version; ++ efi_status_t (__efiapi *set_os_version) (const char *); ++ efi_status_t (__efiapi *set_os_vendor) (const char *); ++ struct { ++ u32 version; ++ u32 set_os_version; ++ u32 set_os_vendor; ++ } mixed_mode; ++}; ++ + typedef u32 efi_tcg2_event_log_format; + + #define INITRD_EVENT_TAG_ID 0x8F3B22ECU +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 70b325a2f..2131f8543 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -223,6 +223,30 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) + } + } + ++static void apple_set_os(void) ++{ ++ efi_guid_t guid = APPLE_SET_OS_PROTOCOL_GUID; ++ apple_set_os_protocol_t *set_os; ++ efi_status_t status; ++ ++ status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&set_os); ++ if (status != EFI_SUCCESS) ++ return; ++ ++ if (efi_table_attr(set_os, version) >= 2) { ++ status = efi_fn_call(set_os, set_os_vendor, "Apple Inc."); ++ if (status != EFI_SUCCESS) ++ efi_err("Failed to set OS vendor via apple_set_os\n"); ++ } ++ ++ /* The version being set doesn't seem to matter */ ++ if (efi_table_attr(set_os, version) > 0) { ++ status = efi_fn_call(set_os, set_os_version, "Mac OS X 10.9"); ++ if (status != EFI_SUCCESS) ++ efi_err("Failed to set OS version via apple_set_os\n"); ++ } ++} ++ + void efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) + { +@@ -321,6 +345,9 @@ static void setup_quirks(struct boot_params *boot_params) + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); ++ ++ if (efi_apple_set_os) ++ apple_set_os(); + } + + /* +diff --git a/include/linux/efi.h b/include/linux/efi.h +index 80b21d1c6..f1e58e027 100644 +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -387,6 +387,7 @@ void efi_native_runtime_setup(void); + #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 
0x20) + #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) + #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) ++#define APPLE_SET_OS_PROTOCOL_GUID EFI_GUID(0xc5c5da95, 0x7d5c, 0x45e6, 0xb2, 0xf1, 0x3f, 0xd5, 0x2b, 0xb1, 0x00, 0x77) + #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) + #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) + #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) +-- +2.34.1 + +From 09dd6c563cd73d72e917de07e8d59358c41e051d Mon Sep 17 00:00:00 2001 +From: Paul Pawlowski +Date: Sun, 17 Nov 2019 23:12:55 +0100 +Subject: [PATCH 1/6] applesmc: convert static structures to drvdata + +All static data structures have been moved to an applesmc_device struct, +which is then associated with the platform device. +This change is intended to ease the migration to an acpi_device, where +static data would preferably be avoided. + +Signed-off-by: Aun-Ali Zaidi +--- + drivers/hwmon/applesmc.c | 540 +++++++++++++++++++++++---------------- + 1 file changed, 319 insertions(+), 221 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 79b498f816fe..62211b590a61 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -6,6 +6,7 @@ + * + * Copyright (C) 2007 Nicolas Boichat + * Copyright (C) 2010 Henrik Rydberg ++ * Copyright (C) 2019 Paul Pawlowski + * + * Based on hdaps.c driver: + * Copyright (C) 2005 Robert Love +@@ -119,7 +120,7 @@ struct applesmc_entry { + }; + + /* Register lookup and registers common to all SMCs */ +-static struct applesmc_registers { ++struct applesmc_registers { + struct mutex mutex; /* register read/write mutex */ + unsigned int key_count; /* number of SMC registers */ + unsigned int fan_count; /* number of fans */ +@@ -133,26 +134,32 @@ static struct applesmc_registers { + bool init_complete; /* true when fully initialized */ + struct applesmc_entry *cache; /* cached key entries */ + const char **index; /* temperature key index */ +-} smcreg = { +- .mutex = __MUTEX_INITIALIZER(smcreg.mutex), + }; + +-static const int debug; +-static struct platform_device *pdev; +-static s16 rest_x; +-static s16 rest_y; +-static u8 backlight_state[2]; ++struct applesmc_device { ++ struct platform_device *dev; ++ struct applesmc_registers reg; + +-static struct device *hwmon_dev; +-static struct input_dev *applesmc_idev; ++ s16 rest_x; ++ s16 rest_y; + +-/* +- * Last index written to key_at_index sysfs file, and value to use for all other +- * key_at_index_* sysfs files. +- */ +-static unsigned int key_at_index; ++ u8 backlight_state[2]; ++ ++ struct device *hwmon_dev; ++ struct input_dev *idev; ++ ++ /* ++ * Last index written to key_at_index sysfs file, and value to use for all other ++ * key_at_index_* sysfs files. ++ */ ++ unsigned int key_at_index; ++ ++ struct workqueue_struct *backlight_wq; ++ struct work_struct backlight_work; ++ struct led_classdev backlight_dev; ++}; + +-static struct workqueue_struct *applesmc_led_wq; ++static const int debug; + + /* + * Wait for specific status bits with a mask on the SMC. +@@ -338,36 +345,37 @@ static int read_register_count(unsigned int *count) + * All functions below are concurrency safe - callers should NOT hold lock. 
+ */ + +-static int applesmc_read_entry(const struct applesmc_entry *entry, +- u8 *buf, u8 len) ++static int applesmc_read_entry(struct applesmc_device *smc, ++ const struct applesmc_entry *entry, u8 *buf, u8 len) + { + int ret; + + if (entry->len != len) + return -EINVAL; +- mutex_lock(&smcreg.mutex); ++ mutex_lock(&smc->reg.mutex); + ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len); +- mutex_unlock(&smcreg.mutex); ++ mutex_unlock(&smc->reg.mutex); + + return ret; + } + +-static int applesmc_write_entry(const struct applesmc_entry *entry, +- const u8 *buf, u8 len) ++static int applesmc_write_entry(struct applesmc_device *smc, ++ const struct applesmc_entry *entry, const u8 *buf, u8 len) + { + int ret; + + if (entry->len != len) + return -EINVAL; +- mutex_lock(&smcreg.mutex); ++ mutex_lock(&smc->reg.mutex); + ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len); +- mutex_unlock(&smcreg.mutex); ++ mutex_unlock(&smc->reg.mutex); + return ret; + } + +-static const struct applesmc_entry *applesmc_get_entry_by_index(int index) ++static const struct applesmc_entry *applesmc_get_entry_by_index( ++ struct applesmc_device *smc, int index) + { +- struct applesmc_entry *cache = &smcreg.cache[index]; ++ struct applesmc_entry *cache = &smc->reg.cache[index]; + u8 key[4], info[6]; + __be32 be; + int ret = 0; +@@ -375,7 +383,7 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(int index) + if (cache->valid) + return cache; + +- mutex_lock(&smcreg.mutex); ++ mutex_lock(&smc->reg.mutex); + + if (cache->valid) + goto out; +@@ -394,20 +402,21 @@ static const struct applesmc_entry *applesmc_get_entry_by_index(int index) + cache->valid = true; + + out: +- mutex_unlock(&smcreg.mutex); ++ mutex_unlock(&smc->reg.mutex); + if (ret) + return ERR_PTR(ret); + return cache; + } + +-static int applesmc_get_lower_bound(unsigned int *lo, const char *key) ++static int applesmc_get_lower_bound(struct applesmc_device *smc, ++ unsigned int *lo, const char *key) + { +- int begin = 0, end = smcreg.key_count; ++ int begin = 0, end = smc->reg.key_count; + const struct applesmc_entry *entry; + + while (begin != end) { + int middle = begin + (end - begin) / 2; +- entry = applesmc_get_entry_by_index(middle); ++ entry = applesmc_get_entry_by_index(smc, middle); + if (IS_ERR(entry)) { + *lo = 0; + return PTR_ERR(entry); +@@ -422,16 +431,17 @@ static int applesmc_get_lower_bound(unsigned int *lo, const char *key) + return 0; + } + +-static int applesmc_get_upper_bound(unsigned int *hi, const char *key) ++static int applesmc_get_upper_bound(struct applesmc_device *smc, ++ unsigned int *hi, const char *key) + { +- int begin = 0, end = smcreg.key_count; ++ int begin = 0, end = smc->reg.key_count; + const struct applesmc_entry *entry; + + while (begin != end) { + int middle = begin + (end - begin) / 2; +- entry = applesmc_get_entry_by_index(middle); ++ entry = applesmc_get_entry_by_index(smc, middle); + if (IS_ERR(entry)) { +- *hi = smcreg.key_count; ++ *hi = smc->reg.key_count; + return PTR_ERR(entry); + } + if (strcmp(key, entry->key) < 0) +@@ -444,50 +454,54 @@ static int applesmc_get_upper_bound(unsigned int *hi, const char *key) + return 0; + } + +-static const struct applesmc_entry *applesmc_get_entry_by_key(const char *key) ++static const struct applesmc_entry *applesmc_get_entry_by_key( ++ struct applesmc_device *smc, const char *key) + { + int begin, end; + int ret; + +- ret = applesmc_get_lower_bound(&begin, key); ++ ret = applesmc_get_lower_bound(smc, &begin, key); + if (ret) + return ERR_PTR(ret); 
+- ret = applesmc_get_upper_bound(&end, key); ++ ret = applesmc_get_upper_bound(smc, &end, key); + if (ret) + return ERR_PTR(ret); + if (end - begin != 1) + return ERR_PTR(-EINVAL); + +- return applesmc_get_entry_by_index(begin); ++ return applesmc_get_entry_by_index(smc, begin); + } + +-static int applesmc_read_key(const char *key, u8 *buffer, u8 len) ++static int applesmc_read_key(struct applesmc_device *smc, ++ const char *key, u8 *buffer, u8 len) + { + const struct applesmc_entry *entry; + +- entry = applesmc_get_entry_by_key(key); ++ entry = applesmc_get_entry_by_key(smc, key); + if (IS_ERR(entry)) + return PTR_ERR(entry); + +- return applesmc_read_entry(entry, buffer, len); ++ return applesmc_read_entry(smc, entry, buffer, len); + } + +-static int applesmc_write_key(const char *key, const u8 *buffer, u8 len) ++static int applesmc_write_key(struct applesmc_device *smc, ++ const char *key, const u8 *buffer, u8 len) + { + const struct applesmc_entry *entry; + +- entry = applesmc_get_entry_by_key(key); ++ entry = applesmc_get_entry_by_key(smc, key); + if (IS_ERR(entry)) + return PTR_ERR(entry); + +- return applesmc_write_entry(entry, buffer, len); ++ return applesmc_write_entry(smc, entry, buffer, len); + } + +-static int applesmc_has_key(const char *key, bool *value) ++static int applesmc_has_key(struct applesmc_device *smc, ++ const char *key, bool *value) + { + const struct applesmc_entry *entry; + +- entry = applesmc_get_entry_by_key(key); ++ entry = applesmc_get_entry_by_key(smc, key); + if (IS_ERR(entry) && PTR_ERR(entry) != -EINVAL) + return PTR_ERR(entry); + +@@ -498,12 +512,13 @@ static int applesmc_has_key(const char *key, bool *value) + /* + * applesmc_read_s16 - Read 16-bit signed big endian register + */ +-static int applesmc_read_s16(const char *key, s16 *value) ++static int applesmc_read_s16(struct applesmc_device *smc, ++ const char *key, s16 *value) + { + u8 buffer[2]; + int ret; + +- ret = applesmc_read_key(key, buffer, 2); ++ ret = applesmc_read_key(smc, key, buffer, 2); + if (ret) + return ret; + +@@ -514,28 +529,29 @@ static int applesmc_read_s16(const char *key, s16 *value) + /* + * applesmc_device_init - initialize the accelerometer. Can sleep. 
+ */ +-static void applesmc_device_init(void) ++static void applesmc_device_init(struct applesmc_device *smc) + { + int total; + u8 buffer[2]; + +- if (!smcreg.has_accelerometer) ++ if (!smc->reg.has_accelerometer) + return; + + for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) { +- if (!applesmc_read_key(MOTION_SENSOR_KEY, buffer, 2) && ++ if (!applesmc_read_key(smc, MOTION_SENSOR_KEY, buffer, 2) && + (buffer[0] != 0x00 || buffer[1] != 0x00)) + return; + buffer[0] = 0xe0; + buffer[1] = 0x00; +- applesmc_write_key(MOTION_SENSOR_KEY, buffer, 2); ++ applesmc_write_key(smc, MOTION_SENSOR_KEY, buffer, 2); + msleep(INIT_WAIT_MSECS); + } + + pr_warn("failed to init the device\n"); + } + +-static int applesmc_init_index(struct applesmc_registers *s) ++static int applesmc_init_index(struct applesmc_device *smc, ++ struct applesmc_registers *s) + { + const struct applesmc_entry *entry; + unsigned int i; +@@ -548,7 +564,7 @@ static int applesmc_init_index(struct applesmc_registers *s) + return -ENOMEM; + + for (i = s->temp_begin; i < s->temp_end; i++) { +- entry = applesmc_get_entry_by_index(i); ++ entry = applesmc_get_entry_by_index(smc, i); + if (IS_ERR(entry)) + continue; + if (strcmp(entry->type, TEMP_SENSOR_TYPE)) +@@ -562,9 +578,9 @@ static int applesmc_init_index(struct applesmc_registers *s) + /* + * applesmc_init_smcreg_try - Try to initialize register cache. Idempotent. + */ +-static int applesmc_init_smcreg_try(void) ++static int applesmc_init_smcreg_try(struct applesmc_device *smc) + { +- struct applesmc_registers *s = &smcreg; ++ struct applesmc_registers *s = &smc->reg; + bool left_light_sensor = false, right_light_sensor = false; + unsigned int count; + u8 tmp[1]; +@@ -590,35 +606,35 @@ static int applesmc_init_smcreg_try(void) + if (!s->cache) + return -ENOMEM; + +- ret = applesmc_read_key(FANS_COUNT, tmp, 1); ++ ret = applesmc_read_key(smc, FANS_COUNT, tmp, 1); + if (ret) + return ret; + s->fan_count = tmp[0]; + if (s->fan_count > 10) + s->fan_count = 10; + +- ret = applesmc_get_lower_bound(&s->temp_begin, "T"); ++ ret = applesmc_get_lower_bound(smc, &s->temp_begin, "T"); + if (ret) + return ret; +- ret = applesmc_get_lower_bound(&s->temp_end, "U"); ++ ret = applesmc_get_lower_bound(smc, &s->temp_end, "U"); + if (ret) + return ret; + s->temp_count = s->temp_end - s->temp_begin; + +- ret = applesmc_init_index(s); ++ ret = applesmc_init_index(smc, s); + if (ret) + return ret; + +- ret = applesmc_has_key(LIGHT_SENSOR_LEFT_KEY, &left_light_sensor); ++ ret = applesmc_has_key(smc, LIGHT_SENSOR_LEFT_KEY, &left_light_sensor); + if (ret) + return ret; +- ret = applesmc_has_key(LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor); ++ ret = applesmc_has_key(smc, LIGHT_SENSOR_RIGHT_KEY, &right_light_sensor); + if (ret) + return ret; +- ret = applesmc_has_key(MOTION_SENSOR_KEY, &s->has_accelerometer); ++ ret = applesmc_has_key(smc, MOTION_SENSOR_KEY, &s->has_accelerometer); + if (ret) + return ret; +- ret = applesmc_has_key(BACKLIGHT_KEY, &s->has_key_backlight); ++ ret = applesmc_has_key(smc, BACKLIGHT_KEY, &s->has_key_backlight); + if (ret) + return ret; + +@@ -634,13 +650,13 @@ static int applesmc_init_smcreg_try(void) + return 0; + } + +-static void applesmc_destroy_smcreg(void) ++static void applesmc_destroy_smcreg(struct applesmc_device *smc) + { +- kfree(smcreg.index); +- smcreg.index = NULL; +- kfree(smcreg.cache); +- smcreg.cache = NULL; +- smcreg.init_complete = false; ++ kfree(smc->reg.index); ++ smc->reg.index = NULL; ++ kfree(smc->reg.cache); ++ smc->reg.cache = NULL; ++ 
smc->reg.init_complete = false; + } + + /* +@@ -649,12 +665,12 @@ static void applesmc_destroy_smcreg(void) + * Retries until initialization is successful, or the operation times out. + * + */ +-static int applesmc_init_smcreg(void) ++static int applesmc_init_smcreg(struct applesmc_device *smc) + { + int ms, ret; + + for (ms = 0; ms < INIT_TIMEOUT_MSECS; ms += INIT_WAIT_MSECS) { +- ret = applesmc_init_smcreg_try(); ++ ret = applesmc_init_smcreg_try(smc); + if (!ret) { + if (ms) + pr_info("init_smcreg() took %d ms\n", ms); +@@ -663,21 +679,58 @@ static int applesmc_init_smcreg(void) + msleep(INIT_WAIT_MSECS); + } + +- applesmc_destroy_smcreg(); ++ applesmc_destroy_smcreg(smc); + + return ret; + } + + /* Device model stuff */ ++static int applesmc_create_modules(struct applesmc_device *smc); ++static void applesmc_destroy_modules(struct applesmc_device *smc); + static int applesmc_probe(struct platform_device *dev) + { ++ struct applesmc_device *smc; + int ret; + +- ret = applesmc_init_smcreg(); ++ smc = kzalloc(sizeof(struct applesmc_device), GFP_KERNEL); ++ if (!smc) ++ return -ENOMEM; ++ smc->dev = dev; ++ mutex_init(&smc->reg.mutex); ++ ++ platform_set_drvdata(dev, smc); ++ ++ ret = applesmc_init_smcreg(smc); + if (ret) +- return ret; ++ goto out_mem; ++ ++ applesmc_device_init(smc); ++ ++ ret = applesmc_create_modules(smc); ++ if (ret) ++ goto out_reg; ++ ++ return 0; ++ ++out_reg: ++ applesmc_destroy_smcreg(smc); ++out_mem: ++ platform_set_drvdata(dev, NULL); ++ mutex_destroy(&smc->reg.mutex); ++ kfree(smc); + +- applesmc_device_init(); ++ return ret; ++} ++ ++static int applesmc_remove(struct platform_device *dev) ++{ ++ struct applesmc_device *smc = platform_get_drvdata(dev); ++ ++ applesmc_destroy_modules(smc); ++ applesmc_destroy_smcreg(smc); ++ ++ mutex_destroy(&smc->reg.mutex); ++ kfree(smc); + + return 0; + } +@@ -685,15 +738,21 @@ static int applesmc_probe(struct platform_device *dev) + /* Synchronize device with memorized backlight state */ + static int applesmc_pm_resume(struct device *dev) + { +- if (smcreg.has_key_backlight) +- applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2); ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ ++ if (smc->reg.has_key_backlight) ++ applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2); ++ + return 0; + } + + /* Reinitialize device on resume from hibernation */ + static int applesmc_pm_restore(struct device *dev) + { +- applesmc_device_init(); ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ ++ applesmc_device_init(smc); ++ + return applesmc_pm_resume(dev); + } + +@@ -704,6 +763,7 @@ static const struct dev_pm_ops applesmc_pm_ops = { + + static struct platform_driver applesmc_driver = { + .probe = applesmc_probe, ++ .remove = applesmc_remove, + .driver = { + .name = "applesmc", + .pm = &applesmc_pm_ops, +@@ -714,25 +774,26 @@ static struct platform_driver applesmc_driver = { + * applesmc_calibrate - Set our "resting" values. Callers must + * hold applesmc_lock. 
+ */ +-static void applesmc_calibrate(void) ++static void applesmc_calibrate(struct applesmc_device *smc) + { +- applesmc_read_s16(MOTION_SENSOR_X_KEY, &rest_x); +- applesmc_read_s16(MOTION_SENSOR_Y_KEY, &rest_y); +- rest_x = -rest_x; ++ applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &smc->rest_x); ++ applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &smc->rest_y); ++ smc->rest_x = -smc->rest_x; + } + + static void applesmc_idev_poll(struct input_dev *idev) + { ++ struct applesmc_device *smc = dev_get_drvdata(&idev->dev); + s16 x, y; + +- if (applesmc_read_s16(MOTION_SENSOR_X_KEY, &x)) ++ if (applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x)) + return; +- if (applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y)) ++ if (applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y)) + return; + + x = -x; +- input_report_abs(idev, ABS_X, x - rest_x); +- input_report_abs(idev, ABS_Y, y - rest_y); ++ input_report_abs(idev, ABS_X, x - smc->rest_x); ++ input_report_abs(idev, ABS_Y, y - smc->rest_y); + input_sync(idev); + } + +@@ -747,16 +808,17 @@ static ssize_t applesmc_name_show(struct device *dev, + static ssize_t applesmc_position_show(struct device *dev, + struct device_attribute *attr, char *buf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + s16 x, y, z; + +- ret = applesmc_read_s16(MOTION_SENSOR_X_KEY, &x); ++ ret = applesmc_read_s16(smc, MOTION_SENSOR_X_KEY, &x); + if (ret) + goto out; +- ret = applesmc_read_s16(MOTION_SENSOR_Y_KEY, &y); ++ ret = applesmc_read_s16(smc, MOTION_SENSOR_Y_KEY, &y); + if (ret) + goto out; +- ret = applesmc_read_s16(MOTION_SENSOR_Z_KEY, &z); ++ ret = applesmc_read_s16(smc, MOTION_SENSOR_Z_KEY, &z); + if (ret) + goto out; + +@@ -770,6 +832,7 @@ static ssize_t applesmc_position_show(struct device *dev, + static ssize_t applesmc_light_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; + static int data_length; + int ret; +@@ -777,7 +840,7 @@ static ssize_t applesmc_light_show(struct device *dev, + u8 buffer[10]; + + if (!data_length) { +- entry = applesmc_get_entry_by_key(LIGHT_SENSOR_LEFT_KEY); ++ entry = applesmc_get_entry_by_key(smc, LIGHT_SENSOR_LEFT_KEY); + if (IS_ERR(entry)) + return PTR_ERR(entry); + if (entry->len > 10) +@@ -786,7 +849,7 @@ static ssize_t applesmc_light_show(struct device *dev, + pr_info("light sensor data length set to %d\n", data_length); + } + +- ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length); ++ ret = applesmc_read_key(smc, LIGHT_SENSOR_LEFT_KEY, buffer, data_length); + if (ret) + goto out; + /* newer macbooks report a single 10-bit bigendian value */ +@@ -796,7 +859,7 @@ static ssize_t applesmc_light_show(struct device *dev, + } + left = buffer[2]; + +- ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length); ++ ret = applesmc_read_key(smc, LIGHT_SENSOR_RIGHT_KEY, buffer, data_length); + if (ret) + goto out; + right = buffer[2]; +@@ -812,7 +875,8 @@ static ssize_t applesmc_light_show(struct device *dev, + static ssize_t applesmc_show_sensor_label(struct device *dev, + struct device_attribute *devattr, char *sysfsbuf) + { +- const char *key = smcreg.index[to_index(devattr)]; ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ const char *key = smc->reg.index[to_index(devattr)]; + + return sysfs_emit(sysfsbuf, "%s\n", key); + } +@@ -821,12 +885,13 @@ static ssize_t applesmc_show_sensor_label(struct device *dev, + static ssize_t applesmc_show_temperature(struct device *dev, + struct 
device_attribute *devattr, char *sysfsbuf) + { +- const char *key = smcreg.index[to_index(devattr)]; ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ const char *key = smc->reg.index[to_index(devattr)]; + int ret; + s16 value; + int temp; + +- ret = applesmc_read_s16(key, &value); ++ ret = applesmc_read_s16(smc, key, &value); + if (ret) + return ret; + +@@ -838,6 +903,7 @@ static ssize_t applesmc_show_temperature(struct device *dev, + static ssize_t applesmc_show_fan_speed(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + unsigned int speed = 0; + char newkey[5]; +@@ -846,7 +912,7 @@ static ssize_t applesmc_show_fan_speed(struct device *dev, + scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], + to_index(attr)); + +- ret = applesmc_read_key(newkey, buffer, 2); ++ ret = applesmc_read_key(smc, newkey, buffer, 2); + if (ret) + return ret; + +@@ -858,6 +924,7 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, + struct device_attribute *attr, + const char *sysfsbuf, size_t count) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + unsigned long speed; + char newkey[5]; +@@ -871,7 +938,7 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, + + buffer[0] = (speed >> 6) & 0xff; + buffer[1] = (speed << 2) & 0xff; +- ret = applesmc_write_key(newkey, buffer, 2); ++ ret = applesmc_write_key(smc, newkey, buffer, 2); + + if (ret) + return ret; +@@ -882,11 +949,12 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, + static ssize_t applesmc_show_fan_manual(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + u16 manual = 0; + u8 buffer[2]; + +- ret = applesmc_read_key(FANS_MANUAL, buffer, 2); ++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); + if (ret) + return ret; + +@@ -898,6 +966,7 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, + struct device_attribute *attr, + const char *sysfsbuf, size_t count) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + u8 buffer[2]; + unsigned long input; +@@ -906,7 +975,7 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, + if (kstrtoul(sysfsbuf, 10, &input) < 0) + return -EINVAL; + +- ret = applesmc_read_key(FANS_MANUAL, buffer, 2); ++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); + if (ret) + goto out; + +@@ -920,7 +989,7 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, + buffer[0] = (val >> 8) & 0xFF; + buffer[1] = val & 0xFF; + +- ret = applesmc_write_key(FANS_MANUAL, buffer, 2); ++ ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2); + + out: + if (ret) +@@ -932,13 +1001,14 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, + static ssize_t applesmc_show_fan_position(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + char newkey[5]; + u8 buffer[17]; + + scnprintf(newkey, sizeof(newkey), FAN_ID_FMT, to_index(attr)); + +- ret = applesmc_read_key(newkey, buffer, 16); ++ ret = applesmc_read_key(smc, newkey, buffer, 16); + buffer[16] = 0; + + if (ret) +@@ -950,30 +1020,36 @@ static ssize_t applesmc_show_fan_position(struct device *dev, + static ssize_t applesmc_calibrate_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { +- return sysfs_emit(sysfsbuf, "(%d,%d)\n", rest_x, rest_y); ++ struct applesmc_device *smc 
= dev_get_drvdata(dev); ++ ++ return sysfs_emit(sysfsbuf, "(%d,%d)\n", smc->rest_x, smc->rest_y); + } + + static ssize_t applesmc_calibrate_store(struct device *dev, + struct device_attribute *attr, const char *sysfsbuf, size_t count) + { +- applesmc_calibrate(); ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ ++ applesmc_calibrate(smc); + + return count; + } + + static void applesmc_backlight_set(struct work_struct *work) + { +- applesmc_write_key(BACKLIGHT_KEY, backlight_state, 2); ++ struct applesmc_device *smc = container_of(work, struct applesmc_device, backlight_work); ++ ++ applesmc_write_key(smc, BACKLIGHT_KEY, smc->backlight_state, 2); + } +-static DECLARE_WORK(backlight_work, &applesmc_backlight_set); + + static void applesmc_brightness_set(struct led_classdev *led_cdev, + enum led_brightness value) + { ++ struct applesmc_device *smc = dev_get_drvdata(led_cdev->dev); + int ret; + +- backlight_state[0] = value; +- ret = queue_work(applesmc_led_wq, &backlight_work); ++ smc->backlight_state[0] = value; ++ ret = queue_work(smc->backlight_wq, &smc->backlight_work); + + if (debug && (!ret)) + dev_dbg(led_cdev->dev, "work was already on the queue.\n"); +@@ -982,11 +1058,12 @@ static void applesmc_brightness_set(struct led_classdev *led_cdev, + static ssize_t applesmc_key_count_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + u8 buffer[4]; + u32 count; + +- ret = applesmc_read_key(KEY_COUNT_KEY, buffer, 4); ++ ret = applesmc_read_key(smc, KEY_COUNT_KEY, buffer, 4); + if (ret) + return ret; + +@@ -998,13 +1075,14 @@ static ssize_t applesmc_key_count_show(struct device *dev, + static ssize_t applesmc_key_at_index_read_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; + int ret; + +- entry = applesmc_get_entry_by_index(key_at_index); ++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); + if (IS_ERR(entry)) + return PTR_ERR(entry); +- ret = applesmc_read_entry(entry, sysfsbuf, entry->len); ++ ret = applesmc_read_entry(smc, entry, sysfsbuf, entry->len); + if (ret) + return ret; + +@@ -1014,9 +1092,10 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev, + static ssize_t applesmc_key_at_index_data_length_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; + +- entry = applesmc_get_entry_by_index(key_at_index); ++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); + if (IS_ERR(entry)) + return PTR_ERR(entry); + +@@ -1026,9 +1105,10 @@ static ssize_t applesmc_key_at_index_data_length_show(struct device *dev, + static ssize_t applesmc_key_at_index_type_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; + +- entry = applesmc_get_entry_by_index(key_at_index); ++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); + if (IS_ERR(entry)) + return PTR_ERR(entry); + +@@ -1038,9 +1118,10 @@ static ssize_t applesmc_key_at_index_type_show(struct device *dev, + static ssize_t applesmc_key_at_index_name_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + const struct applesmc_entry *entry; + +- entry = 
applesmc_get_entry_by_index(key_at_index); ++ entry = applesmc_get_entry_by_index(smc, smc->key_at_index); + if (IS_ERR(entry)) + return PTR_ERR(entry); + +@@ -1050,28 +1131,25 @@ static ssize_t applesmc_key_at_index_name_show(struct device *dev, + static ssize_t applesmc_key_at_index_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { +- return sysfs_emit(sysfsbuf, "%d\n", key_at_index); ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ ++ return sysfs_emit(sysfsbuf, "%d\n", smc->key_at_index); + } + + static ssize_t applesmc_key_at_index_store(struct device *dev, + struct device_attribute *attr, const char *sysfsbuf, size_t count) + { ++ struct applesmc_device *smc = dev_get_drvdata(dev); + unsigned long newkey; + + if (kstrtoul(sysfsbuf, 10, &newkey) < 0 +- || newkey >= smcreg.key_count) ++ || newkey >= smc->reg.key_count) + return -EINVAL; + +- key_at_index = newkey; ++ smc->key_at_index = newkey; + return count; + } + +-static struct led_classdev applesmc_backlight = { +- .name = "smc::kbd_backlight", +- .default_trigger = "nand-disk", +- .brightness_set = applesmc_brightness_set, +-}; +- + static struct applesmc_node_group info_group[] = { + { "name", applesmc_name_show }, + { "key_count", applesmc_key_count_show }, +@@ -1116,14 +1194,15 @@ static struct applesmc_node_group temp_group[] = { + /* + * applesmc_destroy_nodes - remove files and free associated memory + */ +-static void applesmc_destroy_nodes(struct applesmc_node_group *groups) ++static void applesmc_destroy_nodes(struct applesmc_device *smc, ++ struct applesmc_node_group *groups) + { + struct applesmc_node_group *grp; + struct applesmc_dev_attr *node; + + for (grp = groups; grp->nodes; grp++) { + for (node = grp->nodes; node->sda.dev_attr.attr.name; node++) +- sysfs_remove_file(&pdev->dev.kobj, ++ sysfs_remove_file(&smc->dev->dev.kobj, + &node->sda.dev_attr.attr); + kfree(grp->nodes); + grp->nodes = NULL; +@@ -1133,7 +1212,8 @@ static void applesmc_destroy_nodes(struct applesmc_node_group *groups) + /* + * applesmc_create_nodes - create a two-dimensional group of sysfs files + */ +-static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) ++static int applesmc_create_nodes(struct applesmc_device *smc, ++ struct applesmc_node_group *groups, int num) + { + struct applesmc_node_group *grp; + struct applesmc_dev_attr *node; +@@ -1157,7 +1237,7 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) + sysfs_attr_init(attr); + attr->name = node->name; + attr->mode = 0444 | (grp->store ? 
0200 : 0); +- ret = sysfs_create_file(&pdev->dev.kobj, attr); ++ ret = sysfs_create_file(&smc->dev->dev.kobj, attr); + if (ret) { + attr->name = NULL; + goto out; +@@ -1167,57 +1247,57 @@ static int applesmc_create_nodes(struct applesmc_node_group *groups, int num) + + return 0; + out: +- applesmc_destroy_nodes(groups); ++ applesmc_destroy_nodes(smc, groups); + return ret; + } + + /* Create accelerometer resources */ +-static int applesmc_create_accelerometer(void) ++static int applesmc_create_accelerometer(struct applesmc_device *smc) + { + int ret; + +- if (!smcreg.has_accelerometer) ++ if (!smc->reg.has_accelerometer) + return 0; + +- ret = applesmc_create_nodes(accelerometer_group, 1); ++ ret = applesmc_create_nodes(smc, accelerometer_group, 1); + if (ret) + goto out; + +- applesmc_idev = input_allocate_device(); +- if (!applesmc_idev) { ++ smc->idev = input_allocate_device(); ++ if (!smc->idev) { + ret = -ENOMEM; + goto out_sysfs; + } + + /* initial calibrate for the input device */ +- applesmc_calibrate(); ++ applesmc_calibrate(smc); + + /* initialize the input device */ +- applesmc_idev->name = "applesmc"; +- applesmc_idev->id.bustype = BUS_HOST; +- applesmc_idev->dev.parent = &pdev->dev; +- input_set_abs_params(applesmc_idev, ABS_X, ++ smc->idev->name = "applesmc"; ++ smc->idev->id.bustype = BUS_HOST; ++ smc->idev->dev.parent = &smc->dev->dev; ++ input_set_abs_params(smc->idev, ABS_X, + -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); +- input_set_abs_params(applesmc_idev, ABS_Y, ++ input_set_abs_params(smc->idev, ABS_Y, + -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); + +- ret = input_setup_polling(applesmc_idev, applesmc_idev_poll); ++ ret = input_setup_polling(smc->idev, applesmc_idev_poll); + if (ret) + goto out_idev; + +- input_set_poll_interval(applesmc_idev, APPLESMC_POLL_INTERVAL); ++ input_set_poll_interval(smc->idev, APPLESMC_POLL_INTERVAL); + +- ret = input_register_device(applesmc_idev); ++ ret = input_register_device(smc->idev); + if (ret) + goto out_idev; + + return 0; + + out_idev: +- input_free_device(applesmc_idev); ++ input_free_device(smc->idev); + + out_sysfs: +- applesmc_destroy_nodes(accelerometer_group); ++ applesmc_destroy_nodes(smc, accelerometer_group); + + out: + pr_warn("driver init failed (ret=%d)!\n", ret); +@@ -1225,44 +1305,55 @@ static int applesmc_create_accelerometer(void) + } + + /* Release all resources used by the accelerometer */ +-static void applesmc_release_accelerometer(void) ++static void applesmc_release_accelerometer(struct applesmc_device *smc) + { +- if (!smcreg.has_accelerometer) ++ if (!smc->reg.has_accelerometer) + return; +- input_unregister_device(applesmc_idev); +- applesmc_destroy_nodes(accelerometer_group); ++ input_unregister_device(smc->idev); ++ applesmc_destroy_nodes(smc, accelerometer_group); + } + +-static int applesmc_create_light_sensor(void) ++static int applesmc_create_light_sensor(struct applesmc_device *smc) + { +- if (!smcreg.num_light_sensors) ++ if (!smc->reg.num_light_sensors) + return 0; +- return applesmc_create_nodes(light_sensor_group, 1); ++ return applesmc_create_nodes(smc, light_sensor_group, 1); + } + +-static void applesmc_release_light_sensor(void) ++static void applesmc_release_light_sensor(struct applesmc_device *smc) + { +- if (!smcreg.num_light_sensors) ++ if (!smc->reg.num_light_sensors) + return; +- applesmc_destroy_nodes(light_sensor_group); ++ applesmc_destroy_nodes(smc, light_sensor_group); + } + +-static int applesmc_create_key_backlight(void) ++static int 
applesmc_create_key_backlight(struct applesmc_device *smc) + { +- if (!smcreg.has_key_backlight) ++ int ret; ++ ++ if (!smc->reg.has_key_backlight) + return 0; +- applesmc_led_wq = create_singlethread_workqueue("applesmc-led"); +- if (!applesmc_led_wq) ++ smc->backlight_wq = create_singlethread_workqueue("applesmc-led"); ++ if (!smc->backlight_wq) + return -ENOMEM; +- return led_classdev_register(&pdev->dev, &applesmc_backlight); ++ ++ INIT_WORK(&smc->backlight_work, applesmc_backlight_set); ++ smc->backlight_dev.name = "smc::kbd_backlight"; ++ smc->backlight_dev.default_trigger = "nand-disk"; ++ smc->backlight_dev.brightness_set = applesmc_brightness_set; ++ ret = led_classdev_register(&smc->dev->dev, &smc->backlight_dev); ++ if (ret) ++ destroy_workqueue(smc->backlight_wq); ++ ++ return ret; + } + +-static void applesmc_release_key_backlight(void) ++static void applesmc_release_key_backlight(struct applesmc_device *smc) + { +- if (!smcreg.has_key_backlight) ++ if (!smc->reg.has_key_backlight) + return; +- led_classdev_unregister(&applesmc_backlight); +- destroy_workqueue(applesmc_led_wq); ++ led_classdev_unregister(&smc->backlight_dev); ++ destroy_workqueue(smc->backlight_wq); + } + + static int applesmc_dmi_match(const struct dmi_system_id *id) +@@ -1302,86 +1393,100 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = { + { .ident = NULL } + }; + +-static int __init applesmc_init(void) ++static int applesmc_create_modules(struct applesmc_device *smc) + { + int ret; + +- if (!dmi_check_system(applesmc_whitelist)) { +- pr_warn("supported laptop not found!\n"); +- ret = -ENODEV; +- goto out; +- } +- +- if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS, +- "applesmc")) { +- ret = -ENXIO; +- goto out; +- } +- +- ret = platform_driver_register(&applesmc_driver); +- if (ret) +- goto out_region; +- +- pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT, +- NULL, 0); +- if (IS_ERR(pdev)) { +- ret = PTR_ERR(pdev); +- goto out_driver; +- } +- +- /* create register cache */ +- ret = applesmc_init_smcreg(); +- if (ret) +- goto out_device; +- +- ret = applesmc_create_nodes(info_group, 1); ++ ret = applesmc_create_nodes(smc, info_group, 1); + if (ret) +- goto out_smcreg; ++ goto out; + +- ret = applesmc_create_nodes(fan_group, smcreg.fan_count); ++ ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count); + if (ret) + goto out_info; + +- ret = applesmc_create_nodes(temp_group, smcreg.index_count); ++ ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count); + if (ret) + goto out_fans; + +- ret = applesmc_create_accelerometer(); ++ ret = applesmc_create_accelerometer(smc); + if (ret) + goto out_temperature; + +- ret = applesmc_create_light_sensor(); ++ ret = applesmc_create_light_sensor(smc); + if (ret) + goto out_accelerometer; + +- ret = applesmc_create_key_backlight(); ++ ret = applesmc_create_key_backlight(smc); + if (ret) + goto out_light_sysfs; + +- hwmon_dev = hwmon_device_register(&pdev->dev); +- if (IS_ERR(hwmon_dev)) { +- ret = PTR_ERR(hwmon_dev); ++ smc->hwmon_dev = hwmon_device_register(&smc->dev->dev); ++ if (IS_ERR(smc->hwmon_dev)) { ++ ret = PTR_ERR(smc->hwmon_dev); + goto out_light_ledclass; + } + + return 0; + + out_light_ledclass: +- applesmc_release_key_backlight(); ++ applesmc_release_key_backlight(smc); + out_light_sysfs: +- applesmc_release_light_sensor(); ++ applesmc_release_light_sensor(smc); + out_accelerometer: +- applesmc_release_accelerometer(); ++ applesmc_release_accelerometer(smc); + out_temperature: +- 
applesmc_destroy_nodes(temp_group); ++ applesmc_destroy_nodes(smc, temp_group); + out_fans: +- applesmc_destroy_nodes(fan_group); ++ applesmc_destroy_nodes(smc, fan_group); + out_info: +- applesmc_destroy_nodes(info_group); +-out_smcreg: +- applesmc_destroy_smcreg(); +-out_device: +- platform_device_unregister(pdev); ++ applesmc_destroy_nodes(smc, info_group); ++out: ++ return ret; ++} ++ ++static void applesmc_destroy_modules(struct applesmc_device *smc) ++{ ++ hwmon_device_unregister(smc->hwmon_dev); ++ applesmc_release_key_backlight(smc); ++ applesmc_release_light_sensor(smc); ++ applesmc_release_accelerometer(smc); ++ applesmc_destroy_nodes(smc, temp_group); ++ applesmc_destroy_nodes(smc, fan_group); ++ applesmc_destroy_nodes(smc, info_group); ++} ++ ++static struct platform_device *pdev; ++ ++static int __init applesmc_init(void) ++{ ++ int ret; ++ ++ if (!dmi_check_system(applesmc_whitelist)) { ++ pr_warn("supported laptop not found!\n"); ++ ret = -ENODEV; ++ goto out; ++ } ++ ++ if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS, ++ "applesmc")) { ++ ret = -ENXIO; ++ goto out; ++ } ++ ++ ret = platform_driver_register(&applesmc_driver); ++ if (ret) ++ goto out_region; ++ ++ pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT, ++ NULL, 0); ++ if (IS_ERR(pdev)) { ++ ret = PTR_ERR(pdev); ++ goto out_driver; ++ } ++ ++ return 0; ++ + out_driver: + platform_driver_unregister(&applesmc_driver); + out_region: +@@ -1393,14 +1498,6 @@ static int __init applesmc_init(void) + + static void __exit applesmc_exit(void) + { +- hwmon_device_unregister(hwmon_dev); +- applesmc_release_key_backlight(); +- applesmc_release_light_sensor(); +- applesmc_release_accelerometer(); +- applesmc_destroy_nodes(temp_group); +- applesmc_destroy_nodes(fan_group); +- applesmc_destroy_nodes(info_group); +- applesmc_destroy_smcreg(); + platform_device_unregister(pdev); + platform_driver_unregister(&applesmc_driver); + release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); +@@ -1410,6 +1507,7 @@ module_init(applesmc_init); + module_exit(applesmc_exit); + + MODULE_AUTHOR("Nicolas Boichat"); ++MODULE_AUTHOR("Paul Pawlowski"); + MODULE_DESCRIPTION("Apple SMC"); + MODULE_LICENSE("GPL v2"); + MODULE_DEVICE_TABLE(dmi, applesmc_whitelist); +-- +2.30.0 + +From 713e78b8dbb8adb92d4ee09ea11e726b05577689 Mon Sep 17 00:00:00 2001 +From: Paul Pawlowski +Date: Sun, 17 Nov 2019 23:11:56 +0100 +Subject: [PATCH 2/6] applesmc: make io port base addr dynamic + +This change makes the port base runtime configurable. +The reason why this change is made is so that when we switch to an +acpi_device we can resolve the port base addr from ACPI. + +This change is not strictly required for T2 support - the base +address is still 0x300 on T2 Macs. 
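+
+As an editorial sketch (not part of the diff below), the net effect on
+an I/O access is that the old absolute port constants become offsets
+added to a runtime-resolved base:
+
+	/* before: absolute port */
+	status = inb(APPLESMC_CMD_PORT);		/* 0x304 */
+
+	/* after: offset from smc->port_base (still 0x300 on T2 Macs) */
+	status = inb(smc->port_base + APPLESMC_CMD_PORT);	/* base + 4 */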
+ +Signed-off-by: Aun-Ali Zaidi +--- + drivers/hwmon/applesmc.c | 91 +++++++++++++++++++++------------------- + 1 file changed, 49 insertions(+), 42 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 62211b590a61..39ed0bb21365 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -35,10 +35,11 @@ + #include + #include + ++#define APPLESMC_PORT_BASE 0x300 + /* data port used by Apple SMC */ +-#define APPLESMC_DATA_PORT 0x300 ++#define APPLESMC_DATA_PORT 0 + /* command/status port used by Apple SMC */ +-#define APPLESMC_CMD_PORT 0x304 ++#define APPLESMC_CMD_PORT 4 + + #define APPLESMC_NR_PORTS 32 /* 0x300-0x31f */ + +@@ -140,6 +141,8 @@ struct applesmc_device { + struct platform_device *dev; + struct applesmc_registers reg; + ++ u16 port_base; ++ + s16 rest_x; + s16 rest_y; + +@@ -169,7 +172,7 @@ static const int debug; + * run out past 500ms. + */ + +-static int wait_status(u8 val, u8 mask) ++static int wait_status(struct applesmc_device *smc, u8 val, u8 mask) + { + u8 status; + int us; +@@ -177,7 +180,7 @@ static int wait_status(u8 val, u8 mask) + + us = APPLESMC_MIN_WAIT; + for (i = 0; i < 24 ; i++) { +- status = inb(APPLESMC_CMD_PORT); ++ status = inb(smc->port_base + APPLESMC_CMD_PORT); + if ((status & mask) == val) + return 0; + usleep_range(us, us * 2); +@@ -189,11 +192,11 @@ static int wait_status(u8 val, u8 mask) + + /* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ + +-static int send_byte(u8 cmd, u16 port) ++static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port) + { + int status; + +- status = wait_status(0, SMC_STATUS_IB_CLOSED); ++ status = wait_status(smc, 0, SMC_STATUS_IB_CLOSED); + if (status) + return status; + /* +@@ -202,24 +205,24 @@ static int send_byte(u8 cmd, u16 port) + * this extra read may not happen if status returns both + * simultaneously and this would appear to be required. + */ +- status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY); ++ status = wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY); + if (status) + return status; + +- outb(cmd, port); ++ outb(cmd, smc->port_base + port); + return 0; + } + + /* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ + +-static int send_command(u8 cmd) ++static int send_command(struct applesmc_device *smc, u8 cmd) + { + int ret; + +- ret = wait_status(0, SMC_STATUS_IB_CLOSED); ++ ret = wait_status(smc, 0, SMC_STATUS_IB_CLOSED); + if (ret) + return ret; +- outb(cmd, APPLESMC_CMD_PORT); ++ outb(cmd, smc->port_base + APPLESMC_CMD_PORT); + return 0; + } + +@@ -229,108 +232,112 @@ static int send_command(u8 cmd) + * If busy is stuck high after the command then the SMC is jammed. 
+ */ + +-static int smc_sane(void) ++static int smc_sane(struct applesmc_device *smc) + { + int ret; + +- ret = wait_status(0, SMC_STATUS_BUSY); ++ ret = wait_status(smc, 0, SMC_STATUS_BUSY); + if (!ret) + return ret; +- ret = send_command(APPLESMC_READ_CMD); ++ ret = send_command(smc, APPLESMC_READ_CMD); + if (ret) + return ret; +- return wait_status(0, SMC_STATUS_BUSY); ++ return wait_status(smc, 0, SMC_STATUS_BUSY); + } + +-static int send_argument(const char *key) ++static int send_argument(struct applesmc_device *smc, const char *key) + { + int i; + + for (i = 0; i < 4; i++) +- if (send_byte(key[i], APPLESMC_DATA_PORT)) ++ if (send_byte(smc, key[i], APPLESMC_DATA_PORT)) + return -EIO; + return 0; + } + +-static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) ++static int read_smc(struct applesmc_device *smc, u8 cmd, const char *key, ++ u8 *buffer, u8 len) + { + u8 status, data = 0; + int i; + int ret; + +- ret = smc_sane(); ++ ret = smc_sane(smc); + if (ret) + return ret; + +- if (send_command(cmd) || send_argument(key)) { ++ if (send_command(smc, cmd) || send_argument(smc, key)) { + pr_warn("%.4s: read arg fail\n", key); + return -EIO; + } + + /* This has no effect on newer (2012) SMCs */ +- if (send_byte(len, APPLESMC_DATA_PORT)) { ++ if (send_byte(smc, len, APPLESMC_DATA_PORT)) { + pr_warn("%.4s: read len fail\n", key); + return -EIO; + } + + for (i = 0; i < len; i++) { +- if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, ++ if (wait_status(smc, ++ SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) { + pr_warn("%.4s: read data[%d] fail\n", key, i); + return -EIO; + } +- buffer[i] = inb(APPLESMC_DATA_PORT); ++ buffer[i] = inb(smc->port_base + APPLESMC_DATA_PORT); + } + + /* Read the data port until bit0 is cleared */ + for (i = 0; i < 16; i++) { + udelay(APPLESMC_MIN_WAIT); +- status = inb(APPLESMC_CMD_PORT); ++ status = inb(smc->port_base + APPLESMC_CMD_PORT); + if (!(status & SMC_STATUS_AWAITING_DATA)) + break; +- data = inb(APPLESMC_DATA_PORT); ++ data = inb(smc->port_base + APPLESMC_DATA_PORT); + } + if (i) + pr_warn("flushed %d bytes, last value is: %d\n", i, data); + +- return wait_status(0, SMC_STATUS_BUSY); ++ return wait_status(smc, 0, SMC_STATUS_BUSY); + } + +-static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) ++static int write_smc(struct applesmc_device *smc, u8 cmd, const char *key, ++ const u8 *buffer, u8 len) + { + int i; + int ret; + +- ret = smc_sane(); ++ ret = smc_sane(smc); + if (ret) + return ret; + +- if (send_command(cmd) || send_argument(key)) { ++ if (send_command(smc, cmd) || send_argument(smc, key)) { + pr_warn("%s: write arg fail\n", key); + return -EIO; + } + +- if (send_byte(len, APPLESMC_DATA_PORT)) { ++ if (send_byte(smc, len, APPLESMC_DATA_PORT)) { + pr_warn("%.4s: write len fail\n", key); + return -EIO; + } + + for (i = 0; i < len; i++) { +- if (send_byte(buffer[i], APPLESMC_DATA_PORT)) { ++ if (send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) { + pr_warn("%s: write data fail\n", key); + return -EIO; + } + } + +- return wait_status(0, SMC_STATUS_BUSY); ++ return wait_status(smc, 0, SMC_STATUS_BUSY); + } + +-static int read_register_count(unsigned int *count) ++static int read_register_count(struct applesmc_device *smc, ++ unsigned int *count) + { + __be32 be; + int ret; + +- ret = read_smc(APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4); ++ ret = read_smc(smc, APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4); + if (ret) + return ret; + +@@ -353,7 +360,7 @@ static int 
applesmc_read_entry(struct applesmc_device *smc, + if (entry->len != len) + return -EINVAL; + mutex_lock(&smc->reg.mutex); +- ret = read_smc(APPLESMC_READ_CMD, entry->key, buf, len); ++ ret = read_smc(smc, APPLESMC_READ_CMD, entry->key, buf, len); + mutex_unlock(&smc->reg.mutex); + + return ret; +@@ -367,7 +374,7 @@ static int applesmc_write_entry(struct applesmc_device *smc, + if (entry->len != len) + return -EINVAL; + mutex_lock(&smc->reg.mutex); +- ret = write_smc(APPLESMC_WRITE_CMD, entry->key, buf, len); ++ ret = write_smc(smc, APPLESMC_WRITE_CMD, entry->key, buf, len); + mutex_unlock(&smc->reg.mutex); + return ret; + } +@@ -388,10 +395,10 @@ static const struct applesmc_entry *applesmc_get_entry_by_index( + if (cache->valid) + goto out; + be = cpu_to_be32(index); +- ret = read_smc(APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4); ++ ret = read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4); + if (ret) + goto out; +- ret = read_smc(APPLESMC_GET_KEY_TYPE_CMD, key, info, 6); ++ ret = read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, info, 6); + if (ret) + goto out; + +@@ -589,7 +596,7 @@ static int applesmc_init_smcreg_try(struct applesmc_device *smc) + if (s->init_complete) + return 0; + +- ret = read_register_count(&count); ++ ret = read_register_count(smc, &count); + if (ret) + return ret; + +@@ -1468,7 +1475,7 @@ static int __init applesmc_init(void) + goto out; + } + +- if (!request_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS, ++ if (!request_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS, + "applesmc")) { + ret = -ENXIO; + goto out; +@@ -1490,7 +1497,7 @@ static int __init applesmc_init(void) + out_driver: + platform_driver_unregister(&applesmc_driver); + out_region: +- release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); ++ release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS); + out: + pr_warn("driver init failed (ret=%d)!\n", ret); + return ret; +@@ -1500,7 +1507,7 @@ static void __exit applesmc_exit(void) + { + platform_device_unregister(pdev); + platform_driver_unregister(&applesmc_driver); +- release_region(APPLESMC_DATA_PORT, APPLESMC_NR_PORTS); ++ release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS); + } + + module_init(applesmc_init); +-- +2.30.0 + +From ee3d4bf4a01bc94553bde2ae3e806a63a13faa12 Mon Sep 17 00:00:00 2001 +From: Paul Pawlowski +Date: Sun, 17 Nov 2019 23:12:08 +0100 +Subject: [PATCH 3/6] applesmc: switch to acpi_device (from platform) + +This change makes the change from platform_device +to acpi_device. The rationale for this change is +that on T2 Macs, an additional FixedMemory32 +region is needed for device operation, and it can +be easily resolved via ACPI tables (this will be +done in another commit). + +Additionally, on older Macs, the OS X driver also +looks for the specified ACPI device to resolve +its memory regions, and therefore this change +should not result in any incompatibilities. 
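+
+For orientation (a sketch only; the real hunks follow), the driver now
+binds to the SMC's ACPI node and pulls its resources from _CRS rather
+than hardcoding them:
+
+	static const struct acpi_device_id applesmc_ids[] = {
+		{ "APP0001", 0 },	/* Apple SMC */
+		{ "", 0 },
+	};
+
+	/* I/O ports (and, later, FixedMemory32 on T2) are discovered via
+	 * acpi_walk_resources(dev->handle, METHOD_NAME__CRS, cb, smc). */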
+ +Signed-off-by: Aun-Ali Zaidi +--- + drivers/hwmon/applesmc.c | 125 ++++++++++++++++++++++++++------------- + 1 file changed, 85 insertions(+), 40 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 39ed0bb21365..bdaaf696f7b6 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -19,7 +19,7 @@ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + + #include +-#include ++#include + #include + #include + #include +@@ -35,7 +35,6 @@ + #include + #include + +-#define APPLESMC_PORT_BASE 0x300 + /* data port used by Apple SMC */ + #define APPLESMC_DATA_PORT 0 + /* command/status port used by Apple SMC */ +@@ -138,9 +137,10 @@ struct applesmc_registers { + }; + + struct applesmc_device { +- struct platform_device *dev; ++ struct acpi_device *dev; + struct applesmc_registers reg; + ++ bool port_base_set; + u16 port_base; + + s16 rest_x; +@@ -692,9 +692,13 @@ static int applesmc_init_smcreg(struct applesmc_device *smc) + } + + /* Device model stuff */ ++ ++static int applesmc_init_resources(struct applesmc_device *smc); ++static void applesmc_free_resources(struct applesmc_device *smc); + static int applesmc_create_modules(struct applesmc_device *smc); + static void applesmc_destroy_modules(struct applesmc_device *smc); +-static int applesmc_probe(struct platform_device *dev) ++ ++static int applesmc_add(struct acpi_device *dev) + { + struct applesmc_device *smc; + int ret; +@@ -705,12 +709,16 @@ static int applesmc_probe(struct platform_device *dev) + smc->dev = dev; + mutex_init(&smc->reg.mutex); + +- platform_set_drvdata(dev, smc); ++ dev_set_drvdata(&dev->dev, smc); + +- ret = applesmc_init_smcreg(smc); ++ ret = applesmc_init_resources(smc); + if (ret) + goto out_mem; + ++ ret = applesmc_init_smcreg(smc); ++ if (ret) ++ goto out_res; ++ + applesmc_device_init(smc); + + ret = applesmc_create_modules(smc); +@@ -721,20 +729,23 @@ static int applesmc_probe(struct platform_device *dev) + + out_reg: + applesmc_destroy_smcreg(smc); ++out_res: ++ applesmc_free_resources(smc); + out_mem: +- platform_set_drvdata(dev, NULL); ++ dev_set_drvdata(&dev->dev, NULL); + mutex_destroy(&smc->reg.mutex); + kfree(smc); + + return ret; + } + +-static int applesmc_remove(struct platform_device *dev) ++static int applesmc_remove(struct acpi_device *dev) + { +- struct applesmc_device *smc = platform_get_drvdata(dev); ++ struct applesmc_device *smc = dev_get_drvdata(&dev->dev); + + applesmc_destroy_modules(smc); + applesmc_destroy_smcreg(smc); ++ applesmc_free_resources(smc); + + mutex_destroy(&smc->reg.mutex); + kfree(smc); +@@ -742,6 +753,52 @@ static int applesmc_remove(struct platform_device *dev) + return 0; + } + ++static acpi_status applesmc_walk_resources(struct acpi_resource *res, ++ void *data) ++{ ++ struct applesmc_device *smc = data; ++ ++ switch (res->type) { ++ case ACPI_RESOURCE_TYPE_IO: ++ if (!smc->port_base_set) { ++ if (res->data.io.address_length < APPLESMC_NR_PORTS) ++ return AE_ERROR; ++ smc->port_base = res->data.io.minimum; ++ smc->port_base_set = true; ++ } ++ return AE_OK; ++ ++ case ACPI_RESOURCE_TYPE_END_TAG: ++ if (smc->port_base_set) ++ return AE_OK; ++ else ++ return AE_NOT_FOUND; ++ ++ default: ++ return AE_OK; ++ } ++} ++ ++static int applesmc_init_resources(struct applesmc_device *smc) ++{ ++ int ret; ++ ++ ret = acpi_walk_resources(smc->dev->handle, METHOD_NAME__CRS, ++ applesmc_walk_resources, smc); ++ if (ACPI_FAILURE(ret)) ++ return -ENXIO; ++ ++ if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc")) ++ return -ENXIO; 
++ ++ return 0; ++} ++ ++static void applesmc_free_resources(struct applesmc_device *smc) ++{ ++ release_region(smc->port_base, APPLESMC_NR_PORTS); ++} ++ + /* Synchronize device with memorized backlight state */ + static int applesmc_pm_resume(struct device *dev) + { +@@ -763,18 +820,28 @@ static int applesmc_pm_restore(struct device *dev) + return applesmc_pm_resume(dev); + } + ++static const struct acpi_device_id applesmc_ids[] = { ++ {"APP0001", 0}, ++ {"", 0}, ++}; ++ + static const struct dev_pm_ops applesmc_pm_ops = { + .resume = applesmc_pm_resume, + .restore = applesmc_pm_restore, + }; + +-static struct platform_driver applesmc_driver = { +- .probe = applesmc_probe, +- .remove = applesmc_remove, +- .driver = { +- .name = "applesmc", +- .pm = &applesmc_pm_ops, ++static struct acpi_driver applesmc_driver = { ++ .name = "applesmc", ++ .class = "applesmc", ++ .ids = applesmc_ids, ++ .ops = { ++ .add = applesmc_add, ++ .remove = applesmc_remove + }, ++ .drv = { ++ .pm = &applesmc_pm_ops ++ }, ++ .owner = THIS_MODULE + }; + + /* +@@ -1262,7 +1329,6 @@ static int applesmc_create_nodes(struct applesmc_device *smc, + static int applesmc_create_accelerometer(struct applesmc_device *smc) + { + int ret; +- + if (!smc->reg.has_accelerometer) + return 0; + +@@ -1463,8 +1529,6 @@ static void applesmc_destroy_modules(struct applesmc_device *smc) + applesmc_destroy_nodes(smc, info_group); + } + +-static struct platform_device *pdev; +- + static int __init applesmc_init(void) + { + int ret; +@@ -1475,29 +1539,12 @@ static int __init applesmc_init(void) + goto out; + } + +- if (!request_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS, +- "applesmc")) { +- ret = -ENXIO; +- goto out; +- } +- +- ret = platform_driver_register(&applesmc_driver); ++ ret = acpi_bus_register_driver(&applesmc_driver); + if (ret) +- goto out_region; +- +- pdev = platform_device_register_simple("applesmc", APPLESMC_DATA_PORT, +- NULL, 0); +- if (IS_ERR(pdev)) { +- ret = PTR_ERR(pdev); +- goto out_driver; +- } ++ goto out; + + return 0; + +-out_driver: +- platform_driver_unregister(&applesmc_driver); +-out_region: +- release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS); + out: + pr_warn("driver init failed (ret=%d)!\n", ret); + return ret; +@@ -1505,9 +1552,7 @@ static int __init applesmc_init(void) + + static void __exit applesmc_exit(void) + { +- platform_device_unregister(pdev); +- platform_driver_unregister(&applesmc_driver); +- release_region(APPLESMC_PORT_BASE, APPLESMC_NR_PORTS); ++ acpi_bus_unregister_driver(&applesmc_driver); + } + + module_init(applesmc_init); +-- +2.30.0 + +From 43df89a1377782788760808d8ea4bcf0730effbb Mon Sep 17 00:00:00 2001 +From: Paul Pawlowski +Date: Sun, 17 Nov 2019 23:12:14 +0100 +Subject: [PATCH 4/6] applesmc: key interface wrappers + +This change replaces the read_smc and write_smc +methods with wrappers, additionally removing the +command id parameter from them (and introducing +get_smc_key_by_index and get_smc_key_info). + +This is done as to allow simple implementation +replacement on T2 Macs. The newly introduced +methods mentioned in the previous paragraph need +special handling on T2 and as such had to be +separated. 
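+
+Shape of the resulting indirection, shown as a sketch: the generic
+wrappers carry no command id and simply forward to the port backend,
+leaving room for an alternative backend to slot in later:
+
+	static int read_smc(struct applesmc_device *smc, const char *key,
+			    u8 *buffer, u8 len)
+	{
+		/* today: port I/O; on T2, MMIO will replace this */
+		return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len);
+	}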
+ +Signed-off-by: Aun-Ali Zaidi +--- + drivers/hwmon/applesmc.c | 119 ++++++++++++++++++++++++++------------- + 1 file changed, 79 insertions(+), 40 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index bdaaf696f7b6..3017d8ca2c79 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -172,7 +172,7 @@ static const int debug; + * run out past 500ms. + */ + +-static int wait_status(struct applesmc_device *smc, u8 val, u8 mask) ++static int port_wait_status(struct applesmc_device *smc, u8 val, u8 mask) + { + u8 status; + int us; +@@ -190,13 +190,13 @@ static int wait_status(struct applesmc_device *smc, u8 val, u8 mask) + return -EIO; + } + +-/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ ++/* port_send_byte - Write to SMC data port. Callers must hold applesmc_lock. */ + +-static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port) ++static int port_send_byte(struct applesmc_device *smc, u8 cmd, u16 port) + { + int status; + +- status = wait_status(smc, 0, SMC_STATUS_IB_CLOSED); ++ status = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED); + if (status) + return status; + /* +@@ -205,7 +205,7 @@ static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port) + * this extra read may not happen if status returns both + * simultaneously and this would appear to be required. + */ +- status = wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY); ++ status = port_wait_status(smc, SMC_STATUS_BUSY, SMC_STATUS_BUSY); + if (status) + return status; + +@@ -213,15 +213,16 @@ static int send_byte(struct applesmc_device *smc, u8 cmd, u16 port) + return 0; + } + +-/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ ++/* port_send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ + +-static int send_command(struct applesmc_device *smc, u8 cmd) ++static int port_send_command(struct applesmc_device *smc, u8 cmd) + { + int ret; + +- ret = wait_status(smc, 0, SMC_STATUS_IB_CLOSED); ++ ret = port_wait_status(smc, 0, SMC_STATUS_IB_CLOSED); + if (ret) + return ret; ++ + outb(cmd, smc->port_base + APPLESMC_CMD_PORT); + return 0; + } +@@ -232,53 +233,53 @@ static int send_command(struct applesmc_device *smc, u8 cmd) + * If busy is stuck high after the command then the SMC is jammed. 
+ */ + +-static int smc_sane(struct applesmc_device *smc) ++static int port_smc_sane(struct applesmc_device *smc) + { + int ret; + +- ret = wait_status(smc, 0, SMC_STATUS_BUSY); ++ ret = port_wait_status(smc, 0, SMC_STATUS_BUSY); + if (!ret) + return ret; +- ret = send_command(smc, APPLESMC_READ_CMD); ++ ret = port_send_command(smc, APPLESMC_READ_CMD); + if (ret) + return ret; +- return wait_status(smc, 0, SMC_STATUS_BUSY); ++ return port_wait_status(smc, 0, SMC_STATUS_BUSY); + } + +-static int send_argument(struct applesmc_device *smc, const char *key) ++static int port_send_argument(struct applesmc_device *smc, const char *key) + { + int i; + + for (i = 0; i < 4; i++) +- if (send_byte(smc, key[i], APPLESMC_DATA_PORT)) ++ if (port_send_byte(smc, key[i], APPLESMC_DATA_PORT)) + return -EIO; + return 0; + } + +-static int read_smc(struct applesmc_device *smc, u8 cmd, const char *key, ++static int port_read_smc(struct applesmc_device *smc, u8 cmd, const char *key, + u8 *buffer, u8 len) + { + u8 status, data = 0; + int i; + int ret; + +- ret = smc_sane(smc); ++ ret = port_smc_sane(smc); + if (ret) + return ret; + +- if (send_command(smc, cmd) || send_argument(smc, key)) { ++ if (port_send_command(smc, cmd) || port_send_argument(smc, key)) { + pr_warn("%.4s: read arg fail\n", key); + return -EIO; + } + + /* This has no effect on newer (2012) SMCs */ +- if (send_byte(smc, len, APPLESMC_DATA_PORT)) { ++ if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) { + pr_warn("%.4s: read len fail\n", key); + return -EIO; + } + + for (i = 0; i < len; i++) { +- if (wait_status(smc, ++ if (port_wait_status(smc, + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) { + pr_warn("%.4s: read data[%d] fail\n", key, i); +@@ -298,37 +299,80 @@ static int read_smc(struct applesmc_device *smc, u8 cmd, const char *key, + if (i) + pr_warn("flushed %d bytes, last value is: %d\n", i, data); + +- return wait_status(smc, 0, SMC_STATUS_BUSY); ++ return port_wait_status(smc, 0, SMC_STATUS_BUSY); + } + +-static int write_smc(struct applesmc_device *smc, u8 cmd, const char *key, ++static int port_write_smc(struct applesmc_device *smc, u8 cmd, const char *key, + const u8 *buffer, u8 len) + { + int i; + int ret; + +- ret = smc_sane(smc); ++ ret = port_smc_sane(smc); + if (ret) + return ret; + +- if (send_command(smc, cmd) || send_argument(smc, key)) { ++ if (port_send_command(smc, cmd) || port_send_argument(smc, key)) { + pr_warn("%s: write arg fail\n", key); + return -EIO; + } + +- if (send_byte(smc, len, APPLESMC_DATA_PORT)) { ++ if (port_send_byte(smc, len, APPLESMC_DATA_PORT)) { + pr_warn("%.4s: write len fail\n", key); + return -EIO; + } + + for (i = 0; i < len; i++) { +- if (send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) { ++ if (port_send_byte(smc, buffer[i], APPLESMC_DATA_PORT)) { + pr_warn("%s: write data fail\n", key); + return -EIO; + } + } + +- return wait_status(smc, 0, SMC_STATUS_BUSY); ++ return port_wait_status(smc, 0, SMC_STATUS_BUSY); ++} ++ ++static int port_get_smc_key_info(struct applesmc_device *smc, ++ const char *key, struct applesmc_entry *info) ++{ ++ int ret; ++ u8 raw[6]; ++ ++ ret = port_read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, raw, 6); ++ if (ret) ++ return ret; ++ info->len = raw[0]; ++ memcpy(info->type, &raw[1], 4); ++ info->flags = raw[5]; ++ return 0; ++} ++ ++static int read_smc(struct applesmc_device *smc, const char *key, ++ u8 *buffer, u8 len) ++{ ++ return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); ++} ++ ++static int write_smc(struct 
applesmc_device *smc, const char *key, ++ const u8 *buffer, u8 len) ++{ ++ return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); ++} ++ ++static int get_smc_key_by_index(struct applesmc_device *smc, ++ unsigned int index, char *key) ++{ ++ __be32 be; ++ ++ be = cpu_to_be32(index); ++ return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, ++ (const char *) &be, (u8 *) key, 4); ++} ++ ++static int get_smc_key_info(struct applesmc_device *smc, const char *key, ++ struct applesmc_entry *info) ++{ ++ return port_get_smc_key_info(smc, key, info); + } + + static int read_register_count(struct applesmc_device *smc, +@@ -337,8 +381,8 @@ static int read_register_count(struct applesmc_device *smc, + __be32 be; + int ret; + +- ret = read_smc(smc, APPLESMC_READ_CMD, KEY_COUNT_KEY, (u8 *)&be, 4); +- if (ret) ++ ret = read_smc(smc, KEY_COUNT_KEY, (u8 *)&be, 4); ++ if (ret < 0) + return ret; + + *count = be32_to_cpu(be); +@@ -360,7 +404,7 @@ static int applesmc_read_entry(struct applesmc_device *smc, + if (entry->len != len) + return -EINVAL; + mutex_lock(&smc->reg.mutex); +- ret = read_smc(smc, APPLESMC_READ_CMD, entry->key, buf, len); ++ ret = read_smc(smc, entry->key, buf, len); + mutex_unlock(&smc->reg.mutex); + + return ret; +@@ -374,7 +418,7 @@ static int applesmc_write_entry(struct applesmc_device *smc, + if (entry->len != len) + return -EINVAL; + mutex_lock(&smc->reg.mutex); +- ret = write_smc(smc, APPLESMC_WRITE_CMD, entry->key, buf, len); ++ ret = write_smc(smc, entry->key, buf, len); + mutex_unlock(&smc->reg.mutex); + return ret; + } +@@ -383,8 +427,7 @@ static const struct applesmc_entry *applesmc_get_entry_by_index( + struct applesmc_device *smc, int index) + { + struct applesmc_entry *cache = &smc->reg.cache[index]; +- u8 key[4], info[6]; +- __be32 be; ++ char key[4]; + int ret = 0; + + if (cache->valid) +@@ -394,18 +437,14 @@ static const struct applesmc_entry *applesmc_get_entry_by_index( + + if (cache->valid) + goto out; +- be = cpu_to_be32(index); +- ret = read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, (u8 *)&be, key, 4); ++ ret = get_smc_key_by_index(smc, index, key); + if (ret) + goto out; +- ret = read_smc(smc, APPLESMC_GET_KEY_TYPE_CMD, key, info, 6); ++ memcpy(cache->key, key, 4); ++ ++ ret = get_smc_key_info(smc, key, cache); + if (ret) + goto out; +- +- memcpy(cache->key, key, 4); +- cache->len = info[0]; +- memcpy(cache->type, &info[1], 4); +- cache->flags = info[5]; + cache->valid = true; + + out: +-- +2.30.0 + +From 799e7a54c62a36007f7874c58d7dac87c9651759 Mon Sep 17 00:00:00 2001 +From: Aun-Ali Zaidi +Date: Sun, 17 Nov 2019 23:12:16 +0100 +Subject: [PATCH 5/6] applesmc: basic mmio interface implementation + +This change introduces a basic MMIO-based +interface implementation required to communicate +with the SMC on T2 Macs. The MMIO interface is +enabled only when it's supported on the running +system. + +The MMIO interface replaces legacy port-based SMC +key reads, writes and metadata requests (getting +key by index and getting key info). 
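+
+The basic MMIO transaction implemented below, summarised as a sketch:
+write the key name, SMC id and command into the register window, wait
+for the status bit, then read back a firmware error code:
+
+	iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME);
+	iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID);
+	iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);
+	if (iomem_wait_read(smc))	/* poll for status & 0x20 */
+		return -EIO;
+	err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD);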
+ +(Based on patch by @mcmrarm) + +Signed-off-by: Aun-Ali Zaidi +--- + drivers/hwmon/applesmc.c | 237 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 231 insertions(+), 6 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 3017d8ca2c79..2d23bb9ad9dd 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -42,6 +42,18 @@ + + #define APPLESMC_NR_PORTS 32 /* 0x300-0x31f */ + ++#define APPLESMC_IOMEM_KEY_DATA 0 ++#define APPLESMC_IOMEM_KEY_STATUS 0x4005 ++#define APPLESMC_IOMEM_KEY_NAME 0x78 ++#define APPLESMC_IOMEM_KEY_DATA_LEN 0x7D ++#define APPLESMC_IOMEM_KEY_SMC_ID 0x7E ++#define APPLESMC_IOMEM_KEY_CMD 0x7F ++#define APPLESMC_IOMEM_MIN_SIZE 0x4006 ++ ++#define APPLESMC_IOMEM_KEY_TYPE_CODE 0 ++#define APPLESMC_IOMEM_KEY_TYPE_DATA_LEN 5 ++#define APPLESMC_IOMEM_KEY_TYPE_FLAGS 6 ++ + #define APPLESMC_MAX_DATA_LENGTH 32 + + /* Apple SMC status bits */ +@@ -138,10 +150,13 @@ struct applesmc_registers { + + struct applesmc_device { + struct acpi_device *dev; ++ struct device *ldev; + struct applesmc_registers reg; + +- bool port_base_set; ++ bool port_base_set, iomem_base_set; + u16 port_base; ++ u8 *__iomem iomem_base; ++ u32 iomem_base_addr, iomem_base_size; + + s16 rest_x; + s16 rest_y; +@@ -347,16 +362,156 @@ static int port_get_smc_key_info(struct applesmc_device *smc, + return 0; + } + ++ ++/* ++ * MMIO based communication. ++ * TODO: Use updated mechanism for cmd timeout/retry ++ */ ++ ++static void iomem_clear_status(struct applesmc_device *smc) ++{ ++ if (ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS)) ++ iowrite8(0, smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); ++} ++ ++static int iomem_wait_read(struct applesmc_device *smc) ++{ ++ u8 status; ++ int us; ++ int i; ++ ++ us = APPLESMC_MIN_WAIT; ++ for (i = 0; i < 24 ; i++) { ++ status = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); ++ if (status & 0x20) ++ return 0; ++ usleep_range(us, us * 2); ++ if (i > 9) ++ us <<= 1; ++ } ++ ++ dev_warn(smc->ldev, "%s... 
timeout\n", __func__); ++ return -EIO; ++} ++ ++static int iomem_read_smc(struct applesmc_device *smc, u8 cmd, const char *key, ++ u8 *buffer, u8 len) ++{ ++ u8 err, remote_len; ++ u32 key_int = *((u32 *) key); ++ ++ iomem_clear_status(smc); ++ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); ++ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); ++ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); ++ ++ if (iomem_wait_read(smc)) ++ return -EIO; ++ ++ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); ++ if (err != 0) { ++ dev_warn(smc->ldev, "read_smc_mmio(%x %8x/%.4s) failed: %u\n", ++ cmd, key_int, key, err); ++ return -EIO; ++ } ++ ++ if (cmd == APPLESMC_READ_CMD) { ++ remote_len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN); ++ if (remote_len != len) { ++ dev_warn(smc->ldev, ++ "read_smc_mmio(%x %8x/%.4s) failed: buffer length mismatch (remote = %u, requested = %u)\n", ++ cmd, key_int, key, remote_len, len); ++ return -EINVAL; ++ } ++ } else { ++ remote_len = len; ++ } ++ ++ memcpy_fromio(buffer, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, ++ remote_len); ++ ++ dev_dbg(smc->ldev, "read_smc_mmio(%x %8x/%.4s): buflen=%u reslen=%u\n", ++ cmd, key_int, key, len, remote_len); ++ print_hex_dump_bytes("read_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, remote_len); ++ return 0; ++} ++ ++static int iomem_get_smc_key_type(struct applesmc_device *smc, const char *key, ++ struct applesmc_entry *e) ++{ ++ u8 err; ++ u8 cmd = APPLESMC_GET_KEY_TYPE_CMD; ++ u32 key_int = *((u32 *) key); ++ ++ iomem_clear_status(smc); ++ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); ++ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); ++ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); ++ ++ if (iomem_wait_read(smc)) ++ return -EIO; ++ ++ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); ++ if (err != 0) { ++ dev_warn(smc->ldev, "get_smc_key_type_mmio(%.4s) failed: %u\n", key, err); ++ return -EIO; ++ } ++ ++ e->len = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_DATA_LEN); ++ *((uint32_t *) e->type) = ioread32( ++ smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_CODE); ++ e->flags = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_TYPE_FLAGS); ++ ++ dev_dbg(smc->ldev, "get_smc_key_type_mmio(%.4s): len=%u type=%.4s flags=%x\n", ++ key, e->len, e->type, e->flags); ++ return 0; ++} ++ ++static int iomem_write_smc(struct applesmc_device *smc, u8 cmd, const char *key, ++ const u8 *buffer, u8 len) ++{ ++ u8 err; ++ u32 key_int = *((u32 *) key); ++ ++ iomem_clear_status(smc); ++ iowrite32(key_int, smc->iomem_base + APPLESMC_IOMEM_KEY_NAME); ++ memcpy_toio(smc->iomem_base + APPLESMC_IOMEM_KEY_DATA, buffer, len); ++ iowrite32(len, smc->iomem_base + APPLESMC_IOMEM_KEY_DATA_LEN); ++ iowrite32(0, smc->iomem_base + APPLESMC_IOMEM_KEY_SMC_ID); ++ iowrite32(cmd, smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); ++ ++ if (iomem_wait_read(smc)) ++ return -EIO; ++ ++ err = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_CMD); ++ if (err != 0) { ++ dev_warn(smc->ldev, "write_smc_mmio(%x %.4s) failed: %u\n", cmd, key, err); ++ print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len); ++ return -EIO; ++ } ++ ++ dev_dbg(smc->ldev, "write_smc_mmio(%x %.4s): buflen=%u\n", cmd, key, len); ++ print_hex_dump_bytes("write_smc_mmio(): ", DUMP_PREFIX_NONE, buffer, len); ++ return 0; ++} ++ ++ + static int read_smc(struct applesmc_device *smc, const char *key, + u8 *buffer, u8 len) + { +- return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); ++ if 
(smc->iomem_base_set) ++ return iomem_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); ++ else ++ return port_read_smc(smc, APPLESMC_READ_CMD, key, buffer, len); + } + + static int write_smc(struct applesmc_device *smc, const char *key, + const u8 *buffer, u8 len) + { +- return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); ++ if (smc->iomem_base_set) ++ return iomem_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); ++ else ++ return port_write_smc(smc, APPLESMC_WRITE_CMD, key, buffer, len); + } + + static int get_smc_key_by_index(struct applesmc_device *smc, +@@ -365,14 +520,21 @@ static int get_smc_key_by_index(struct applesmc_device *smc, + __be32 be; + + be = cpu_to_be32(index); +- return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, +- (const char *) &be, (u8 *) key, 4); ++ if (smc->iomem_base_set) ++ return iomem_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, ++ (const char *) &be, (u8 *) key, 4); ++ else ++ return port_read_smc(smc, APPLESMC_GET_KEY_BY_INDEX_CMD, ++ (const char *) &be, (u8 *) key, 4); + } + + static int get_smc_key_info(struct applesmc_device *smc, const char *key, + struct applesmc_entry *info) + { +- return port_get_smc_key_info(smc, key, info); ++ if (smc->iomem_base_set) ++ return iomem_get_smc_key_type(smc, key, info); ++ else ++ return port_get_smc_key_info(smc, key, info); + } + + static int read_register_count(struct applesmc_device *smc, +@@ -746,6 +908,7 @@ static int applesmc_add(struct acpi_device *dev) + if (!smc) + return -ENOMEM; + smc->dev = dev; ++ smc->ldev = &dev->dev; + mutex_init(&smc->reg.mutex); + + dev_set_drvdata(&dev->dev, smc); +@@ -807,6 +970,20 @@ static acpi_status applesmc_walk_resources(struct acpi_resource *res, + } + return AE_OK; + ++ case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: ++ if (!smc->iomem_base_set) { ++ if (res->data.fixed_memory32.address_length < ++ APPLESMC_IOMEM_MIN_SIZE) { ++ dev_warn(smc->ldev, "found iomem but it's too small: %u\n", ++ res->data.fixed_memory32.address_length); ++ return AE_OK; ++ } ++ smc->iomem_base_addr = res->data.fixed_memory32.address; ++ smc->iomem_base_size = res->data.fixed_memory32.address_length; ++ smc->iomem_base_set = true; ++ } ++ return AE_OK; ++ + case ACPI_RESOURCE_TYPE_END_TAG: + if (smc->port_base_set) + return AE_OK; +@@ -818,6 +995,8 @@ static acpi_status applesmc_walk_resources(struct acpi_resource *res, + } + } + ++static int applesmc_try_enable_iomem(struct applesmc_device *smc); ++ + static int applesmc_init_resources(struct applesmc_device *smc) + { + int ret; +@@ -830,11 +1009,57 @@ static int applesmc_init_resources(struct applesmc_device *smc) + if (!request_region(smc->port_base, APPLESMC_NR_PORTS, "applesmc")) + return -ENXIO; + ++ if (smc->iomem_base_set) { ++ if (applesmc_try_enable_iomem(smc)) ++ smc->iomem_base_set = false; ++ } ++ + return 0; + } + ++static int applesmc_try_enable_iomem(struct applesmc_device *smc) ++{ ++ u8 test_val, ldkn_version; ++ ++ dev_dbg(smc->ldev, "Trying to enable iomem based communication\n"); ++ smc->iomem_base = ioremap(smc->iomem_base_addr, smc->iomem_base_size); ++ if (!smc->iomem_base) ++ goto out; ++ ++ /* Apple's driver does this check for some reason */ ++ test_val = ioread8(smc->iomem_base + APPLESMC_IOMEM_KEY_STATUS); ++ if (test_val == 0xff) { ++ dev_warn(smc->ldev, ++ "iomem enable failed: initial status is 0xff (is %x)\n", ++ test_val); ++ goto out_iomem; ++ } ++ ++ if (read_smc(smc, "LDKN", &ldkn_version, 1)) { ++ dev_warn(smc->ldev, "iomem enable failed: ldkn read failed\n"); ++ goto out_iomem; ++ } ++ ++ 
if (ldkn_version < 2) { ++ dev_warn(smc->ldev, ++ "iomem enable failed: ldkn version %u is less than minimum (2)\n", ++ ldkn_version); ++ goto out_iomem; ++ } ++ ++ return 0; ++ ++out_iomem: ++ iounmap(smc->iomem_base); ++ ++out: ++ return -ENXIO; ++} ++ + static void applesmc_free_resources(struct applesmc_device *smc) + { ++ if (smc->iomem_base_set) ++ iounmap(smc->iomem_base); + release_region(smc->port_base, APPLESMC_NR_PORTS); + } + +-- +2.30.0 + +From 4e63e9b77422aae8e7411ddc7a8458c2585c86df Mon Sep 17 00:00:00 2001 +From: Paul Pawlowski +Date: Sun, 17 Nov 2019 23:12:18 +0100 +Subject: [PATCH 6/6] applesmc: fan support on T2 Macs + +T2 Macs changed the fan values from shorts to +floats, and changed the fan manual override +setting from a bitmask to a per-fan boolean +named F0Md (thanks to @kleuter for mentioning +it). + +A minimal soft-float implementation has been +written for convert floats to integers (and vice +versa). + +Signed-off-by: Aun-Ali Zaidi +--- + drivers/hwmon/applesmc.c | 119 +++++++++++++++++++++++++++++++++------ + 1 file changed, 102 insertions(+), 17 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 2d23bb9ad9dd..0938227be612 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -87,6 +87,7 @@ + #define FAN_ID_FMT "F%dID" /* r-o char[16] */ + + #define TEMP_SENSOR_TYPE "sp78" ++#define FLOAT_TYPE "flt " + + /* List of keys used to read/write fan speeds */ + static const char *const fan_speed_fmt[] = { +@@ -96,6 +97,7 @@ static const char *const fan_speed_fmt[] = { + "F%dSf", /* safe speed - not all models */ + "F%dTg", /* target speed (manual: rw) */ + }; ++#define FAN_MANUAL_FMT "F%dMd" + + #define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */ + #define INIT_WAIT_MSECS 50 /* ... in 50ms increments */ +@@ -734,6 +736,42 @@ static int applesmc_read_s16(struct applesmc_device *smc, + return 0; + } + ++/** ++ * applesmc_float_to_u32 - Retrieve the integral part of a float. ++ * This is needed because Apple made fans use float values in the T2. ++ * The fractional point is not significantly useful though, and the integral ++ * part can be easily extracted. ++ */ ++static inline u32 applesmc_float_to_u32(u32 d) ++{ ++ u8 sign = (u8) ((d >> 31) & 1); ++ s32 exp = (s32) ((d >> 23) & 0xff) - 0x7f; ++ u32 fr = d & ((1u << 23) - 1); ++ ++ if (sign || exp < 0) ++ return 0; ++ ++ return (u32) ((1u << exp) + (fr >> (23 - exp))); ++} ++ ++/** ++ * applesmc_u32_to_float - Convert an u32 into a float. ++ * See applesmc_float_to_u32 for a rationale. ++ */ ++static inline u32 applesmc_u32_to_float(u32 d) ++{ ++ u32 dc = d, bc = 0, exp; ++ ++ if (!d) ++ return 0; ++ ++ while (dc >>= 1) ++ ++bc; ++ exp = 0x7f + bc; ++ ++ return (u32) ((exp << 23) | ++ ((d << (23 - (exp - 0x7f))) & ((1u << 23) - 1))); ++} + /* + * applesmc_device_init - initialize the accelerometer. Can sleep. 
+ */ +@@ -1242,6 +1280,7 @@ static ssize_t applesmc_show_fan_speed(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { + struct applesmc_device *smc = dev_get_drvdata(dev); ++ const struct applesmc_entry *entry; + int ret; + unsigned int speed = 0; + char newkey[5]; +@@ -1250,11 +1289,21 @@ static ssize_t applesmc_show_fan_speed(struct device *dev, + scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], + to_index(attr)); + +- ret = applesmc_read_key(smc, newkey, buffer, 2); ++ entry = applesmc_get_entry_by_key(smc, newkey); ++ if (IS_ERR(entry)) ++ return PTR_ERR(entry); ++ ++ if (!strcmp(entry->type, FLOAT_TYPE)) { ++ ret = applesmc_read_entry(smc, entry, (u8 *) &speed, 4); ++ speed = applesmc_float_to_u32(speed); ++ } else { ++ ret = applesmc_read_entry(smc, entry, buffer, 2); ++ speed = ((buffer[0] << 8 | buffer[1]) >> 2); ++ } ++ + if (ret) + return ret; + +- speed = ((buffer[0] << 8 | buffer[1]) >> 2); + return sysfs_emit(sysfsbuf, "%u\n", speed); + } + +@@ -1263,6 +1312,7 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, + const char *sysfsbuf, size_t count) + { + struct applesmc_device *smc = dev_get_drvdata(dev); ++ const struct applesmc_entry *entry; + int ret; + unsigned long speed; + char newkey[5]; +@@ -1274,9 +1324,18 @@ static ssize_t applesmc_store_fan_speed(struct device *dev, + scnprintf(newkey, sizeof(newkey), fan_speed_fmt[to_option(attr)], + to_index(attr)); + +- buffer[0] = (speed >> 6) & 0xff; +- buffer[1] = (speed << 2) & 0xff; +- ret = applesmc_write_key(smc, newkey, buffer, 2); ++ entry = applesmc_get_entry_by_key(smc, newkey); ++ if (IS_ERR(entry)) ++ return PTR_ERR(entry); ++ ++ if (!strcmp(entry->type, FLOAT_TYPE)) { ++ speed = applesmc_u32_to_float(speed); ++ ret = applesmc_write_entry(smc, entry, (u8 *) &speed, 4); ++ } else { ++ buffer[0] = (speed >> 6) & 0xff; ++ buffer[1] = (speed << 2) & 0xff; ++ ret = applesmc_write_key(smc, newkey, buffer, 2); ++ } + + if (ret) + return ret; +@@ -1291,12 +1350,26 @@ static ssize_t applesmc_show_fan_manual(struct device *dev, + int ret; + u16 manual = 0; + u8 buffer[2]; ++ char newkey[5]; ++ bool has_newkey = false; ++ ++ scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr)); ++ ++ ret = applesmc_has_key(smc, newkey, &has_newkey); ++ if (ret) ++ return ret; ++ ++ if (has_newkey) { ++ ret = applesmc_read_key(smc, newkey, buffer, 1); ++ manual = buffer[0]; ++ } else { ++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); ++ manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01; ++ } + +- ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); + if (ret) + return ret; + +- manual = ((buffer[0] << 8 | buffer[1]) >> to_index(attr)) & 0x01; + return sysfs_emit(sysfsbuf, "%d\n", manual); + } + +@@ -1307,27 +1380,39 @@ static ssize_t applesmc_store_fan_manual(struct device *dev, + struct applesmc_device *smc = dev_get_drvdata(dev); + int ret; + u8 buffer[2]; ++ char newkey[5]; ++ bool has_newkey = false; + unsigned long input; + u16 val; + + if (kstrtoul(sysfsbuf, 10, &input) < 0) + return -EINVAL; + +- ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); ++ scnprintf(newkey, sizeof(newkey), FAN_MANUAL_FMT, to_index(attr)); ++ ++ ret = applesmc_has_key(smc, newkey, &has_newkey); + if (ret) +- goto out; ++ return ret; + +- val = (buffer[0] << 8 | buffer[1]); ++ if (has_newkey) { ++ buffer[0] = input & 1; ++ ret = applesmc_write_key(smc, newkey, buffer, 1); ++ } else { ++ ret = applesmc_read_key(smc, FANS_MANUAL, buffer, 2); ++ val = (buffer[0] << 8 | 
buffer[1]); ++ if (ret) ++ goto out; + +- if (input) +- val = val | (0x01 << to_index(attr)); +- else +- val = val & ~(0x01 << to_index(attr)); ++ if (input) ++ val = val | (0x01 << to_index(attr)); ++ else ++ val = val & ~(0x01 << to_index(attr)); + +- buffer[0] = (val >> 8) & 0xFF; +- buffer[1] = val & 0xFF; ++ buffer[0] = (val >> 8) & 0xFF; ++ buffer[1] = val & 0xFF; + +- ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2); ++ ret = applesmc_write_key(smc, FANS_MANUAL, buffer, 2); ++ } + + out: + if (ret) +-- +2.30.0 + +From 58868e6f356229eab48cfdee1603011653a19c79 Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Sun, 9 Oct 2022 15:59:01 +0530 +Subject: [PATCH] applesmc: Add iMacPro to applesmc_whitelist + +The iMacPro1,1 is the only iMacPro released before the line was +discontinued. Add it to the applesmc_whitelist. + +Signed-off-by: Orlando Chamberlain +--- + drivers/hwmon/applesmc.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 8b3f73fcb..493f95bb0 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -1804,6 +1804,10 @@ static const struct dmi_system_id applesmc_whitelist[] __initconst = { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "Macmini") }, + }, ++ { applesmc_dmi_match, "Apple iMacPro", { ++ DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "iMacPro") }, ++ }, + { applesmc_dmi_match, "Apple MacPro", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacPro") }, +-- +2.34.1 + +From e52b0fad357b6203691942831715fce4f26d66e2 Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Tue, 24 Jan 2023 15:46:48 +1100 +Subject: [PATCH 1/1] applesmc: make applesmc_remove void + +for linux6.2 compatibility +--- + drivers/hwmon/applesmc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index d071130ff68d..12be9269a314 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -979,7 +979,7 @@ static int applesmc_add(struct acpi_device *dev) + return ret; + } + +-static int applesmc_remove(struct acpi_device *dev) ++static void applesmc_remove(struct acpi_device *dev) + { + struct applesmc_device *smc = dev_get_drvdata(&dev->dev); + +@@ -990,7 +990,7 @@ static int applesmc_remove(struct acpi_device *dev) + mutex_destroy(&smc->reg.mutex); + kfree(smc); + +- return 0; ++ return; + } + + static acpi_status applesmc_walk_resources(struct acpi_resource *res, +-- +2.39.1 + +From 38786c7979c8ece013b5b7d5cb07dc2aa40198be Mon Sep 17 00:00:00 2001 +From: Orlando Chamberlain +Date: Mon, 30 Jan 2023 18:42:21 +1100 +Subject: [PATCH 1/1] applesmc: battery charge limiter + +--- + drivers/hwmon/applesmc.c | 42 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 41 insertions(+), 1 deletion(-) + +diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c +index 12be9269a314..bc1eec74cfef 100644 +--- a/drivers/hwmon/applesmc.c ++++ b/drivers/hwmon/applesmc.c +@@ -1478,6 +1478,35 @@ static void applesmc_brightness_set(struct led_classdev *led_cdev, + dev_dbg(led_cdev->dev, "work was already on the queue.\n"); + } + ++static ssize_t applesmc_BCLM_store(struct device *dev, ++ struct device_attribute *attr, char *sysfsbuf, size_t count) ++{ ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ u8 val; ++ ++ if (kstrtou8(sysfsbuf, 10, &val) < 0) ++ return -EINVAL; ++ ++ if (val < 0 || val > 100) ++ return -EINVAL; ++ ++ if 
(applesmc_write_key(smc, "BCLM", &val, 1)) ++ return -ENODEV; ++ return count; ++} ++ ++static ssize_t applesmc_BCLM_show(struct device *dev, ++ struct device_attribute *attr, char *sysfsbuf) ++{ ++ struct applesmc_device *smc = dev_get_drvdata(dev); ++ u8 val; ++ ++ if (applesmc_read_key(smc, "BCLM", &val, 1)) ++ return -ENODEV; ++ ++ return sysfs_emit(sysfsbuf, "%d\n", val); ++} ++ + static ssize_t applesmc_key_count_show(struct device *dev, + struct device_attribute *attr, char *sysfsbuf) + { +@@ -1612,6 +1641,11 @@ static struct applesmc_node_group temp_group[] = { + { } + }; + ++static struct applesmc_node_group BCLM_group[] = { ++ { "battery_charge_limit", applesmc_BCLM_show, applesmc_BCLM_store }, ++ { } ++}; ++ + /* Module stuff */ + + /* +@@ -1830,10 +1864,13 @@ static int applesmc_create_modules(struct applesmc_device *smc) + ret = applesmc_create_nodes(smc, info_group, 1); + if (ret) + goto out; ++ ret = applesmc_create_nodes(smc, BCLM_group, 1); ++ if (ret) ++ goto out_info; + + ret = applesmc_create_nodes(smc, fan_group, smc->reg.fan_count); + if (ret) +- goto out_info; ++ goto out_bclm; + + ret = applesmc_create_nodes(smc, temp_group, smc->reg.index_count); + if (ret) +@@ -1869,6 +1906,8 @@ static int applesmc_create_modules(struct applesmc_device *smc) + applesmc_destroy_nodes(smc, temp_group); + out_fans: + applesmc_destroy_nodes(smc, fan_group); ++out_bclm: ++ applesmc_destroy_nodes(smc, BCLM_group); + out_info: + applesmc_destroy_nodes(smc, info_group); + out: +@@ -1883,6 +1922,7 @@ static void applesmc_destroy_modules(struct applesmc_device *smc) + applesmc_release_accelerometer(smc); + applesmc_destroy_nodes(smc, temp_group); + applesmc_destroy_nodes(smc, fan_group); ++ applesmc_destroy_nodes(smc, BCLM_group); + applesmc_destroy_nodes(smc, info_group); + } + +-- +2.39.1 + +From 327e6e1d0f6e8db68c124dff4d6a326b381ccedb Mon Sep 17 00:00:00 2001 +From: Aun-Ali Zaidi +Date: Wed, 23 Mar 2022 17:12:21 +0530 +Subject: [PATCH] Input: bcm5974 - Add support for the T2 Macs + +--- + drivers/input/mouse/bcm5974.c | 138 ++++++++++++++++++++++++++++++++++ + 1 file changed, 138 insertions(+) + +diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c +index 59a14505b..88f17f21a 100644 +--- a/drivers/input/mouse/bcm5974.c ++++ b/drivers/input/mouse/bcm5974.c +@@ -83,6 +83,24 @@ + #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 + #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 + ++/* T2-Attached Devices */ ++/* MacbookAir8,1 (2018) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K 0x027a ++/* MacbookPro15,2 (2018) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132 0x027b ++/* MacbookPro15,1 (2018) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680 0x027c ++/* MacbookPro15,4 (2019) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213 0x027d ++/* MacbookPro16,2 (2020) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K 0x027e ++/* MacbookPro16,3 (2020) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223 0x027f ++/* MacbookAir9,1 (2020) */ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K 0x0280 ++/* MacbookPro16,1 (2019)*/ ++#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F 0x0340 ++ + #define BCM5974_DEVICE(prod) { \ + .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_INT_CLASS | \ +@@ -147,6 +165,22 @@ static const struct usb_device_id bcm5974_table[] = { + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), ++ /* MacbookAir8,1 */ ++ 
BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K), ++ /* MacbookPro15,2 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132), ++ /* MacbookPro15,1 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680), ++ /* MacbookPro15,4 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213), ++ /* MacbookPro16,2 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K), ++ /* MacbookPro16,3 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223), ++ /* MacbookAir9,1 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K), ++ /* MacbookPro16,1 */ ++ BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F), + /* Terminating entry */ + {} + }; +@@ -483,6 +517,110 @@ static const struct bcm5974_config bcm5974_config_table[] = { + { SN_COORD, -203, 6803 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } + }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -6243, 6749 }, ++ { SN_COORD, -170, 7685 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -6243, 6749 }, ++ { SN_COORD, -170, 7685 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -7456, 7976 }, ++ { SN_COORD, -1768, 7685 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -6243, 6749 }, ++ { SN_COORD, -170, 7685 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -7823, 8329 }, ++ { SN_COORD, -370, 7925 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -6243, 6749 }, ++ { SN_COORD, -170, 7685 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -6243, 6749 }, ++ { SN_COORD, -170, 7685 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, ++ { ++ USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F, ++ 0, ++ 0, ++ HAS_INTEGRATED_BUTTON, ++ 0, sizeof(struct bt_data), ++ 0x83, DATAFORMAT(TYPE4), ++ { SN_PRESSURE, 0, 300 }, ++ { SN_WIDTH, 0, 2048 }, ++ { SN_COORD, -8916, 9918 }, ++ { SN_COORD, -1934, 9835 }, ++ { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } ++ }, + {} + }; + +-- +2.25.1 + +From: Ashish Arora +Subject: Re: [PATCH] 
drm/i915: Discard large BIOS framebuffers causing display corruption.
+Date: Sat, 08 Jan 2022 21:43:18 +1100
+
+On certain 4k panels, the BIOS framebuffer is larger than what the
+panel requires, causing display corruption. Introduce a check for this.
+
+Signed-off-by: Ashish Arora
+---
+ drivers/gpu/drm/i915/display/intel_fbdev.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
+index 112aa04..8fb8bcc 100644
+--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
++++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
+@@ -217,10 +217,10 @@ static int intelfb_create(struct drm_fb_helper *helper,
+ 		return ret;
+ 
+ 	if (intel_fb &&
+-	    (sizes->fb_width > intel_fb->base.width ||
+-	     sizes->fb_height > intel_fb->base.height)) {
++	    (sizes->fb_width != intel_fb->base.width ||
++	     sizes->fb_height != intel_fb->base.height)) {
+ 		drm_dbg_kms(&dev_priv->drm,
+-			    "BIOS fb too small (%dx%d), we require (%dx%d),"
++			    "BIOS fb not valid (%dx%d), we require (%dx%d),"
+ 			    " releasing it\n",
+ 			    intel_fb->base.width, intel_fb->base.height,
+ 			    sizes->fb_width, sizes->fb_height);
+--
+1.8.3.1
+
+From 3d4a4a3d62815f90fc65a827a3e2de96c4571350 Mon Sep 17 00:00:00 2001
+From: Orlando Chamberlain
+Date: Mon, 20 Nov 2023 10:32:23 +1100
+Subject: [PATCH 1/1] acpi video: force native for some T2 macbooks
+
+The intel backlight is needed for these.
+
+MacBookPro15,2/4 or MacBookPro16,3 or MacBookAir8,1/2 might also need
+this, so I'm not going to be submitting this upstream yet.
+
+mbp16,3 was reported not to have this issue on 6.5.8, at least.
+---
+ drivers/acpi/video_detect.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index 442396f6ed1f..baf7264d7b94 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -513,6 +513,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ 		DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,2"),
+ 		},
+ 	},
++	{
++	 .callback = video_detect_force_native,
++	 /* Apple MacBook Air 9,1 */
++	 .matches = {
++		DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
++		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir9,1"),
++		},
++	},
+ 	{
+ 	 /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
+ 	 .callback = video_detect_force_native,
+@@ -522,6 +530,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ 		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro12,1"),
+ 		},
+ 	},
++	{
++	 .callback = video_detect_force_native,
++	 /* Apple MacBook Pro 16,2 */
++	 .matches = {
++		DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
++		DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro16,2"),
++		},
++	},
+ 	{
+ 	 .callback = video_detect_force_native,
+ 	 /* Dell Inspiron N4010 */
+--
+2.42.1
+
+From 923cfe9b86c71761b164f995631817e9af169f29 Mon Sep 17 00:00:00 2001
+From: Hector Martin
+Date: Tue, 14 Feb 2023 18:33:19 +0900
+Subject: [PATCH] brcmfmac: cfg80211: Use WSEC to set SAE password
+
+Using the WSEC command instead of sae_password seems to be the supported
+mechanism on newer firmware, and also how the brcmdhd driver does it.
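+
+A condensed sketch of the resulting flow (illustrative only, simplified
+from the hunks below; error logging via bphy_err() is omitted): both the
+PMK and the SAE password are funneled through one helper that programs
+the key material with BRCMF_C_SET_WSEC_PMK, differing only in the flags
+word:
+
+	static int brcmf_set_wsec(struct brcmf_if *ifp, const u8 *key,
+				  u16 key_len, u16 flags)
+	{
+		struct brcmf_wsec_pmk_le pmk = {};
+
+		if (key_len > sizeof(pmk.key))
+			return -EINVAL;	/* key must fit the wsec buffer */
+
+		pmk.key_len = cpu_to_le16(key_len);
+		pmk.flags = cpu_to_le16(flags);
+		memcpy(pmk.key, key, key_len);
+
+		return brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK,
+					      &pmk, sizeof(pmk));
+	}
+
+	/* PMK path passes flags == 0; the SAE password path passes
+	 * flags == BRCMF_WSEC_PASSPHRASE. */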
+ +Signed-off-by: Hector Martin +--- + .../broadcom/brcm80211/brcmfmac/cfg80211.c | 46 ++++++++----------- + .../broadcom/brcm80211/brcmfmac/fwil_types.h | 2 +- + 2 files changed, 20 insertions(+), 28 deletions(-) + +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +index 87f4d53fb..7ccdbafca 100644 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +@@ -1682,52 +1682,44 @@ static u16 brcmf_map_fw_linkdown_reason(const struct brcmf_event_msg *e) + return reason; + } + +-static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len) ++static int brcmf_set_wsec(struct brcmf_if *ifp, const u8 *key, u16 key_len, u16 flags) + { + struct brcmf_pub *drvr = ifp->drvr; + struct brcmf_wsec_pmk_le pmk; + int err; + ++ if (key_len > sizeof(pmk.key)) { ++ bphy_err(drvr, "key must be less than %zu bytes\n", ++ sizeof(pmk.key)); ++ return -EINVAL; ++ } ++ + memset(&pmk, 0, sizeof(pmk)); + +- /* pass pmk directly */ +- pmk.key_len = cpu_to_le16(pmk_len); +- pmk.flags = cpu_to_le16(0); +- memcpy(pmk.key, pmk_data, pmk_len); ++ /* pass key material directly */ ++ pmk.key_len = cpu_to_le16(key_len); ++ pmk.flags = cpu_to_le16(flags); ++ memcpy(pmk.key, key, key_len); + +- /* store psk in firmware */ ++ /* store key material in firmware */ + err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK, + &pmk, sizeof(pmk)); + if (err < 0) + bphy_err(drvr, "failed to change PSK in firmware (len=%u)\n", +- pmk_len); ++ key_len); + + return err; + } + ++static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len) ++{ ++ return brcmf_set_wsec(ifp, pmk_data, pmk_len, 0); ++} ++ + static int brcmf_set_sae_password(struct brcmf_if *ifp, const u8 *pwd_data, + u16 pwd_len) + { +- struct brcmf_pub *drvr = ifp->drvr; +- struct brcmf_wsec_sae_pwd_le sae_pwd; +- int err; +- +- if (pwd_len > BRCMF_WSEC_MAX_SAE_PASSWORD_LEN) { +- bphy_err(drvr, "sae_password must be less than %d\n", +- BRCMF_WSEC_MAX_SAE_PASSWORD_LEN); +- return -EINVAL; +- } +- +- sae_pwd.key_len = cpu_to_le16(pwd_len); +- memcpy(sae_pwd.key, pwd_data, pwd_len); +- +- err = brcmf_fil_iovar_data_set(ifp, "sae_password", &sae_pwd, +- sizeof(sae_pwd)); +- if (err < 0) +- bphy_err(drvr, "failed to set SAE password in firmware (len=%u)\n", +- pwd_len); +- +- return err; ++ return brcmf_set_wsec(ifp, pwd_data, pwd_len, BRCMF_WSEC_PASSPHRASE); + } + + static void brcmf_link_down(struct brcmf_cfg80211_vif *vif, u16 reason, +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +index 792adaf88..3ba90878c 100644 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +@@ -574,7 +574,7 @@ struct brcmf_wsec_key_le { + struct brcmf_wsec_pmk_le { + __le16 key_len; + __le16 flags; +- u8 key[2 * BRCMF_WSEC_MAX_PSK_LEN + 1]; ++ u8 key[BRCMF_WSEC_MAX_SAE_PASSWORD_LEN]; + }; + + /** +-- +2.37.2 + +From patchwork Wed Dec 27 10:10:03 2023 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +X-Patchwork-Submitter: Johan Hovold +X-Patchwork-Id: 13505281 +Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org + [10.30.226.201]) + (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) + (No client certificate requested) + by 
smtp.subspace.kernel.org (Postfix) with ESMTPS id 731F42D602; + Wed, 27 Dec 2023 10:10:50 +0000 (UTC) +Authentication-Results: smtp.subspace.kernel.org; + dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org + header.b="OE5gY3Rg" +Received: by smtp.kernel.org (Postfix) with ESMTPSA id E1F71C433C8; + Wed, 27 Dec 2023 10:10:49 +0000 (UTC) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; + s=k20201202; t=1703671849; + bh=HNX2qe5wIUjgAOE0bih0cYXbYbw77i5qRYAGTFhWf8Q=; + h=From:To:Cc:Subject:Date:From; + b=OE5gY3RgSNMMNpr/DemitpLvv8B5KUxkea+huKa97KhEilNAbl/OG/gZPSswoI3kl + ifwN2LiGgFt8jyQh8hVsCoIrrOOGgiqeJ9ivyZI86fxAmaICglCBVc65vzpPozQdYn + YsryqO/D6A6i3egHRr7G52DifE/DihYN9uZqhAIHTY+ESsr/mJvwodvV8HNt60TaF9 + dFeWSj4rAgt/QaclFNs1wznkamzzJ3UloOq2NJbzC3F6ILEsWfuPRm8iKBlgwdNTZ+ + bn4JmN3Zh0Mr2uaTVg902uWeLcZ93sY9BmqH1AOBDEXTlUvPd7n6xVrSnOLfdlTR5k + O5JKLTyyjvyTQ== +Received: from johan by xi.lan with local (Exim 4.96.2) + (envelope-from ) + id 1rIQs7-0002kc-0u; + Wed, 27 Dec 2023 11:10:44 +0100 +From: Johan Hovold +To: Luiz Augusto von Dentz , + Marcel Holtmann , + Johan Hedberg +Cc: Hector Martin , + Sven Peter , + Alyssa Rosenzweig , + asahi@lists.linux.dev, + linux-arm-kernel@lists.infradead.org, + linux-bluetooth@vger.kernel.org, + linux-kernel@vger.kernel.org, + Johan Hovold , + stable@vger.kernel.org, + Felix Zhang +Subject: [PATCH] Bluetooth: hci_bcm4377: do not mark valid bd_addr as invalid +Date: Wed, 27 Dec 2023 11:10:03 +0100 +Message-ID: <20231227101003.10534-1-johan+linaro@kernel.org> +X-Mailer: git-send-email 2.41.0 +Precedence: bulk +X-Mailing-List: linux-bluetooth@vger.kernel.org +List-Id: +List-Subscribe: +List-Unsubscribe: +MIME-Version: 1.0 + +A recent commit restored the original (and still documented) semantics +for the HCI_QUIRK_USE_BDADDR_PROPERTY quirk so that the device address +is considered invalid unless an address is provided by firmware. + +This specifically means that this flag must only be set for devices with +invalid addresses, but the Broadcom BCM4377 driver has so far been +setting this flag unconditionally. + +Fortunately the driver already checks for invalid addresses during setup +and sets the HCI_QUIRK_INVALID_BDADDR flag, which can simply be replaced +with HCI_QUIRK_USE_BDADDR_PROPERTY to indicate that the default address +is invalid but can be overridden by firmware (long term, this should +probably just always be allowed). 
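+
+For reference, a minimal summary of the two quirks' semantics (the
+devicetree property name below is taken from the quirk's documented
+behaviour, not from this patch):
+
+	/*
+	 * HCI_QUIRK_INVALID_BDADDR: the controller address is unusable
+	 * until userspace programs one via hdev->set_bdaddr.
+	 *
+	 * HCI_QUIRK_USE_BDADDR_PROPERTY: the default address is treated
+	 * as invalid unless firmware supplies one via the
+	 * 'local-bd-address' devicetree property.
+	 */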
+
+Fixes: 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk")
+Cc: stable@vger.kernel.org	# 6.5
+Reported-by: Felix Zhang
+Link: https://lore.kernel.org/r/77419ffacc5b4875e920e038332575a2a5bff29f.camel@mrman314.tech/
+Signed-off-by: Johan Hovold
+---
+ drivers/bluetooth/hci_bcm4377.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c
+index a61757835695..9a7243d5db71 100644
+--- a/drivers/bluetooth/hci_bcm4377.c
++++ b/drivers/bluetooth/hci_bcm4377.c
+@@ -1417,7 +1417,7 @@ static int bcm4377_check_bdaddr(struct bcm4377_data *bcm4377)
+ 
+ 	bda = (struct hci_rp_read_bd_addr *)skb->data;
+ 	if (!bcm4377_is_valid_bdaddr(bcm4377, &bda->bdaddr))
+-		set_bit(HCI_QUIRK_INVALID_BDADDR, &bcm4377->hdev->quirks);
++		set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &bcm4377->hdev->quirks);
+ 
+ 	kfree_skb(skb);
+ 	return 0;
+@@ -2368,7 +2368,6 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ 	hdev->set_bdaddr = bcm4377_hci_set_bdaddr;
+ 	hdev->setup = bcm4377_hci_setup;
+ 
+-	set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ 	if (bcm4377->hw->broken_mws_transport_config)
+ 		set_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &hdev->quirks);
+ 	if (bcm4377->hw->broken_ext_scan)
diff --git a/SOURCES/tkg-BBRv2.patch b/SOURCES/tkg-BBRv2.patch
deleted file mode 100644
index 35640e3..0000000
--- a/SOURCES/tkg-BBRv2.patch
+++ /dev/null
@@ -1,3311 +0,0 @@
-From eff7e1edf2fec63bac1a81f8c86295dd3f48422a Mon Sep 17 00:00:00 2001
-From: Oleksandr Natalenko
-Date: Mon, 4 Apr 2022 08:23:19 +0200
-Subject: [PATCH] tcp_bbr2: introduce BBRv2
-
-Signed-off-by: Oleksandr Natalenko
----
- include/linux/tcp.h                |    3 +-
- include/net/inet_connection_sock.h |    3 +-
- include/net/tcp.h                  |   41 +-
- include/uapi/linux/inet_diag.h     |   33 +
- net/ipv4/Kconfig                   |   22 +
- net/ipv4/Makefile                  |    1 +
- net/ipv4/tcp.c                     |    1 +
- net/ipv4/tcp_bbr.c                 |   38 +-
- net/ipv4/tcp_bbr2.c                | 2674 ++++++++++++++++++++++++++++
- net/ipv4/tcp_cong.c                |    1 +
- net/ipv4/tcp_input.c               |   27 +-
- net/ipv4/tcp_output.c              |   26 +-
- net/ipv4/tcp_rate.c                |   30 +-
- net/ipv4/tcp_timer.c               |    1 +
- 14 files changed, 2867 insertions(+), 34 deletions(-)
- create mode 100644 net/ipv4/tcp_bbr2.c
-
-diff --git a/include/linux/tcp.h b/include/linux/tcp.h
-index 41b1da621a45..d8f94ef1a297 100644
---- a/include/linux/tcp.h
-+++ b/include/linux/tcp.h
-@@ -255,7 +255,8 @@ struct tcp_sock {
- 	u8	compressed_ack;
- 	u8	dup_ack_counter:2,
- 		tlp_retrans:1,	/* TLP is a retransmission */
--		unused:5;
-+		fast_ack_mode:2, /* which fast ack mode ?
*/ -+ unused:3; - u32 chrono_start; /* Start time in jiffies of a TCP chrono */ - u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ - u8 chrono_type:2, /* current chronograph type */ -diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h -index c2b15f7e5516..d85858efa571 100644 ---- a/include/net/inet_connection_sock.h -+++ b/include/net/inet_connection_sock.h -@@ -135,7 +135,8 @@ struct inet_connection_sock { - u32 icsk_probes_tstamp; - u32 icsk_user_timeout; - -- u64 icsk_ca_priv[104 / sizeof(u64)]; -+/* XXX inflated by temporary internal debugging info */ -+ u64 icsk_ca_priv[216 / sizeof(u64)]; - #define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) - }; - -diff --git a/include/net/tcp.h b/include/net/tcp.h -index 14d45661a84d..7261fae79403 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, - #define TCP_ECN_QUEUE_CWR 2 - #define TCP_ECN_DEMAND_CWR 4 - #define TCP_ECN_SEEN 8 -+#define TCP_ECN_ECT_PERMANENT 16 - - enum tcp_tw_status { - TCP_TW_SUCCESS = 0, -@@ -823,6 +824,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) - return max_t(s64, t1 - t0, 0); - } - -+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) -+{ -+ return max_t(s32, t1 - t0, 0); -+} -+ - static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) - { - return tcp_ns_to_ts(skb->skb_mstamp_ns); -@@ -898,9 +904,14 @@ struct tcp_skb_cb { - /* pkts S/ACKed so far upon tx of skb, incl retrans: */ - __u32 delivered; - /* start of send pipeline phase */ -- u64 first_tx_mstamp; -+ u32 first_tx_mstamp; - /* when we reached the "delivered" count */ -- u64 delivered_mstamp; -+ u32 delivered_mstamp; -+#define TCPCB_IN_FLIGHT_BITS 20 -+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) -+ u32 in_flight:20, /* packets in flight at transmit */ -+ unused2:12; -+ u32 lost; /* packets lost so far upon tx of skb */ - } tx; /* only used for outgoing skbs */ - union { - struct inet_skb_parm h4; -@@ -1026,7 +1037,11 @@ enum tcp_ca_ack_event_flags { - #define TCP_CONG_NON_RESTRICTED 0x1 - /* Requires ECN/ECT set on all packets */ - #define TCP_CONG_NEEDS_ECN 0x2 --#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) -+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */ -+#define TCP_CONG_WANTS_CE_EVENTS 0x4 -+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ -+ TCP_CONG_NEEDS_ECN | \ -+ TCP_CONG_WANTS_CE_EVENTS) - - union tcp_cc_info; - -@@ -1046,8 +1061,11 @@ struct ack_sample { - */ - struct rate_sample { - u64 prior_mstamp; /* starting timestamp for interval */ -+ u32 prior_lost; /* tp->lost at "prior_mstamp" */ - u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ - u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ -+ u32 tx_in_flight; /* packets in flight at starting timestamp */ -+ s32 lost; /* number of packets lost over interval */ - s32 delivered; /* number of packets delivered over interval */ - s32 delivered_ce; /* number of packets delivered w/ CE marks*/ - long interval_us; /* time for tp->delivered to incr "delivered" */ -@@ -1061,6 +1079,7 @@ struct rate_sample { - bool is_app_limited; /* is sample from packet with bubble in pipe? */ - bool is_retrans; /* is sample from retransmission? */ - bool is_ack_delayed; /* is this (likely) a delayed ACK? */ -+ bool is_ece; /* did this ACK have ECN marked? 
*/ - }; - - struct tcp_congestion_ops { -@@ -1084,8 +1103,11 @@ struct tcp_congestion_ops { - /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - -- /* override sysctl_tcp_min_tso_segs */ -- u32 (*min_tso_segs)(struct sock *sk); -+ /* pick target number of segments per TSO/GSO skb (optional): */ -+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); -+ -+ /* react to a specific lost skb (optional) */ -+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); - - /* call when packets are delivered to update cwnd and pacing rate, - * after all the ca_state processing. (optional) -@@ -1148,6 +1170,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) - } - #endif - -+static inline bool tcp_ca_wants_ce_events(const struct sock *sk) -+{ -+ const struct inet_connection_sock *icsk = inet_csk(sk); -+ -+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | -+ TCP_CONG_WANTS_CE_EVENTS); -+} -+ - static inline bool tcp_ca_needs_ecn(const struct sock *sk) - { - const struct inet_connection_sock *icsk = inet_csk(sk); -@@ -1167,6 +1197,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) - void tcp_set_ca_state(struct sock *sk, const u8 ca_state); - - /* From tcp_rate.c */ -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - struct rate_sample *rs); -diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h -index 50655de04c9b..0e24f11627d5 100644 ---- a/include/uapi/linux/inet_diag.h -+++ b/include/uapi/linux/inet_diag.h -@@ -231,9 +231,42 @@ struct tcp_bbr_info { - __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ - }; - -+/* Phase as reported in netlink/ss stats. 
*/ -+enum tcp_bbr2_phase { -+ BBR2_PHASE_INVALID = 0, -+ BBR2_PHASE_STARTUP = 1, -+ BBR2_PHASE_DRAIN = 2, -+ BBR2_PHASE_PROBE_RTT = 3, -+ BBR2_PHASE_PROBE_BW_UP = 4, -+ BBR2_PHASE_PROBE_BW_DOWN = 5, -+ BBR2_PHASE_PROBE_BW_CRUISE = 6, -+ BBR2_PHASE_PROBE_BW_REFILL = 7 -+}; -+ -+struct tcp_bbr2_info { -+ /* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */ -+ __u32 bbr_bw_lsb; /* lower 32 bits of bw */ -+ __u32 bbr_bw_msb; /* upper 32 bits of bw */ -+ __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ -+ __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ -+ __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ -+ __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ -+ __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ -+ __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ -+ __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ -+ __u8 bbr_mode; /* current bbr_mode in state machine */ -+ __u8 bbr_phase; /* current state machine phase */ -+ __u8 unused1; /* alignment padding; not used yet */ -+ __u8 bbr_version; /* MUST be at this offset in struct */ -+ __u32 bbr_inflight_lo; /* lower/short-term data volume bound */ -+ __u32 bbr_inflight_hi; /* higher/long-term data volume bound */ -+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ -+}; -+ - union tcp_cc_info { - struct tcpvegas_info vegas; - struct tcp_dctcp_info dctcp; - struct tcp_bbr_info bbr; -+ struct tcp_bbr2_info bbr2; - }; - #endif /* _UAPI_INET_DIAG_H_ */ -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 2dfb12230f08..b6bec331a82e 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -678,6 +678,24 @@ config TCP_CONG_BBR - AQM schemes that do not provide a delay signal. It requires the fq - ("Fair Queue") pacing packet scheduler. - -+config TCP_CONG_BBR2 -+ tristate "BBR2 TCP" -+ default n -+ help -+ -+ BBR2 TCP congestion control is a model-based congestion control -+ algorithm that aims to maximize network utilization, keep queues and -+ retransmit rates low, and to be able to coexist with Reno/CUBIC in -+ common scenarios. It builds an explicit model of the network path. It -+ tolerates a targeted degree of random packet loss and delay that are -+ unrelated to congestion. It can operate over LAN, WAN, cellular, wifi, -+ or cable modem links, and can use DCTCP-L4S-style ECN signals. It can -+ coexist with flows that use loss-based congestion control, and can -+ operate with shallow buffers, deep buffers, bufferbloat, policers, or -+ AQM schemes that do not provide a delay signal. It requires pacing, -+ using either TCP internal pacing or the fq ("Fair Queue") pacing packet -+ scheduler. 
-+ - choice - prompt "Default TCP congestion control" - default DEFAULT_CUBIC -@@ -715,6 +733,9 @@ choice - config DEFAULT_BBR - bool "BBR" if TCP_CONG_BBR=y - -+ config DEFAULT_BBR2 -+ bool "BBR2" if TCP_CONG_BBR2=y -+ - config DEFAULT_RENO - bool "Reno" - endchoice -@@ -739,6 +760,7 @@ config DEFAULT_TCP_CONG - default "dctcp" if DEFAULT_DCTCP - default "cdg" if DEFAULT_CDG - default "bbr" if DEFAULT_BBR -+ default "bbr2" if DEFAULT_BBR2 - default "cubic" - - config TCP_MD5SIG -diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile -index bbdd9c44f14e..8dee1547d820 100644 ---- a/net/ipv4/Makefile -+++ b/net/ipv4/Makefile -@@ -46,6 +46,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o - obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o - obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o - obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o -+obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o - obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o - obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o - obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 4f2205756cfe..c139747666dd 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3188,6 +3188,7 @@ int tcp_disconnect(struct sock *sk, int flags) - tp->rx_opt.dsack = 0; - tp->rx_opt.num_sacks = 0; - tp->rcv_ooopack = 0; -+ tp->fast_ack_mode = 0; - - - /* Clean up fastopen related fields */ -diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c -index 54eec33c6e1c..bfbf158c71f4 100644 ---- a/net/ipv4/tcp_bbr.c -+++ b/net/ipv4/tcp_bbr.c -@@ -294,26 +294,40 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) - sk->sk_pacing_rate = rate; - } - --/* override sysctl_tcp_min_tso_segs */ - __bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) - { - return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; - } - -+/* Return the number of segments BBR would like in a TSO/GSO skb, given -+ * a particular max gso size as a constraint. -+ */ -+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, -+ u32 gso_max_size) -+{ -+ u32 segs; -+ u64 bytes; -+ -+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ -+ bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; -+ -+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); -+ segs = max_t(u32, div_u64(bytes, mss_now), bbr_min_tso_segs(sk)); -+ return segs; -+} -+ -+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ -+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) -+{ -+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); -+} -+ -+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ - static u32 bbr_tso_segs_goal(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -- u32 segs, bytes; -- -- /* Sort of tcp_tso_autosize() but ignoring -- * driver provided sk_gso_max_size. 
-- */ -- bytes = min_t(unsigned long, -- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), -- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); -- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - -- return min(segs, 0x7FU); -+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); - } - - /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -@@ -1149,7 +1163,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { - .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = bbr_cwnd_event, - .ssthresh = bbr_ssthresh, -- .min_tso_segs = bbr_min_tso_segs, -+ .tso_segs = bbr_tso_segs, - .get_info = bbr_get_info, - .set_state = bbr_set_state, - }; -diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c -new file mode 100644 -index 000000000000..488429f0f3d0 ---- /dev/null -+++ b/net/ipv4/tcp_bbr2.c -@@ -0,0 +1,2674 @@ -+/* BBR (Bottleneck Bandwidth and RTT) congestion control, v2 -+ * -+ * BBRv2 is a model-based congestion control algorithm that aims for low -+ * queues, low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model -+ * of the network path, it uses measurements of bandwidth and RTT, as well as -+ * (if they occur) packet loss and/or DCTCP/L4S-style ECN signals. Note that -+ * although it can use ECN or loss signals explicitly, it does not require -+ * either; it can bound its in-flight data based on its estimate of the BDP. -+ * -+ * The model has both higher and lower bounds for the operating range: -+ * lo: bw_lo, inflight_lo: conservative short-term lower bound -+ * hi: bw_hi, inflight_hi: robust long-term upper bound -+ * The bandwidth-probing time scale is (a) extended dynamically based on -+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by -+ * an interactive wall-clock time-scale to be more scalable and responsive -+ * than Reno and CUBIC. -+ * -+ * Here is a state transition diagram for BBR: -+ * -+ * | -+ * V -+ * +---> STARTUP ----+ -+ * | | | -+ * | V | -+ * | DRAIN ----+ -+ * | | | -+ * | V | -+ * +---> PROBE_BW ----+ -+ * | ^ | | -+ * | | | | -+ * | +----+ | -+ * | | -+ * +---- PROBE_RTT <--+ -+ * -+ * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. -+ * When it estimates the pipe is full, it enters DRAIN to drain the queue. -+ * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. -+ * A long-lived BBR flow spends the vast majority of its time remaining -+ * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth -+ * in a fair manner, with a small, bounded queue. *If* a flow has been -+ * continuously sending for the entire min_rtt window, and hasn't seen an RTT -+ * sample that matches or decreases its min_rtt estimate for 10 seconds, then -+ * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe -+ * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if -+ * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; -+ * otherwise we enter STARTUP to try to fill the pipe. -+ * -+ * BBR is described in detail in: -+ * "BBR: Congestion-Based Congestion Control", -+ * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, -+ * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. 
-+ * -+ * There is a public e-mail list for discussing BBR development and testing: -+ * https://groups.google.com/forum/#!forum/bbr-dev -+ * -+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, -+ * otherwise TCP stack falls back to an internal pacing using one high -+ * resolution timer per TCP socket and may use more resources. -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+#include "tcp_dctcp.h" -+ -+/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth -+ * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. -+ * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. -+ * Since the minimum window is >=4 packets, the lower bound isn't -+ * an issue. The upper bound isn't an issue with existing technologies. -+ */ -+#define BW_SCALE 24 -+#define BW_UNIT (1 << BW_SCALE) -+ -+#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ -+#define BBR_UNIT (1 << BBR_SCALE) -+ -+#define FLAG_DEBUG_VERBOSE 0x1 /* Verbose debugging messages */ -+#define FLAG_DEBUG_LOOPBACK 0x2 /* Do NOT skip loopback addr */ -+ -+#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ -+ -+/* BBR has the following modes for deciding how fast to send: */ -+enum bbr_mode { -+ BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ -+ BBR_DRAIN, /* drain any queue created during startup */ -+ BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ -+ BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ -+}; -+ -+/* How does the incoming ACK stream relate to our bandwidth probing? */ -+enum bbr_ack_phase { -+ BBR_ACKS_INIT, /* not probing; not getting probe feedback */ -+ BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ -+ BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ -+ BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ -+ BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ -+}; -+ -+/* BBR congestion control block */ -+struct bbr { -+ u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ -+ u32 min_rtt_stamp; /* timestamp of min_rtt_us */ -+ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ -+ u32 probe_rtt_min_us; /* min RTT in bbr_probe_rtt_win_ms window */ -+ u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ -+ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ -+ u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ -+ u64 cycle_mstamp; /* time of this cycle phase start */ -+ u32 mode:3, /* current bbr_mode in state machine */ -+ prev_ca_state:3, /* CA state on previous ACK */ -+ packet_conservation:1, /* use packet conservation? */ -+ round_start:1, /* start of packet-timed tx->ack round? */ -+ ce_state:1, /* If most recent data has CE bit set */ -+ bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ -+ try_fast_path:1, /* can we take fast path? */ -+ unused2:11, -+ idle_restart:1, /* restarting after idle? */ -+ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ -+ cycle_idx:3, /* current index in pacing_gain cycle array */ -+ has_seen_rtt:1; /* have we seen an RTT sample yet? */ -+ u32 pacing_gain:11, /* current gain for setting pacing rate */ -+ cwnd_gain:11, /* current gain for setting cwnd */ -+ full_bw_reached:1, /* reached full bw in Startup? 
*/ -+ full_bw_cnt:2, /* number of rounds without large bw gains */ -+ init_cwnd:7; /* initial cwnd */ -+ u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ -+ u32 full_bw; /* recent bw, to estimate if pipe is full */ -+ -+ /* For tracking ACK aggregation: */ -+ u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ -+ u16 extra_acked[2]; /* max excess data ACKed in epoch */ -+ u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ -+ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ -+ extra_acked_win_idx:1, /* current index in extra_acked array */ -+ /* BBR v2 state: */ -+ unused1:2, -+ startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ -+ loss_in_cycle:1, /* packet loss in this cycle? */ -+ ecn_in_cycle:1; /* ECN in this cycle? */ -+ u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ -+ u32 undo_bw_lo; /* bw_lo before latest losses */ -+ u32 undo_inflight_lo; /* inflight_lo before latest losses */ -+ u32 undo_inflight_hi; /* inflight_hi before latest losses */ -+ u32 bw_latest; /* max delivered bw in last round trip */ -+ u32 bw_lo; /* lower bound on sending bandwidth */ -+ u32 bw_hi[2]; /* upper bound of sending bandwidth range*/ -+ u32 inflight_latest; /* max delivered data in last round trip */ -+ u32 inflight_lo; /* lower bound of inflight data range */ -+ u32 inflight_hi; /* upper bound of inflight data range */ -+ u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ -+ u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ -+ u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ -+ u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ -+ ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ -+ bw_probe_samples:1, /* rate samples reflect bw probing? */ -+ prev_probe_too_high:1, /* did last PROBE_UP go too high? */ -+ stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ -+ rounds_since_probe:8, /* packet-timed rounds since probed bw */ -+ loss_round_start:1, /* loss_round_delivered round trip? */ -+ loss_in_round:1, /* loss marked in this round trip? */ -+ ecn_in_round:1, /* ECN marked in this round trip? */ -+ ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ -+ loss_events_in_round:4,/* losses in STARTUP round */ -+ initialized:1; /* has bbr_init() been called? */ -+ u32 alpha_last_delivered; /* tp->delivered at alpha update */ -+ u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ -+ -+ /* Params configurable using setsockopt. Refer to correspoding -+ * module param for detailed description of params. 
-+ */ -+ struct bbr_params { -+ u32 high_gain:11, /* max allowed value: 2047 */ -+ drain_gain:10, /* max allowed value: 1023 */ -+ cwnd_gain:11; /* max allowed value: 2047 */ -+ u32 cwnd_min_target:4, /* max allowed value: 15 */ -+ min_rtt_win_sec:5, /* max allowed value: 31 */ -+ probe_rtt_mode_ms:9, /* max allowed value: 511 */ -+ full_bw_cnt:3, /* max allowed value: 7 */ -+ cwnd_tso_budget:1, /* allowed values: {0, 1} */ -+ unused3:6, -+ drain_to_target:1, /* boolean */ -+ precise_ece_ack:1, /* boolean */ -+ extra_acked_in_startup:1, /* allowed values: {0, 1} */ -+ fast_path:1; /* boolean */ -+ u32 full_bw_thresh:10, /* max allowed value: 1023 */ -+ startup_cwnd_gain:11, /* max allowed value: 2047 */ -+ bw_probe_pif_gain:9, /* max allowed value: 511 */ -+ usage_based_cwnd:1, /* boolean */ -+ unused2:1; -+ u16 probe_rtt_win_ms:14, /* max allowed value: 16383 */ -+ refill_add_inc:2; /* max allowed value: 3 */ -+ u16 extra_acked_gain:11, /* max allowed value: 2047 */ -+ extra_acked_win_rtts:5; /* max allowed value: 31*/ -+ u16 pacing_gain[CYCLE_LEN]; /* max allowed value: 1023 */ -+ /* Mostly BBR v2 parameters below here: */ -+ u32 ecn_alpha_gain:8, /* max allowed value: 255 */ -+ ecn_factor:8, /* max allowed value: 255 */ -+ ecn_thresh:8, /* max allowed value: 255 */ -+ beta:8; /* max allowed value: 255 */ -+ u32 ecn_max_rtt_us:19, /* max allowed value: 524287 */ -+ bw_probe_reno_gain:9, /* max allowed value: 511 */ -+ full_loss_cnt:4; /* max allowed value: 15 */ -+ u32 probe_rtt_cwnd_gain:8, /* max allowed value: 255 */ -+ inflight_headroom:8, /* max allowed value: 255 */ -+ loss_thresh:8, /* max allowed value: 255 */ -+ bw_probe_max_rounds:8; /* max allowed value: 255 */ -+ u32 bw_probe_rand_rounds:4, /* max allowed value: 15 */ -+ bw_probe_base_us:26, /* usecs: 0..2^26-1 (67 secs) */ -+ full_ecn_cnt:2; /* max allowed value: 3 */ -+ u32 bw_probe_rand_us:26, /* usecs: 0..2^26-1 (67 secs) */ -+ undo:1, /* boolean */ -+ tso_rtt_shift:4, /* max allowed value: 15 */ -+ unused5:1; -+ u32 ecn_reprobe_gain:9, /* max allowed value: 511 */ -+ unused1:14, -+ ecn_alpha_init:9; /* max allowed value: 256 */ -+ } params; -+ -+ struct { -+ u32 snd_isn; /* Initial sequence number */ -+ u32 rs_bw; /* last valid rate sample bw */ -+ u32 target_cwnd; /* target cwnd, based on BDP */ -+ u8 undo:1, /* Undo even happened but not yet logged */ -+ unused:7; -+ char event; /* single-letter event debug codes */ -+ u16 unused2; -+ } debug; -+}; -+ -+struct bbr_context { -+ u32 sample_bw; -+ u32 target_cwnd; -+ u32 log:1; -+}; -+ -+/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F) */ -+static u32 bbr_min_rtt_win_sec = 10; -+/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode. -+ * Max allowed value is 511 (0x1FF). -+ */ -+static u32 bbr_probe_rtt_mode_ms = 200; -+/* Window length of probe_rtt_min_us filter (in ms), and consequently the -+ * typical interval between PROBE_RTT mode entries. -+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC -+ */ -+static u32 bbr_probe_rtt_win_ms = 5000; -+/* Skip TSO below the following bandwidth (bits/sec): */ -+static int bbr_min_tso_rate = 1200000; -+ -+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting -+ * in bigger TSO bursts. By default we cut the RTT-based allowance in half -+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance -+ * is below 1500 bytes after 6 * ~500 usec = 3ms. 
-+ */ -+static u32 bbr_tso_rtt_shift = 9; /* halve allowance per 2^9 usecs, 512us */ -+ -+/* Select cwnd TSO budget approach: -+ * 0: padding -+ * 1: flooring -+ */ -+static uint bbr_cwnd_tso_budget = 1; -+ -+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. -+ * In order to help drive the network toward lower queues and low latency while -+ * maintaining high utilization, the average pacing rate aims to be slightly -+ * lower than the estimated bandwidth. This is an important aspect of the -+ * design. -+ */ -+static const int bbr_pacing_margin_percent = 1; -+ -+/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain -+ * that will allow a smoothly increasing pacing rate that will double each RTT -+ * and send the same number of packets per RTT that an un-paced, slow-starting -+ * Reno or CUBIC flow would. Max allowed value is 2047 (0x7FF). -+ */ -+static int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; -+/* The gain for deriving startup cwnd. Max allowed value is 2047 (0x7FF). */ -+static int bbr_startup_cwnd_gain = BBR_UNIT * 2885 / 1000 + 1; -+/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain -+ * the queue created in BBR_STARTUP in a single round. Max allowed value -+ * is 1023 (0x3FF). -+ */ -+static int bbr_drain_gain = BBR_UNIT * 1000 / 2885; -+/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs. -+ * Max allowed value is 2047 (0x7FF). -+ */ -+static int bbr_cwnd_gain = BBR_UNIT * 2; -+/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw. -+ * Max allowed value for each element is 1023 (0x3FF). -+ */ -+enum bbr_pacing_gain_phase { -+ BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ -+ BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ -+ BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ -+ BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ -+}; -+static int bbr_pacing_gain[] = { -+ BBR_UNIT * 5 / 4, /* probe for more available bw */ -+ BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ -+ BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ -+ BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ -+}; -+ -+/* Try to keep at least this many packets in flight, if things go smoothly. For -+ * smooth functioning, a sliding window protocol ACKing every other packet -+ * needs at least 4 packets in flight. Max allowed value is 15 (0xF). -+ */ -+static u32 bbr_cwnd_min_target = 4; -+ -+/* Cwnd to BDP proportion in PROBE_RTT mode scaled by BBR_UNIT. Default: 50%. -+ * Use 0 to disable. Max allowed value is 255. -+ */ -+static u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; -+ -+/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ -+/* If bw has increased significantly (1.25x), there may be more bw available. -+ * Max allowed value is 1023 (0x3FF). -+ */ -+static u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; -+/* But after 3 rounds w/o significant bw growth, estimate pipe is full. -+ * Max allowed value is 7 (0x7). -+ */ -+static u32 bbr_full_bw_cnt = 3; -+ -+static u32 bbr_flags; /* Debugging related stuff */ -+ -+/* Whether to debug using printk. -+ */ -+static bool bbr_debug_with_printk; -+ -+/* Whether to debug using ftrace event tcp:tcp_bbr_event. -+ * Ignored when bbr_debug_with_printk is set. -+ */ -+static bool bbr_debug_ftrace; -+ -+/* Experiment: each cycle, try to hold sub-unity gain until inflight <= BDP. 
*/ -+static bool bbr_drain_to_target = true; /* default: enabled */ -+ -+/* Experiment: Flags to control BBR with ECN behavior. -+ */ -+static bool bbr_precise_ece_ack = true; /* default: enabled */ -+ -+/* The max rwin scaling shift factor is 14 (RFC 1323), so the max sane rwin is -+ * (2^(16+14) B)/(1024 B/packet) = 1M packets. -+ */ -+static u32 bbr_cwnd_warn_val = 1U << 20; -+ -+static u16 bbr_debug_port_mask; -+ -+/* BBR module parameters. These are module parameters only in Google prod. -+ * Upstream these are intentionally not module parameters. -+ */ -+static int bbr_pacing_gain_size = CYCLE_LEN; -+ -+/* Gain factor for adding extra_acked to target cwnd: */ -+static int bbr_extra_acked_gain = 256; -+ -+/* Window length of extra_acked window. Max allowed val is 31. */ -+static u32 bbr_extra_acked_win_rtts = 5; -+ -+/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */ -+static u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; -+ -+/* Time period for clamping cwnd increment due to ack aggregation */ -+static u32 bbr_extra_acked_max_us = 100 * 1000; -+ -+/* Use extra acked in startup ? -+ * 0: disabled -+ * 1: use latest extra_acked value from 1-2 rtt in startup -+ */ -+static int bbr_extra_acked_in_startup = 1; /* default: enabled */ -+ -+/* Experiment: don't grow cwnd beyond twice of what we just probed. */ -+static bool bbr_usage_based_cwnd; /* default: disabled */ -+ -+/* For lab testing, researchers can enable BBRv2 ECN support with this flag, -+ * when they know that any ECN marks that the connections experience will be -+ * DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks. -+ * TODO(ncardwell): Production use of the BBRv2 ECN functionality depends on -+ * negotiation or configuration that is outside the scope of the BBRv2 -+ * alpha release. 
-+ */ -+static bool bbr_ecn_enable = false; -+ -+module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644); -+module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, int, 0644); -+module_param_named(high_gain, bbr_high_gain, int, 0644); -+module_param_named(drain_gain, bbr_drain_gain, int, 0644); -+module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644); -+module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644); -+module_param_array_named(pacing_gain, bbr_pacing_gain, int, -+ &bbr_pacing_gain_size, 0644); -+module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644); -+module_param_named(probe_rtt_cwnd_gain, -+ bbr_probe_rtt_cwnd_gain, uint, 0664); -+module_param_named(cwnd_warn_val, bbr_cwnd_warn_val, uint, 0664); -+module_param_named(debug_port_mask, bbr_debug_port_mask, ushort, 0644); -+module_param_named(flags, bbr_flags, uint, 0644); -+module_param_named(debug_ftrace, bbr_debug_ftrace, bool, 0644); -+module_param_named(debug_with_printk, bbr_debug_with_printk, bool, 0644); -+module_param_named(min_rtt_win_sec, bbr_min_rtt_win_sec, uint, 0644); -+module_param_named(probe_rtt_mode_ms, bbr_probe_rtt_mode_ms, uint, 0644); -+module_param_named(probe_rtt_win_ms, bbr_probe_rtt_win_ms, uint, 0644); -+module_param_named(full_bw_thresh, bbr_full_bw_thresh, uint, 0644); -+module_param_named(full_bw_cnt, bbr_full_bw_cnt, uint, 0644); -+module_param_named(cwnd_tso_bduget, bbr_cwnd_tso_budget, uint, 0664); -+module_param_named(extra_acked_gain, bbr_extra_acked_gain, int, 0664); -+module_param_named(extra_acked_win_rtts, -+ bbr_extra_acked_win_rtts, uint, 0664); -+module_param_named(extra_acked_max_us, -+ bbr_extra_acked_max_us, uint, 0664); -+module_param_named(ack_epoch_acked_reset_thresh, -+ bbr_ack_epoch_acked_reset_thresh, uint, 0664); -+module_param_named(drain_to_target, bbr_drain_to_target, bool, 0664); -+module_param_named(precise_ece_ack, bbr_precise_ece_ack, bool, 0664); -+module_param_named(extra_acked_in_startup, -+ bbr_extra_acked_in_startup, int, 0664); -+module_param_named(usage_based_cwnd, bbr_usage_based_cwnd, bool, 0664); -+module_param_named(ecn_enable, bbr_ecn_enable, bool, 0664); -+ -+static void bbr2_exit_probe_rtt(struct sock *sk); -+static void bbr2_reset_congestion_signals(struct sock *sk); -+ -+static void bbr_check_probe_rtt_done(struct sock *sk); -+ -+/* Do we estimate that STARTUP filled the pipe? */ -+static bool bbr_full_bw_reached(const struct sock *sk) -+{ -+ const struct bbr *bbr = inet_csk_ca(sk); -+ -+ return bbr->full_bw_reached; -+} -+ -+/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ -+static u32 bbr_max_bw(const struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return max(bbr->bw_hi[0], bbr->bw_hi[1]); -+} -+ -+/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ -+static u32 bbr_bw(const struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return min(bbr_max_bw(sk), bbr->bw_lo); -+} -+ -+/* Return maximum extra acked in past k-2k round trips, -+ * where k = bbr_extra_acked_win_rtts. -+ */ -+static u16 bbr_extra_acked(const struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return max(bbr->extra_acked[0], bbr->extra_acked[1]); -+} -+ -+/* Return rate in bytes per second, optionally with a gain. -+ * The order here is chosen carefully to avoid overflow of u64. This should -+ * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. 
-+ */ -+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, -+ int margin) -+{ -+ unsigned int mss = tcp_sk(sk)->mss_cache; -+ -+ rate *= mss; -+ rate *= gain; -+ rate >>= BBR_SCALE; -+ rate *= USEC_PER_SEC / 100 * (100 - margin); -+ rate >>= BW_SCALE; -+ rate = max(rate, 1ULL); -+ return rate; -+} -+ -+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) -+{ -+ return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); -+} -+ -+static u64 bbr_rate_kbps(struct sock *sk, u64 rate) -+{ -+ rate = bbr_bw_bytes_per_sec(sk, rate); -+ rate *= 8; -+ do_div(rate, 1000); -+ return rate; -+} -+ -+static u32 bbr_tso_segs_goal(struct sock *sk); -+static void bbr_debug(struct sock *sk, u32 acked, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ static const char ca_states[] = { -+ [TCP_CA_Open] = 'O', -+ [TCP_CA_Disorder] = 'D', -+ [TCP_CA_CWR] = 'C', -+ [TCP_CA_Recovery] = 'R', -+ [TCP_CA_Loss] = 'L', -+ }; -+ static const char mode[] = { -+ 'G', /* Growing - BBR_STARTUP */ -+ 'D', /* Drain - BBR_DRAIN */ -+ 'W', /* Window - BBR_PROBE_BW */ -+ 'M', /* Min RTT - BBR_PROBE_RTT */ -+ }; -+ static const char ack_phase[] = { /* bbr_ack_phase strings */ -+ 'I', /* BBR_ACKS_INIT - 'Init' */ -+ 'R', /* BBR_ACKS_REFILLING - 'Refilling' */ -+ 'B', /* BBR_ACKS_PROBE_STARTING - 'Before' */ -+ 'F', /* BBR_ACKS_PROBE_FEEDBACK - 'Feedback' */ -+ 'A', /* BBR_ACKS_PROBE_STOPPING - 'After' */ -+ }; -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ const u32 una = tp->snd_una - bbr->debug.snd_isn; -+ const u32 fack = tcp_highest_sack_seq(tp); -+ const u16 dport = ntohs(inet_sk(sk)->inet_dport); -+ bool is_port_match = (bbr_debug_port_mask && -+ ((dport & bbr_debug_port_mask) == 0)); -+ char debugmsg[320]; -+ -+ if (sk->sk_state == TCP_SYN_SENT) -+ return; /* no bbr_init() yet if SYN retransmit -> CA_Loss */ -+ -+ if (!tp->snd_cwnd || tp->snd_cwnd > bbr_cwnd_warn_val) { -+ char addr[INET6_ADDRSTRLEN + 10] = { 0 }; -+ -+ if (sk->sk_family == AF_INET) -+ snprintf(addr, sizeof(addr), "%pI4:%u", -+ &inet_sk(sk)->inet_daddr, dport); -+ else if (sk->sk_family == AF_INET6) -+ snprintf(addr, sizeof(addr), "%pI6:%u", -+ &sk->sk_v6_daddr, dport); -+ -+ WARN_ONCE(1, -+ "BBR %s cwnd alert: %u " -+ "snd_una: %u ca: %d pacing_gain: %u cwnd_gain: %u " -+ "bw: %u rtt: %u min_rtt: %u " -+ "acked: %u tso_segs: %u " -+ "bw: %d %ld %d pif: %u\n", -+ addr, tp->snd_cwnd, -+ una, inet_csk(sk)->icsk_ca_state, -+ bbr->pacing_gain, bbr->cwnd_gain, -+ bbr_max_bw(sk), (tp->srtt_us >> 3), bbr->min_rtt_us, -+ acked, bbr_tso_segs_goal(sk), -+ rs->delivered, rs->interval_us, rs->is_retrans, -+ tcp_packets_in_flight(tp)); -+ } -+ -+ if (likely(!bbr_debug_with_printk && !bbr_debug_ftrace)) -+ return; -+ -+ if (!sock_flag(sk, SOCK_DBG) && !is_port_match) -+ return; -+ -+ if (!ctx->log && !tp->app_limited && !(bbr_flags & FLAG_DEBUG_VERBOSE)) -+ return; -+ -+ if (ipv4_is_loopback(inet_sk(sk)->inet_daddr) && -+ !(bbr_flags & FLAG_DEBUG_LOOPBACK)) -+ return; -+ -+ snprintf(debugmsg, sizeof(debugmsg) - 1, -+ "BBR %pI4:%-5u %5u,%03u:%-7u %c " -+ "%c %2u br %2u cr %2d rtt %5ld d %2d i %5ld mrtt %d %cbw %llu " -+ "bw %llu lb %llu ib %llu qb %llu " -+ "a %u if %2u %c %c dl %u l %u al %u # %u t %u %c %c " -+ "lr %d er %d ea %d bwl %lld il %d ih %d c %d " -+ "v %d %c %u %c %s\n", -+ &inet_sk(sk)->inet_daddr, dport, -+ una / 1000, una % 1000, fack - tp->snd_una, -+ ca_states[inet_csk(sk)->icsk_ca_state], -+ bbr->debug.undo ? 
'@' : mode[bbr->mode], -+ tp->snd_cwnd, -+ bbr_extra_acked(sk), /* br (legacy): extra_acked */ -+ rs->tx_in_flight, /* cr (legacy): tx_inflight */ -+ rs->rtt_us, -+ rs->delivered, -+ rs->interval_us, -+ bbr->min_rtt_us, -+ rs->is_app_limited ? '_' : 'l', -+ bbr_rate_kbps(sk, ctx->sample_bw), /* lbw: latest sample bw */ -+ bbr_rate_kbps(sk, bbr_max_bw(sk)), /* bw: max bw */ -+ 0ULL, /* lb: [obsolete] */ -+ 0ULL, /* ib: [obsolete] */ -+ div_u64((u64)sk->sk_pacing_rate * 8, 1000), -+ acked, -+ tcp_packets_in_flight(tp), -+ rs->is_ack_delayed ? 'd' : '.', -+ bbr->round_start ? '*' : '.', -+ tp->delivered, tp->lost, -+ tp->app_limited, -+ 0, /* #: [obsolete] */ -+ ctx->target_cwnd, -+ tp->reord_seen ? 'r' : '.', /* r: reordering seen? */ -+ ca_states[bbr->prev_ca_state], -+ (rs->lost + rs->delivered) > 0 ? -+ (1000 * rs->lost / -+ (rs->lost + rs->delivered)) : 0, /* lr: loss rate x1000 */ -+ (rs->delivered) > 0 ? -+ (1000 * rs->delivered_ce / -+ (rs->delivered)) : 0, /* er: ECN rate x1000 */ -+ 1000 * bbr->ecn_alpha >> BBR_SCALE, /* ea: ECN alpha x1000 */ -+ bbr->bw_lo == ~0U ? -+ -1 : (s64)bbr_rate_kbps(sk, bbr->bw_lo), /* bwl */ -+ bbr->inflight_lo, /* il */ -+ bbr->inflight_hi, /* ih */ -+ bbr->bw_probe_up_cnt, /* c */ -+ 2, /* v: version */ -+ bbr->debug.event, -+ bbr->cycle_idx, -+ ack_phase[bbr->ack_phase], -+ bbr->bw_probe_samples ? "Y" : "N"); -+ debugmsg[sizeof(debugmsg) - 1] = 0; -+ -+ /* printk takes a higher precedence. */ -+ if (bbr_debug_with_printk) -+ printk(KERN_DEBUG "%s", debugmsg); -+ -+ if (unlikely(bbr->debug.undo)) -+ bbr->debug.undo = 0; -+} -+ -+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ -+static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) -+{ -+ u64 rate = bw; -+ -+ rate = bbr_rate_bytes_per_sec(sk, rate, gain, -+ bbr_pacing_margin_percent); -+ rate = min_t(u64, rate, sk->sk_max_pacing_rate); -+ return rate; -+} -+ -+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ -+static void bbr_init_pacing_rate_from_rtt(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw; -+ u32 rtt_us; -+ -+ if (tp->srtt_us) { /* any RTT sample yet? */ -+ rtt_us = max(tp->srtt_us >> 3, 1U); -+ bbr->has_seen_rtt = 1; -+ } else { /* no RTT sample yet */ -+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ -+ } -+ bw = (u64)tp->snd_cwnd * BW_UNIT; -+ do_div(bw, rtt_us); -+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr->params.high_gain); -+} -+ -+/* Pace using current bw estimate and a gain factor. */ -+static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain); -+ -+ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) -+ bbr_init_pacing_rate_from_rtt(sk); -+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) -+ sk->sk_pacing_rate = rate; -+} -+ -+static u32 bbr_min_tso_segs(struct sock *sk) -+{ -+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; -+} -+ -+/* Return the number of segments BBR would like in a TSO/GSO skb, given -+ * a particular max gso size as a constraint. -+ */ -+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, -+ u32 gso_max_size) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 segs, r; -+ u64 bytes; -+ -+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). 
*/ -+ bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; -+ -+ /* Budget a TSO/GSO burst size allowance based on min_rtt. For every -+ * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. -+ * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) -+ */ -+ if (bbr->params.tso_rtt_shift) { -+ r = bbr->min_rtt_us >> bbr->params.tso_rtt_shift; -+ if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ -+ bytes += GSO_MAX_SIZE >> r; -+ } -+ -+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); -+ segs = max_t(u32, div_u64(bytes, mss_now), bbr_min_tso_segs(sk)); -+ return segs; -+} -+ -+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ -+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) -+{ -+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); -+} -+ -+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ -+static u32 bbr_tso_segs_goal(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); -+} -+ -+/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -+static void bbr_save_cwnd(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) -+ bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ -+ else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ -+ bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); -+} -+ -+static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (event == CA_EVENT_TX_START && tp->app_limited) { -+ bbr->idle_restart = 1; -+ bbr->ack_epoch_mstamp = tp->tcp_mstamp; -+ bbr->ack_epoch_acked = 0; -+ /* Avoid pointless buffer overflows: pace at est. bw if we don't -+ * need more speed (we're restarting from idle and app-limited). -+ */ -+ if (bbr->mode == BBR_PROBE_BW) -+ bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); -+ else if (bbr->mode == BBR_PROBE_RTT) -+ bbr_check_probe_rtt_done(sk); -+ } else if ((event == CA_EVENT_ECN_IS_CE || -+ event == CA_EVENT_ECN_NO_CE) && -+ bbr_ecn_enable && -+ bbr->params.precise_ece_ack) { -+ u32 state = bbr->ce_state; -+ dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); -+ bbr->ce_state = state; -+ if (tp->fast_ack_mode == 2 && event == CA_EVENT_ECN_IS_CE) -+ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); -+ } -+} -+ -+/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: -+ * -+ * bdp = ceil(bw * min_rtt * gain) -+ * -+ * The key factor, gain, controls the amount of queue. While a small gain -+ * builds a smaller queue, it becomes more vulnerable to noise in RTT -+ * measurements (e.g., delayed ACKs or other ACK compression effects). This -+ * noise may cause BBR to under-estimate the rate. -+ */ -+static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bdp; -+ u64 w; -+ -+ /* If we've never had a valid RTT sample, cap cwnd at the initial -+ * default. This should only happen when the connection is not using TCP -+ * timestamps and has retransmitted all of the SYN/SYNACK/data packets -+ * ACKed so far. In this case, an RTO can cut cwnd to 1, in which -+ * case we need to slow-start up toward something safe: initial cwnd. -+ */ -+ if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? 
*/ -+ return bbr->init_cwnd; /* be safe: cap at initial cwnd */ -+ -+ w = (u64)bw * bbr->min_rtt_us; -+ -+ /* Apply a gain to the given value, remove the BW_SCALE shift, and -+ * round the value up to avoid a negative feedback loop. -+ */ -+ bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; -+ -+ return bdp; -+} -+ -+/* To achieve full performance in high-speed paths, we budget enough cwnd to -+ * fit full-sized skbs in-flight on both end hosts to fully utilize the path: -+ * - one skb in sending host Qdisc, -+ * - one skb in sending host TSO/GSO engine -+ * - one skb being received by receiver host LRO/GRO/delayed-ACK engine -+ * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because -+ * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, -+ * which allows 2 outstanding 2-packet sequences, to try to keep pipe -+ * full even with ACK-every-other-packet delayed ACKs. -+ */ -+static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 tso_segs_goal; -+ -+ tso_segs_goal = 3 * bbr_tso_segs_goal(sk); -+ -+ /* Allow enough full-sized skbs in flight to utilize end systems. */ -+ if (bbr->params.cwnd_tso_budget == 1) { -+ cwnd = max_t(u32, cwnd, tso_segs_goal); -+ cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); -+ } else { -+ cwnd += tso_segs_goal; -+ cwnd = (cwnd + 1) & ~1U; -+ } -+ /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) -+ cwnd += 2; -+ -+ return cwnd; -+} -+ -+/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ -+static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) -+{ -+ u32 inflight; -+ -+ inflight = bbr_bdp(sk, bw, gain); -+ inflight = bbr_quantization_budget(sk, inflight); -+ -+ return inflight; -+} -+ -+/* With pacing at lower layers, there's often less data "in the network" than -+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), -+ * we often have several skbs queued in the pacing layer with a pre-scheduled -+ * earliest departure time (EDT). BBR adapts its pacing rate based on the -+ * inflight level that it estimates has already been "baked in" by previous -+ * departure time decisions. We calculate a rough estimate of the number of our -+ * packets that might be in the network at the earliest departure time for the -+ * next skb scheduled: -+ * in_network_at_edt = inflight_at_edt - (EDT - now) * bw -+ * If we're increasing inflight, then we want to know if the transmit of the -+ * EDT skb will push inflight above the target, so inflight_at_edt includes -+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, -+ * then estimate if inflight will sink too low just before the EDT transmit. 
-+ */ -+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 now_ns, edt_ns, interval_us; -+ u32 interval_delivered, inflight_at_edt; -+ -+ now_ns = tp->tcp_clock_cache; -+ edt_ns = max(tp->tcp_wstamp_ns, now_ns); -+ interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); -+ interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; -+ inflight_at_edt = inflight_now; -+ if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ -+ inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ -+ if (interval_delivered >= inflight_at_edt) -+ return 0; -+ return inflight_at_edt - interval_delivered; -+} -+ -+/* Find the cwnd increment based on estimate of ack aggregation */ -+static u32 bbr_ack_aggregation_cwnd(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 max_aggr_cwnd, aggr_cwnd = 0; -+ -+ if (bbr->params.extra_acked_gain && -+ (bbr_full_bw_reached(sk) || bbr->params.extra_acked_in_startup)) { -+ max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) -+ / BW_UNIT; -+ aggr_cwnd = (bbr->params.extra_acked_gain * bbr_extra_acked(sk)) -+ >> BBR_SCALE; -+ aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); -+ } -+ -+ return aggr_cwnd; -+} -+ -+/* Returns the cwnd for PROBE_RTT mode. */ -+static u32 bbr_probe_rtt_cwnd(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->params.probe_rtt_cwnd_gain == 0) -+ return bbr->params.cwnd_min_target; -+ return max_t(u32, bbr->params.cwnd_min_target, -+ bbr_bdp(sk, bbr_bw(sk), bbr->params.probe_rtt_cwnd_gain)); -+} -+ -+/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss -+ * has drawn us down below target), or snap down to target if we're above it. -+ */ -+static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, -+ u32 acked, u32 bw, int gain, u32 cwnd, -+ struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 target_cwnd = 0, prev_cwnd = tp->snd_cwnd, max_probe; -+ -+ if (!acked) -+ goto done; /* no packet fully ACKed; just apply caps */ -+ -+ target_cwnd = bbr_bdp(sk, bw, gain); -+ -+ /* Increment the cwnd to account for excess ACKed data that seems -+ * due to aggregation (of data and/or ACKs) visible in the ACK stream. -+ */ -+ target_cwnd += bbr_ack_aggregation_cwnd(sk); -+ target_cwnd = bbr_quantization_budget(sk, target_cwnd); -+ -+ /* If we're below target cwnd, slow start cwnd toward target cwnd. */ -+ bbr->debug.target_cwnd = target_cwnd; -+ -+ /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ -+ bbr->try_fast_path = 0; -+ if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ -+ cwnd += acked; -+ if (cwnd >= target_cwnd) { -+ cwnd = target_cwnd; -+ bbr->try_fast_path = 1; -+ } -+ } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { -+ cwnd += acked; -+ } else { -+ bbr->try_fast_path = 1; -+ } -+ -+ /* When growing cwnd, don't grow beyond twice what we just probed. 
*/ -+ if (bbr->params.usage_based_cwnd) { -+ max_probe = max(2 * tp->max_packets_out, tp->snd_cwnd); -+ cwnd = min(cwnd, max_probe); -+ } -+ -+ cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); -+done: -+ tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ -+ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ -+ tp->snd_cwnd = min_t(u32, tp->snd_cwnd, bbr_probe_rtt_cwnd(sk)); -+ -+ ctx->target_cwnd = target_cwnd; -+ ctx->log = (tp->snd_cwnd != prev_cwnd); -+} -+ -+/* See if we have reached next round trip */ -+static void bbr_update_round_start(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->round_start = 0; -+ -+ /* See if we've reached the next RTT */ -+ if (rs->interval_us > 0 && -+ !before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr->round_start = 1; -+ } -+} -+ -+/* Calculate the bandwidth based on how fast packets are delivered */ -+static void bbr_calculate_bw_sample(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw = 0; -+ -+ /* Divide delivered by the interval to find a (lower bound) bottleneck -+ * bandwidth sample. Delivered is in packets and interval_us in uS and -+ * ratio will be <<1 for most connections. So delivered is first scaled. -+ * Round up to allow growth at low rates, even with integer division. -+ */ -+ if (rs->interval_us > 0) { -+ if (WARN_ONCE(rs->delivered < 0, -+ "negative delivered: %d interval_us: %ld\n", -+ rs->delivered, rs->interval_us)) -+ return; -+ -+ bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); -+ } -+ -+ ctx->sample_bw = bw; -+ bbr->debug.rs_bw = bw; -+} -+ -+/* Estimates the windowed max degree of ack aggregation. -+ * This is used to provision extra in-flight data to keep sending during -+ * inter-ACK silences. -+ * -+ * Degree of ack aggregation is estimated as extra data acked beyond expected. -+ * -+ * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" -+ * cwnd += max_extra_acked -+ * -+ * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). -+ * Max filter is an approximate sliding window of 5-10 (packet timed) round -+ * trips for non-startup phase, and 1-2 round trips for startup. -+ */ -+static void bbr_update_ack_aggregation(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ u32 epoch_us, expected_acked, extra_acked; -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct tcp_sock *tp = tcp_sk(sk); -+ u32 extra_acked_win_rtts_thresh = bbr->params.extra_acked_win_rtts; -+ -+ if (!bbr->params.extra_acked_gain || rs->acked_sacked <= 0 || -+ rs->delivered < 0 || rs->interval_us <= 0) -+ return; -+ -+ if (bbr->round_start) { -+ bbr->extra_acked_win_rtts = min(0x1F, -+ bbr->extra_acked_win_rtts + 1); -+ if (bbr->params.extra_acked_in_startup && -+ !bbr_full_bw_reached(sk)) -+ extra_acked_win_rtts_thresh = 1; -+ if (bbr->extra_acked_win_rtts >= -+ extra_acked_win_rtts_thresh) { -+ bbr->extra_acked_win_rtts = 0; -+ bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? -+ 0 : 1; -+ bbr->extra_acked[bbr->extra_acked_win_idx] = 0; -+ } -+ } -+ -+ /* Compute how many packets we expected to be delivered over epoch. 
*/ -+ epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, -+ bbr->ack_epoch_mstamp); -+ expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; -+ -+ /* Reset the aggregation epoch if the ACK rate is below the expected -+ * rate, or a significantly large number of ACKs has arrived since the -+ * epoch started (a potentially quite old epoch). -+ */ -+ if (bbr->ack_epoch_acked <= expected_acked || -+ (bbr->ack_epoch_acked + rs->acked_sacked >= -+ bbr_ack_epoch_acked_reset_thresh)) { -+ bbr->ack_epoch_acked = 0; -+ bbr->ack_epoch_mstamp = tp->delivered_mstamp; -+ expected_acked = 0; -+ } -+ -+ /* Compute excess data delivered, beyond what was expected. */ -+ bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, -+ bbr->ack_epoch_acked + rs->acked_sacked); -+ extra_acked = bbr->ack_epoch_acked - expected_acked; -+ extra_acked = min(extra_acked, tp->snd_cwnd); -+ if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) -+ bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; -+} -+ -+/* Estimate when the pipe is full, using the change in delivery rate: BBR -+ * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by -+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -+ * higher rwin, 3: we get higher delivery rate samples. Or transient -+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth. -+ */ -+static void bbr_check_full_bw_reached(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bw_thresh; -+ -+ if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) -+ return; -+ -+ bw_thresh = (u64)bbr->full_bw * bbr->params.full_bw_thresh >> BBR_SCALE; -+ if (bbr_max_bw(sk) >= bw_thresh) { -+ bbr->full_bw = bbr_max_bw(sk); -+ bbr->full_bw_cnt = 0; -+ return; -+ } -+ ++bbr->full_bw_cnt; -+ bbr->full_bw_reached = bbr->full_bw_cnt >= bbr->params.full_bw_cnt; -+} -+ -+/* If pipe is probably full, drain the queue and then enter steady-state. */ -+static bool bbr_check_drain(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_DRAIN; /* drain queue we created */ -+ tcp_sk(sk)->snd_ssthresh = -+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+ bbr2_reset_congestion_signals(sk); -+ } /* fall through to check if in-flight is already small: */ -+ if (bbr->mode == BBR_DRAIN && -+ bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= -+ bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) -+ return true; /* exiting DRAIN now */ -+ return false; -+} -+ -+static void bbr_check_probe_rtt_done(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (!(bbr->probe_rtt_done_stamp && -+ after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) -+ return; -+ -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ -+ tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); -+ bbr2_exit_probe_rtt(sk); -+} -+ -+/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and -+ * periodically drain the bottleneck queue, to converge to measure the true -+ * min_rtt (unloaded propagation delay). This allows the flows to keep queues -+ * small (reducing queuing delay and packet loss) and achieve fairness among -+ * BBR flows.
-+ * -+ * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, -+ * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. -+ * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed -+ * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and -+ * re-enter the previous mode. BBR uses 200ms to approximately bound the -+ * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). -+ * -+ * Note that flows need only pay 2% if they are busy sending over the last 10 -+ * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have -+ * natural silences or low-rate periods within 10 seconds where the rate is low -+ * enough for long enough to drain its queue in the bottleneck. We pick up -+ * these min RTT measurements opportunistically with our min_rtt filter. :-) -+ */ -+static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ bool probe_rtt_expired, min_rtt_expired; -+ u32 expire; -+ -+ /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ -+ expire = bbr->probe_rtt_min_stamp + -+ msecs_to_jiffies(bbr->params.probe_rtt_win_ms); -+ probe_rtt_expired = after(tcp_jiffies32, expire); -+ if (rs->rtt_us >= 0 && -+ (rs->rtt_us <= bbr->probe_rtt_min_us || -+ (probe_rtt_expired && !rs->is_ack_delayed))) { -+ bbr->probe_rtt_min_us = rs->rtt_us; -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ } -+ /* Track min RTT seen in the min_rtt_win_sec filter window: */ -+ expire = bbr->min_rtt_stamp + bbr->params.min_rtt_win_sec * HZ; -+ min_rtt_expired = after(tcp_jiffies32, expire); -+ if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || -+ min_rtt_expired) { -+ bbr->min_rtt_us = bbr->probe_rtt_min_us; -+ bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; -+ } -+ -+ if (bbr->params.probe_rtt_mode_ms > 0 && probe_rtt_expired && -+ !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { -+ bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ -+ bbr_save_cwnd(sk); /* note cwnd so we can restore it */ -+ bbr->probe_rtt_done_stamp = 0; -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; -+ } -+ -+ if (bbr->mode == BBR_PROBE_RTT) { -+ /* Ignore low rate samples during this mode. */ -+ tp->app_limited = -+ (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; -+ /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ -+ if (!bbr->probe_rtt_done_stamp && -+ tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { -+ bbr->probe_rtt_done_stamp = tcp_jiffies32 + -+ msecs_to_jiffies(bbr->params.probe_rtt_mode_ms); -+ bbr->probe_rtt_round_done = 0; -+ bbr->next_rtt_delivered = tp->delivered; -+ } else if (bbr->probe_rtt_done_stamp) { -+ if (bbr->round_start) -+ bbr->probe_rtt_round_done = 1; -+ if (bbr->probe_rtt_round_done) -+ bbr_check_probe_rtt_done(sk); -+ } -+ } -+ /* Restart after idle ends only once we process a new S/ACK for data */ -+ if (rs->delivered > 0) -+ bbr->idle_restart = 0; -+} -+ -+static void bbr_update_gains(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ switch (bbr->mode) { -+ case BBR_STARTUP: -+ bbr->pacing_gain = bbr->params.high_gain; -+ bbr->cwnd_gain = bbr->params.startup_cwnd_gain; -+ break; -+ case BBR_DRAIN: -+ bbr->pacing_gain = bbr->params.drain_gain; /* slow, to drain */ -+ bbr->cwnd_gain = bbr->params.startup_cwnd_gain; /* keep cwnd */ -+ break; -+ case BBR_PROBE_BW: -+ bbr->pacing_gain = bbr->params.pacing_gain[bbr->cycle_idx]; -+ bbr->cwnd_gain = bbr->params.cwnd_gain; -+ break; -+ case BBR_PROBE_RTT: -+ bbr->pacing_gain = BBR_UNIT; -+ bbr->cwnd_gain = BBR_UNIT; -+ break; -+ default: -+ WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); -+ break; -+ } -+} -+ -+static void bbr_init(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ int i; -+ -+ WARN_ON_ONCE(tp->snd_cwnd >= bbr_cwnd_warn_val); -+ -+ bbr->initialized = 1; -+ bbr->params.high_gain = min(0x7FF, bbr_high_gain); -+ bbr->params.drain_gain = min(0x3FF, bbr_drain_gain); -+ bbr->params.startup_cwnd_gain = min(0x7FF, bbr_startup_cwnd_gain); -+ bbr->params.cwnd_gain = min(0x7FF, bbr_cwnd_gain); -+ bbr->params.cwnd_tso_budget = min(0x1U, bbr_cwnd_tso_budget); -+ bbr->params.cwnd_min_target = min(0xFU, bbr_cwnd_min_target); -+ bbr->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec); -+ bbr->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms); -+ bbr->params.full_bw_cnt = min(0x7U, bbr_full_bw_cnt); -+ bbr->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh); -+ bbr->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain); -+ bbr->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts); -+ bbr->params.drain_to_target = bbr_drain_to_target ? 1 : 0; -+ bbr->params.precise_ece_ack = bbr_precise_ece_ack ? 1 : 0; -+ bbr->params.extra_acked_in_startup = bbr_extra_acked_in_startup ? 1 : 0; -+ bbr->params.probe_rtt_cwnd_gain = min(0xFFU, bbr_probe_rtt_cwnd_gain); -+ bbr->params.probe_rtt_win_ms = -+ min(0x3FFFU, -+ min_t(u32, bbr_probe_rtt_win_ms, -+ bbr->params.min_rtt_win_sec * MSEC_PER_SEC)); -+ for (i = 0; i < CYCLE_LEN; i++) -+ bbr->params.pacing_gain[i] = min(0x3FF, bbr_pacing_gain[i]); -+ bbr->params.usage_based_cwnd = bbr_usage_based_cwnd ? 
1 : 0; -+ bbr->params.tso_rtt_shift = min(0xFU, bbr_tso_rtt_shift); -+ -+ bbr->debug.snd_isn = tp->snd_una; -+ bbr->debug.target_cwnd = 0; -+ bbr->debug.undo = 0; -+ -+ bbr->init_cwnd = min(0x7FU, tp->snd_cwnd); -+ bbr->prior_cwnd = tp->prior_cwnd; -+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -+ bbr->next_rtt_delivered = 0; -+ bbr->prev_ca_state = TCP_CA_Open; -+ bbr->packet_conservation = 0; -+ -+ bbr->probe_rtt_done_stamp = 0; -+ bbr->probe_rtt_round_done = 0; -+ bbr->probe_rtt_min_us = tcp_min_rtt(tp); -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ bbr->min_rtt_us = tcp_min_rtt(tp); -+ bbr->min_rtt_stamp = tcp_jiffies32; -+ -+ bbr->has_seen_rtt = 0; -+ bbr_init_pacing_rate_from_rtt(sk); -+ -+ bbr->round_start = 0; -+ bbr->idle_restart = 0; -+ bbr->full_bw_reached = 0; -+ bbr->full_bw = 0; -+ bbr->full_bw_cnt = 0; -+ bbr->cycle_mstamp = 0; -+ bbr->cycle_idx = 0; -+ bbr->mode = BBR_STARTUP; -+ bbr->debug.rs_bw = 0; -+ -+ bbr->ack_epoch_mstamp = tp->tcp_mstamp; -+ bbr->ack_epoch_acked = 0; -+ bbr->extra_acked_win_rtts = 0; -+ bbr->extra_acked_win_idx = 0; -+ bbr->extra_acked[0] = 0; -+ bbr->extra_acked[1] = 0; -+ -+ bbr->ce_state = 0; -+ bbr->prior_rcv_nxt = tp->rcv_nxt; -+ bbr->try_fast_path = 0; -+ -+ cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); -+} -+ -+static u32 bbr_sndbuf_expand(struct sock *sk) -+{ -+ /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ -+ return 3; -+} -+ -+/* __________________________________________________________________________ -+ * -+ * Functions new to BBR v2 ("bbr") congestion control are below here. -+ * __________________________________________________________________________ -+ */ -+ -+/* Incorporate a new bw sample into the current window of our max filter. */ -+static void bbr2_take_bw_hi_sample(struct sock *sk, u32 bw) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); -+} -+ -+/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ -+static void bbr2_advance_bw_hi_filter(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (!bbr->bw_hi[1]) -+ return; /* no samples in this window; remember old window */ -+ bbr->bw_hi[0] = bbr->bw_hi[1]; -+ bbr->bw_hi[1] = 0; -+} -+ -+/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ -+static u32 bbr2_target_inflight(struct sock *sk) -+{ -+ u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); -+ -+ return min(bdp, tcp_sk(sk)->snd_cwnd); -+} -+ -+static bool bbr2_is_probing_bandwidth(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return (bbr->mode == BBR_STARTUP) || -+ (bbr->mode == BBR_PROBE_BW && -+ (bbr->cycle_idx == BBR_BW_PROBE_REFILL || -+ bbr->cycle_idx == BBR_BW_PROBE_UP)); -+} -+ -+/* Has the given amount of time elapsed since we marked the phase start? */ -+static bool bbr2_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); -+ -+ return tcp_stamp_us_delta(tp->tcp_mstamp, -+ bbr->cycle_mstamp + interval_us) > 0; -+} -+ -+static void bbr2_handle_queue_too_high_in_startup(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->full_bw_reached = 1; -+ bbr->inflight_hi = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+} -+ -+/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. 
*/ -+static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || -+ !bbr->params.full_ecn_cnt || !bbr->params.ecn_thresh) -+ return; -+ -+ if (ce_ratio >= bbr->params.ecn_thresh) -+ bbr->startup_ecn_rounds++; -+ else -+ bbr->startup_ecn_rounds = 0; -+ -+ if (bbr->startup_ecn_rounds >= bbr->params.full_ecn_cnt) { -+ bbr->debug.event = 'E'; /* ECN caused STARTUP exit */ -+ bbr2_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+} -+ -+static void bbr2_update_ecn_alpha(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ s32 delivered, delivered_ce; -+ u64 alpha, ce_ratio; -+ u32 gain; -+ -+ if (bbr->params.ecn_factor == 0) -+ return; -+ -+ delivered = tp->delivered - bbr->alpha_last_delivered; -+ delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; -+ -+ if (delivered == 0 || /* avoid divide by zero */ -+ WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ -+ return; -+ -+ /* See if we should use ECN sender logic for this connection. */ -+ if (!bbr->ecn_eligible && bbr_ecn_enable && -+ (bbr->min_rtt_us <= bbr->params.ecn_max_rtt_us || -+ !bbr->params.ecn_max_rtt_us)) -+ bbr->ecn_eligible = 1; -+ -+ ce_ratio = (u64)delivered_ce << BBR_SCALE; -+ do_div(ce_ratio, delivered); -+ gain = bbr->params.ecn_alpha_gain; -+ alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; -+ alpha += (gain * ce_ratio) >> BBR_SCALE; -+ bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); -+ -+ bbr->alpha_last_delivered = tp->delivered; -+ bbr->alpha_last_delivered_ce = tp->delivered_ce; -+ -+ bbr2_check_ecn_too_high_in_startup(sk, ce_ratio); -+} -+ -+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ -+static void bbr2_raise_inflight_hi_slope(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 growth_this_round, cnt; -+ -+ /* Calculate "slope": packets S/Acked per inflight_hi increment. */ -+ growth_this_round = 1 << bbr->bw_probe_up_rounds; -+ bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); -+ cnt = tp->snd_cwnd / growth_this_round; -+ cnt = max(cnt, 1U); -+ bbr->bw_probe_up_cnt = cnt; -+ bbr->debug.event = 'G'; /* Grow inflight_hi slope */ -+} -+ -+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ -+static void bbr2_probe_inflight_hi_upward(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 delta; -+ -+ if (!tp->is_cwnd_limited || tp->snd_cwnd < bbr->inflight_hi) { -+ bbr->bw_probe_up_acks = 0; /* don't accumulate unused credits */ -+ return; /* not fully using inflight_hi, so don't grow it */ -+ } -+ -+ /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ -+ bbr->bw_probe_up_acks += rs->acked_sacked; -+ if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { -+ delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; -+ bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; -+ bbr->inflight_hi += delta; -+ bbr->debug.event = 'I'; /* Increment inflight_hi */ -+ } -+ -+ if (bbr->round_start) -+ bbr2_raise_inflight_hi_slope(sk); -+} -+ -+/* Does loss/ECN rate for this sample say inflight is "too high"?
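-+ * (A rough illustration, assuming the default loss_thresh of 2%: for a
-+ * flight of 300 packets the loss budget is 300 * 2% = 6 packets, so a
-+ * sample with rs->lost greater than 6 from that flight is judged "too
-+ * high". The flight size here is made up; only the 2% default comes
-+ * from the parameter declarations further below.)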
-+ * This is used by both the bbr2_check_loss_too_high_in_startup() function, -+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which -+ * uses it to notice when loss/ECN rates suggest inflight is too high. -+ */ -+static bool bbr2_is_inflight_too_high(const struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ const struct bbr *bbr = inet_csk_ca(sk); -+ u32 loss_thresh, ecn_thresh; -+ -+ if (rs->lost > 0 && rs->tx_in_flight) { -+ loss_thresh = (u64)rs->tx_in_flight * bbr->params.loss_thresh >> -+ BBR_SCALE; -+ if (rs->lost > loss_thresh) -+ return true; -+ } -+ -+ if (rs->delivered_ce > 0 && rs->delivered > 0 && -+ bbr->ecn_eligible && bbr->params.ecn_thresh) { -+ ecn_thresh = (u64)rs->delivered * bbr->params.ecn_thresh >> -+ BBR_SCALE; -+ if (rs->delivered_ce >= ecn_thresh) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* Calculate the tx_in_flight level that corresponded to excessive loss. -+ * We find "lost_prefix" segs of the skb where loss rate went too high, -+ * by solving for "lost_prefix" in the following equation: -+ * lost / inflight >= loss_thresh -+ * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh -+ * Then we take that equation, convert it to fixed point, and -+ * round up to the nearest packet. -+ */ -+static u32 bbr2_inflight_hi_from_lost_skb(const struct sock *sk, -+ const struct rate_sample *rs, -+ const struct sk_buff *skb) -+{ -+ const struct bbr *bbr = inet_csk_ca(sk); -+ u32 loss_thresh = bbr->params.loss_thresh; -+ u32 pcount, divisor, inflight_hi; -+ s32 inflight_prev, lost_prev; -+ u64 loss_budget, lost_prefix; -+ -+ pcount = tcp_skb_pcount(skb); -+ -+ /* How much data was in flight before this skb? */ -+ inflight_prev = rs->tx_in_flight - pcount; -+ if (WARN_ONCE(inflight_prev < 0, -+ "tx_in_flight: %u pcount: %u reneg: %u", -+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg)) -+ return ~0U; -+ -+ /* How much inflight data was marked lost before this skb? */ -+ lost_prev = rs->lost - pcount; -+ if (WARN_ON_ONCE(lost_prev < 0)) -+ return ~0U; -+ -+ /* At what prefix of this lost skb did loss rate exceed loss_thresh? */ -+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1; -+ loss_budget >>= BBR_SCALE; -+ if (lost_prev >= loss_budget) { -+ lost_prefix = 0; /* previous losses crossed loss_thresh */ -+ } else { -+ lost_prefix = loss_budget - lost_prev; -+ lost_prefix <<= BBR_SCALE; -+ divisor = BBR_UNIT - loss_thresh; -+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */ -+ return ~0U; -+ do_div(lost_prefix, divisor); -+ } -+ -+ inflight_hi = inflight_prev + lost_prefix; -+ return inflight_hi; -+} -+ -+/* If loss/ECN rates during probing indicated we may have overfilled a -+ * buffer, return an operating point that tries to leave unutilized headroom in -+ * the path for other flows, for fairness convergence and lower RTTs and loss. -+ */ -+static u32 bbr2_inflight_with_headroom(const struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 headroom, headroom_fraction; -+ -+ if (bbr->inflight_hi == ~0U) -+ return ~0U; -+ -+ headroom_fraction = bbr->params.inflight_headroom; -+ headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE; -+ headroom = max(headroom, 1U); -+ return max_t(s32, bbr->inflight_hi - headroom, -+ bbr->params.cwnd_min_target); -+} -+ -+/* Bound cwnd to a sensible level, based on our current probing state -+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
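-+ * Roughly, summarizing the body below: the cap is inflight_hi while
-+ * actively probing up in PROBE_BW, bbr2_inflight_with_headroom() in
-+ * PROBE_RTT or while cruising, and in all cases it is further bounded
-+ * by inflight_lo and raised to at least cwnd_min_target.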
-+ */ -+static void bbr2_bound_cwnd_for_inflight_model(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 cap; -+ -+ /* tcp_rcv_synsent_state_process() currently calls tcp_ack() -+ * and thus cong_control() without first initializing us(!). -+ */ -+ if (!bbr->initialized) -+ return; -+ -+ cap = ~0U; -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { -+ /* Probe to see if more packets fit in the path. */ -+ cap = bbr->inflight_hi; -+ } else { -+ if (bbr->mode == BBR_PROBE_RTT || -+ (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) -+ cap = bbr2_inflight_with_headroom(sk); -+ } -+ /* Adapt to any loss/ECN since our last bw probe. */ -+ cap = min(cap, bbr->inflight_lo); -+ -+ cap = max_t(u32, cap, bbr->params.cwnd_min_target); -+ tp->snd_cwnd = min(cap, tp->snd_cwnd); -+} -+ -+/* Estimate a short-term lower bound on the capacity available now, based -+ * on measurements of the current delivery process and recent history. When we -+ * are seeing loss/ECN at times when we are not probing bw, then conservatively -+ * move toward flow balance by multiplicatively cutting our short-term -+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a -+ * multiplicative decrease in order to converge to a lower capacity in time -+ * logarithmic in the magnitude of the decrease. -+ * -+ * However, we do not cut our short-term estimates lower than the current rate -+ * and volume of delivered data from this round trip, since from the current -+ * delivery process we can estimate the measured capacity available now. -+ * -+ * Anything faster than that approach would knowingly risk high loss, which can -+ * cause low bw for Reno/CUBIC and high loss recovery latency for -+ * request/response flows using any congestion control. -+ */ -+static void bbr2_adapt_lower_bounds(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 ecn_cut, ecn_inflight_lo, beta; -+ -+ /* We only use lower-bound estimates when not probing bw. -+ * When probing we need to push inflight higher to probe bw. -+ */ -+ if (bbr2_is_probing_bandwidth(sk)) -+ return; -+ -+ /* ECN response. */ -+ if (bbr->ecn_in_round && bbr->ecn_eligible && bbr->params.ecn_factor) { -+ /* Reduce inflight to (1 - alpha*ecn_factor). */ -+ ecn_cut = (BBR_UNIT - -+ ((bbr->ecn_alpha * bbr->params.ecn_factor) >> -+ BBR_SCALE)); -+ if (bbr->inflight_lo == ~0U) -+ bbr->inflight_lo = tp->snd_cwnd; -+ ecn_inflight_lo = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; -+ } else { -+ ecn_inflight_lo = ~0U; -+ } -+ -+ /* Loss response. */ -+ if (bbr->loss_in_round) { -+ /* Reduce bw and inflight to (1 - beta). */ -+ if (bbr->bw_lo == ~0U) -+ bbr->bw_lo = bbr_max_bw(sk); -+ if (bbr->inflight_lo == ~0U) -+ bbr->inflight_lo = tp->snd_cwnd; -+ beta = bbr->params.beta; -+ bbr->bw_lo = -+ max_t(u32, bbr->bw_latest, -+ (u64)bbr->bw_lo * -+ (BBR_UNIT - beta) >> BBR_SCALE); -+ bbr->inflight_lo = -+ max_t(u32, bbr->inflight_latest, -+ (u64)bbr->inflight_lo * -+ (BBR_UNIT - beta) >> BBR_SCALE); -+ } -+ -+ /* Adjust to the lower of the levels implied by loss or ECN. */ -+ bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); -+} -+ -+/* Reset any short-term lower-bound adaptation to congestion, so that we can -+ * push our inflight up. 
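-+ * ("Reset" here simply means setting bw_lo and inflight_lo back to ~0U,
-+ * i.e. "unset", so that only the long-term bw_hi/inflight_hi model
-+ * constrains us until fresh loss/ECN signals arrive.)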
-+ */ -+static void bbr2_reset_lower_bounds(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->bw_lo = ~0U; -+ bbr->inflight_lo = ~0U; -+} -+ -+/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state -+ * machine phase where we adapt our lower bound based on congestion signals. -+ */ -+static void bbr2_reset_congestion_signals(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+ bbr->loss_in_cycle = 0; -+ bbr->ecn_in_cycle = 0; -+ bbr->bw_latest = 0; -+ bbr->inflight_latest = 0; -+} -+ -+/* Update (most of) our congestion signals: track the recent rate and volume of -+ * delivered data, presence of loss, and EWMA degree of ECN marking. -+ */ -+static void bbr2_update_congestion_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw; -+ -+ bbr->loss_round_start = 0; -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ bw = ctx->sample_bw; -+ -+ if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) -+ bbr2_take_bw_hi_sample(sk, bw); -+ -+ bbr->loss_in_round |= (rs->losses > 0); -+ -+ /* Update rate and volume of delivered data from latest round trip: */ -+ bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); -+ bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); -+ -+ if (before(rs->prior_delivered, bbr->loss_round_delivered)) -+ return; /* skip the per-round-trip updates */ -+ /* Now do per-round-trip updates. */ -+ bbr->loss_round_delivered = tp->delivered; /* mark round trip */ -+ bbr->loss_round_start = 1; -+ bbr2_adapt_lower_bounds(sk); -+ -+ /* Update windowed "latest" (single-round-trip) filters. */ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+ bbr->bw_latest = ctx->sample_bw; -+ bbr->inflight_latest = rs->delivered; -+} -+ -+/* Bandwidth probing can cause loss. To help coexistence with loss-based -+ * congestion control we spread out our probing in a Reno-conscious way. Due to -+ * the shape of the Reno sawtooth, the time required between loss epochs for an -+ * idealized Reno flow is a number of round trips that is the BDP of that -+ * flow. We count packet-timed round trips directly, since measured RTT can -+ * vary widely, and Reno is driven by packet-timed round trips. -+ */ -+static bool bbr2_is_reno_coexistence_probe_time(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 inflight, rounds, reno_gain, reno_rounds; -+ -+ /* Random loss can shave some small percentage off of our inflight -+ * in each round. To survive this, flows need robust periodic probes. -+ */ -+ rounds = bbr->params.bw_probe_max_rounds; -+ -+ reno_gain = bbr->params.bw_probe_reno_gain; -+ if (reno_gain) { -+ inflight = bbr2_target_inflight(sk); -+ reno_rounds = ((u64)inflight * reno_gain) >> BBR_SCALE; -+ rounds = min(rounds, reno_rounds); -+ } -+ return bbr->rounds_since_probe >= rounds; -+} -+ -+/* How long do we want to wait before probing for bandwidth (and risking -+ * loss)? We randomize the wait, for better mixing and fairness convergence. -+ * -+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. 
-+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, -+ * (eg 4K video to a broadband user): -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ * -+ * We bound the BBR-native inter-bw-probe wall clock time to be: -+ * (a) higher than 2 sec: to try to avoid causing loss for a long enough time -+ * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must -+ * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs -+ * (b) lower than 3 sec: to ensure flows can start probing in a reasonable -+ * amount of time to discover unutilized bw on human-scale interactive -+ * time-scales (e.g. perhaps traffic from a web page download that we -+ * were competing with is now complete). -+ */ -+static void bbr2_pick_probe_wait(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Decide the random round-trip bound for wait until probe: */ -+ bbr->rounds_since_probe = -+ get_random_u32_below(bbr->params.bw_probe_rand_rounds); -+ /* Decide the random wall clock bound for wait until probe: */ -+ bbr->probe_wait_us = bbr->params.bw_probe_base_us + -+ get_random_u32_below(bbr->params.bw_probe_rand_us); -+} -+ -+static void bbr2_set_cycle_idx(struct sock *sk, int cycle_idx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->cycle_idx = cycle_idx; -+ /* New phase, so need to update cwnd and pacing rate. */ -+ bbr->try_fast_path = 0; -+} -+ -+/* Send at estimated bw to fill the pipe, but not queue. We need this phase -+ * before PROBE_UP, because as soon as we send faster than the available bw -+ * we will start building a queue, and if the buffer is shallow we can cause -+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and -+ * inflight_hi estimates will underestimate. -+ */ -+static void bbr2_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr2_reset_lower_bounds(sk); -+ if (bbr->inflight_hi != ~0U) -+ bbr->inflight_hi += bbr->params.refill_add_inc; -+ bbr->bw_probe_up_rounds = bw_probe_up_rounds; -+ bbr->bw_probe_up_acks = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_REFILLING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr2_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); -+} -+ -+/* Now probe max deliverable data rate and volume. */ -+static void bbr2_start_bw_probe_up(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->ack_phase = BBR_ACKS_PROBE_STARTING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr->cycle_mstamp = tp->tcp_mstamp; -+ bbr2_set_cycle_idx(sk, BBR_BW_PROBE_UP); -+ bbr2_raise_inflight_hi_slope(sk); -+} -+ -+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall -+ * clock time at which to probe beyond an inflight that we think to be -+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to -+ * keep packet loss rates low. Also start a round-trip counter, to probe faster -+ * if we estimate a Reno flow at our BDP would probe faster. 
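-+ * (Concretely, bbr2_pick_probe_wait() above draws the wall clock wait
-+ * uniformly from bw_probe_base_us plus up to bw_probe_rand_us, i.e.
-+ * 2-3 seconds with the module defaults, and seeds the round-trip bound
-+ * with up to bw_probe_rand_rounds of random jitter.)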
-+ */ -+static void bbr2_start_bw_probe_down(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr2_reset_congestion_signals(sk); -+ bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ -+ bbr2_pick_probe_wait(sk); -+ bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr2_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); -+} -+ -+/* Cruise: maintain what we estimate to be a neutral, conservative -+ * operating point, without attempting to probe up for bandwidth or down for -+ * RTT, and only reducing inflight in response to loss/ECN signals. -+ */ -+static void bbr2_start_bw_probe_cruise(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->inflight_lo != ~0U) -+ bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); -+ -+ bbr2_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); -+} -+ -+/* Loss and/or ECN rate is too high while probing. -+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. -+ */ -+static void bbr2_handle_inflight_too_high(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ const u32 beta = bbr->params.beta; -+ -+ bbr->prev_probe_too_high = 1; -+ bbr->bw_probe_samples = 0; /* only react once per probe */ -+ bbr->debug.event = 'L'; /* Loss/ECN too high */ -+ /* If we are app-limited then we are not robustly -+ * probing the max volume of inflight data we think -+ * might be safe (analogous to how app-limited bw -+ * samples are not known to be robustly probing bw). -+ */ -+ if (!rs->is_app_limited) -+ bbr->inflight_hi = max_t(u32, rs->tx_in_flight, -+ (u64)bbr2_target_inflight(sk) * -+ (BBR_UNIT - beta) >> BBR_SCALE); -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr2_start_bw_probe_down(sk); -+} -+ -+/* If we're seeing bw and loss samples reflecting our bw probing, adapt -+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt -+ * inflight_hi downward. If we're able to push inflight higher without such -+ * signals, push higher: adapt inflight_hi upward. -+ */ -+static bool bbr2_adapt_upper_bounds(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Track when we'll see bw/loss samples resulting from our bw probes. */ -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) -+ bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { -+ /* End of samples from bw probing phase. */ -+ bbr->bw_probe_samples = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ /* At this point in the cycle, our current bw sample is also -+ * our best recent chance at finding the highest available bw -+ * for this flow. So now is the best time to forget the bw -+ * samples from the previous cycle, by advancing the window. -+ */ -+ if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) -+ bbr2_advance_bw_hi_filter(sk); -+ /* If we had an inflight_hi, then probed and pushed inflight all -+ * the way up to hit that inflight_hi without seeing any -+ * high loss/ECN in all the resulting ACKs from that probing, -+ * then probe up again, this time letting inflight persist at -+ * inflight_hi for a round trip, then accelerating beyond. 
-+ */ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { -+ bbr->debug.event = 'R'; /* reprobe */ -+ bbr2_start_bw_probe_refill(sk, 0); -+ return true; /* yes, decided state transition */ -+ } -+ } -+ -+ if (bbr2_is_inflight_too_high(sk, rs)) { -+ if (bbr->bw_probe_samples) /* sample is from bw probing? */ -+ bbr2_handle_inflight_too_high(sk, rs); -+ } else { -+ /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ -+ if (bbr->inflight_hi == ~0U) /* no excess queue signals yet? */ -+ return false; -+ -+ /* To be resilient to random loss, we must raise inflight_hi -+ * if we observe in any phase that a higher level is safe. -+ */ -+ if (rs->tx_in_flight > bbr->inflight_hi) { -+ bbr->inflight_hi = rs->tx_in_flight; -+ bbr->debug.event = 'U'; /* raise up inflight_hi */ -+ } -+ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr2_probe_inflight_hi_upward(sk, rs); -+ } -+ -+ return false; -+} -+ -+/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ -+static bool bbr2_check_time_to_probe_bw(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 n; -+ -+ /* If we seem to be at an operating point where we are not seeing loss -+ * but we are seeing ECN marks, then when the ECN marks cease we reprobe -+ * quickly (in case a burst of cross-traffic has ceased and freed up bw, -+ * or in case we are sharing with multiplicatively probing traffic). -+ */ -+ if (bbr->params.ecn_reprobe_gain && bbr->ecn_eligible && -+ bbr->ecn_in_cycle && !bbr->loss_in_cycle && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { -+ bbr->debug.event = 'A'; /* *A*ll clear to probe *A*gain */ -+ /* Calculate n so that when bbr2_raise_inflight_hi_slope() -+ * computes growth_this_round as 2^n it will be roughly the -+ * desired volume of data (inflight_hi*ecn_reprobe_gain). -+ */ -+ n = ilog2((((u64)bbr->inflight_hi * -+ bbr->params.ecn_reprobe_gain) >> BBR_SCALE)); -+ bbr2_start_bw_probe_refill(sk, n); -+ return true; -+ } -+ -+ if (bbr2_has_elapsed_in_phase(sk, bbr->probe_wait_us) || -+ bbr2_is_reno_coexistence_probe_time(sk)) { -+ bbr2_start_bw_probe_refill(sk, 0); -+ return true; -+ } -+ return false; -+} -+ -+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ -+static bool bbr2_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ bool is_under_bdp, is_long_enough; -+ -+ /* Always need to pull inflight down to leave headroom in queue. */ -+ if (inflight > bbr2_inflight_with_headroom(sk)) -+ return false; -+ -+ is_under_bdp = inflight <= bbr_inflight(sk, bw, BBR_UNIT); -+ if (bbr->params.drain_to_target) -+ return is_under_bdp; -+ -+ is_long_enough = bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us); -+ return is_under_bdp || is_long_enough; -+} -+ -+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ -+static void bbr2_update_cycle_phase(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ bool is_risky = false, is_queuing = false; -+ u32 inflight, bw; -+ -+ if (!bbr_full_bw_reached(sk)) -+ return; -+ -+ /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. 
*/ -+ if (bbr2_adapt_upper_bounds(sk, rs)) -+ return; /* already decided state transition */ -+ -+ if (bbr->mode != BBR_PROBE_BW) -+ return; -+ -+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -+ bw = bbr_max_bw(sk); -+ -+ switch (bbr->cycle_idx) { -+ /* First we spend most of our time cruising with a pacing_gain of 1.0, -+ * which paces at the estimated bw, to try to fully use the pipe -+ * without building queue. If we encounter loss/ECN marks, we adapt -+ * by slowing down. -+ */ -+ case BBR_BW_PROBE_CRUISE: -+ if (bbr2_check_time_to_probe_bw(sk)) -+ return; /* already decided state transition */ -+ break; -+ -+ /* After cruising, when it's time to probe, we first "refill": we send -+ * at the estimated bw to fill the pipe, before probing higher and -+ * knowingly risking overflowing the bottleneck buffer (causing loss). -+ */ -+ case BBR_BW_PROBE_REFILL: -+ if (bbr->round_start) { -+ /* After one full round trip of sending in REFILL, we -+ * start to see bw samples reflecting our REFILL, which -+ * may be putting too much data in flight. -+ */ -+ bbr->bw_probe_samples = 1; -+ bbr2_start_bw_probe_up(sk); -+ } -+ break; -+ -+ /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to -+ * probe for bw. If we have not seen loss/ECN, we try to raise inflight -+ * to at least pacing_gain*BDP; note that this may take more than -+ * min_rtt if min_rtt is small (e.g. on a LAN). -+ * -+ * We terminate PROBE_UP bandwidth probing upon any of the following: -+ * -+ * (1) We've pushed inflight up to hit the inflight_hi target set in the -+ * most recent previous bw probe phase. Thus we want to start -+ * draining the queue immediately because it's very likely the most -+ * recently sent packets will fill the queue and cause drops. -+ * (checked here) -+ * (2) We have probed for at least 1*min_rtt_us, and the -+ * estimated queue is high enough (inflight > 1.25 * estimated_bdp). -+ * (checked here) -+ * (3) Loss filter says loss rate is "too high". -+ * (checked in bbr2_is_inflight_too_high()) -+ * (4) ECN filter says ECN mark rate is "too high". -+ * (checked in bbr2_is_inflight_too_high()) -+ */ -+ case BBR_BW_PROBE_UP: -+ if (bbr->prev_probe_too_high && -+ inflight >= bbr->inflight_hi) { -+ bbr->stopped_risky_probe = 1; -+ is_risky = true; -+ bbr->debug.event = 'D'; /* D for danger */ -+ } else if (bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us) && -+ inflight >= -+ bbr_inflight(sk, bw, -+ bbr->params.bw_probe_pif_gain)) { -+ is_queuing = true; -+ bbr->debug.event = 'Q'; /* building Queue */ -+ } -+ if (is_risky || is_queuing) { -+ bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ -+ bbr2_start_bw_probe_down(sk); /* restart w/ down */ -+ } -+ break; -+ -+ /* After probing in PROBE_UP, we have usually accumulated some data in -+ * the bottleneck buffer (if bw probing didn't find more bw). We next -+ * enter PROBE_DOWN to try to drain any excess data from the queue. To -+ * do this, we use a pacing_gain < 1.0. We hold this pacing gain until -+ * our inflight is less than that target cruising point, which is the -+ * minimum of (a) the amount needed to leave headroom, and (b) the -+ * estimated BDP. Once inflight falls to match the target, we estimate -+ * the queue is drained; persisting would underutilize the pipe.
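-+ * (Illustrative numbers, assuming the default inflight_headroom of 15%:
-+ * with inflight_hi = 200 packets, bbr2_inflight_with_headroom() yields
-+ * a cruise ceiling of about 200 - 30 = 170 packets.)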
-+ */ -+ case BBR_BW_PROBE_DOWN: -+ if (bbr2_check_time_to_probe_bw(sk)) -+ return; /* already decided state transition */ -+ if (bbr2_check_time_to_cruise(sk, inflight, bw)) -+ bbr2_start_bw_probe_cruise(sk); -+ break; -+ -+ default: -+ WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); -+ } -+} -+ -+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */ -+static void bbr2_exit_probe_rtt(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr2_reset_lower_bounds(sk); -+ if (bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_PROBE_BW; -+ /* Raising inflight after PROBE_RTT may cause loss, so reset -+ * the PROBE_BW clock and schedule the next bandwidth probe for -+ * a friendly and randomized future point in time. -+ */ -+ bbr2_start_bw_probe_down(sk); -+ /* Since we are exiting PROBE_RTT, we know inflight is -+ * below our estimated BDP, so it is reasonable to cruise. -+ */ -+ bbr2_start_bw_probe_cruise(sk); -+ } else { -+ bbr->mode = BBR_STARTUP; -+ } -+} -+ -+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until -+ * the end of the round in recovery to get a good estimate of how many packets -+ * have been lost, and how many we need to drain with a low pacing rate. -+ */ -+static void bbr2_check_loss_too_high_in_startup(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk)) -+ return; -+ -+ /* For STARTUP exit, check the loss rate at the end of each round trip -+ * of Recovery episodes in STARTUP. We check the loss rate at the end -+ * of the round trip to filter out noisy/low loss and have a better -+ * sense of inflight (extent of loss), so we can drain more accurately. -+ */ -+ if (rs->losses && bbr->loss_events_in_round < 0xf) -+ bbr->loss_events_in_round++; /* update saturating counter */ -+ if (bbr->params.full_loss_cnt && bbr->loss_round_start && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && -+ bbr->loss_events_in_round >= bbr->params.full_loss_cnt && -+ bbr2_is_inflight_too_high(sk, rs)) { -+ bbr->debug.event = 'P'; /* Packet loss caused STARTUP exit */ -+ bbr2_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+ if (bbr->loss_round_start) -+ bbr->loss_events_in_round = 0; -+} -+ -+/* If we are done draining, advance into steady state operation in PROBE_BW. */ -+static void bbr2_check_drain(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_check_drain(sk, rs, ctx)) { -+ bbr->mode = BBR_PROBE_BW; -+ bbr2_start_bw_probe_down(sk); -+ } -+} -+ -+static void bbr2_update_model(struct sock *sk, const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ bbr2_update_congestion_signals(sk, rs, ctx); -+ bbr_update_ack_aggregation(sk, rs); -+ bbr2_check_loss_too_high_in_startup(sk, rs); -+ bbr_check_full_bw_reached(sk, rs); -+ bbr2_check_drain(sk, rs, ctx); -+ bbr2_update_cycle_phase(sk, rs); -+ bbr_update_min_rtt(sk, rs); -+} -+ -+/* Fast path for app-limited case. -+ * -+ * On each ack, we execute bbr state machine, which primarily consists of: -+ * 1) update model based on new rate sample, and -+ * 2) update control based on updated model or state change. -+ * -+ * There are certain workload/scenarios, e.g. app-limited case, where -+ * either we can skip updating the model, or we can skip updating both the -+ * model and the control. This provides significant softirq cpu savings for -+ * processing incoming acks.
-+ * -+ * In the app-limited case, if there is no congestion (loss/ECN) and -+ * the observed bw sample is less than the current estimated bw, then we can -+ * skip some of the computation in bbr state processing: -+ * -+ * - if there is no rtt/mode/phase change: In this case, since all the -+ * parameters of the network model are constant, we can skip the model -+ * as well as the control update. -+ * -+ * - else we can skip the rest of the model update. But we still need to -+ * update the control to account for the new rtt/mode/phase. -+ * -+ * Returns whether we can take the fast path or not. -+ */ -+static bool bbr2_fast_path(struct sock *sk, bool *update_model, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 prev_min_rtt_us, prev_mode; -+ -+ if (bbr->params.fast_path && bbr->try_fast_path && -+ rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && -+ !bbr->loss_in_round && !bbr->ecn_in_round) { -+ prev_mode = bbr->mode; -+ prev_min_rtt_us = bbr->min_rtt_us; -+ bbr2_check_drain(sk, rs, ctx); -+ bbr2_update_cycle_phase(sk, rs); -+ bbr_update_min_rtt(sk, rs); -+ -+ if (bbr->mode == prev_mode && -+ bbr->min_rtt_us == prev_min_rtt_us && -+ bbr->try_fast_path) -+ return true; -+ -+ /* Skip model update, but control still needs to be updated */ -+ *update_model = false; -+ } -+ return false; -+} -+ -+static void bbr2_main(struct sock *sk, const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct bbr_context ctx = { 0 }; -+ bool update_model = true; -+ u32 bw; -+ -+ bbr->debug.event = '.'; /* init to default NOP (no event yet) */ -+ -+ bbr_update_round_start(sk, rs, &ctx); -+ if (bbr->round_start) { -+ bbr->rounds_since_probe = -+ min_t(s32, bbr->rounds_since_probe + 1, 0xFF); -+ bbr2_update_ecn_alpha(sk); -+ } -+ -+ bbr->ecn_in_round |= rs->is_ece; -+ bbr_calculate_bw_sample(sk, rs, &ctx); -+ -+ if (bbr2_fast_path(sk, &update_model, rs, &ctx)) -+ goto out; -+ -+ if (update_model) -+ bbr2_update_model(sk, rs, &ctx); -+ -+ bbr_update_gains(sk); -+ bw = bbr_bw(sk); -+ bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); -+ bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, -+ tp->snd_cwnd, &ctx); -+ bbr2_bound_cwnd_for_inflight_model(sk); -+ -+out: -+ bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; -+ bbr->loss_in_cycle |= rs->lost > 0; -+ bbr->ecn_in_cycle |= rs->delivered_ce > 0; -+ -+ bbr_debug(sk, rs->acked_sacked, rs, &ctx); -+} -+ -+/* Module parameters that are settable by TCP_CONGESTION_PARAMS are declared -+ * down here, so that the algorithm functions that use the parameters must use -+ * the per-socket parameters; if they accidentally use the global version -+ * then there will be a compile error. -+ * TODO(ncardwell): move all per-socket parameters down to this section. -+ */ -+ -+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. -+ * No loss response when 0. Max allowed value is 255. -+ */ -+static u32 bbr_beta = BBR_UNIT * 30 / 100; -+ -+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE. -+ * Max allowed value is 255. -+ */ -+static u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; /* 1/16 = 6.25% */ -+ -+/* The initial value for the ecn_alpha state variable. Default and max -+ * BBR_UNIT (256), representing 1.0. This allows a flow to respond quickly -+ * to congestion if the bottleneck is congested when the flow starts up.
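-+ * (For reference, bbr2_update_ecn_alpha() above maintains alpha as an
-+ * EWMA: alpha = (1 - gain) * alpha + gain * ce_ratio; with the default
-+ * ecn_alpha_gain of 1/16, an initial alpha of 1.0 moves toward the
-+ * observed CE mark ratio by about 6.25% per update.)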
-+ */ -+static u32 bbr_ecn_alpha_init = BBR_UNIT; /* 1.0, to respond quickly */ -+ -+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. -+ * No ECN based bounding when 0. Max allowed value is 255. -+ */ -+static u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ -+ -+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. -+ * Scaled by BBR_SCALE. Disabled when 0. Max allowed is 255. -+ */ -+static u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ -+ -+/* Max RTT (in usec) at which to use sender-side ECN logic. -+ * Disabled when 0 (ECN allowed at any RTT). -+ * Max allowed for the parameter is 524287 (0x7ffff) us, ~524 ms. -+ */ -+static u32 bbr_ecn_max_rtt_us = 5000; -+ -+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN -+ * clears then use a multiplicative increase to quickly reprobe bw by -+ * starting inflight probing at the given multiple of inflight_hi. -+ * Default for this experimental knob is 0 (disabled). -+ * Planned value for experiments: BBR_UNIT * 1 / 2 = 128, representing 0.5. -+ */ -+static u32 bbr_ecn_reprobe_gain; -+ -+/* Estimate bw probing has gone too far if loss rate exceeds this level. */ -+static u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ -+ -+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, -+ * and loss rate is higher than bbr_loss_thresh. -+ * Disabled if 0. Max allowed value is 15 (0xF). -+ */ -+static u32 bbr_full_loss_cnt = 8; -+ -+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh -+ * meets this count. Max allowed value is 3. -+ */ -+static u32 bbr_full_ecn_cnt = 2; -+ -+/* Fraction of unutilized headroom to try to leave in path upon high loss. */ -+static u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; -+ -+/* Multiplier to get target inflight (as multiple of BDP) for PROBE_UP phase. -+ * Default is 1.25x, as in BBR v1. Max allowed is 511. -+ */ -+static u32 bbr_bw_probe_pif_gain = BBR_UNIT * 5 / 4; -+ -+/* Multiplier to get Reno-style probe epoch duration as: k * BDP round trips. -+ * If zero, disables this BBR v2 Reno-style BDP-scaled coexistence mechanism. -+ * Max allowed is 511. -+ */ -+static u32 bbr_bw_probe_reno_gain = BBR_UNIT; -+ -+/* Max number of packet-timed rounds to wait before probing for bandwidth. If -+ * we want to tolerate 1% random loss per round, and not have this cut our -+ * inflight too much, we must probe for bw periodically on roughly this scale. -+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. -+ * We aim to be fair with Reno/CUBIC up to a BDP of at least: -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ */ -+static u32 bbr_bw_probe_max_rounds = 63; -+ -+/* Max amount of randomness to inject in round counting for Reno-coexistence. -+ * Max value is 15. -+ */ -+static u32 bbr_bw_probe_rand_rounds = 2; -+ -+/* Use BBR-native probe time scale starting at this many usec. -+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: -+ * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs -+ */ -+static u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ -+ -+/* Use BBR-native probes spread over this many usec: */ -+static u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 sec */ -+ -+/* Undo the model changes made in loss recovery if recovery was spurious? */ -+static bool bbr_undo = true; -+ -+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached?
*/ -+static bool bbr_fast_path = true; /* default: enabled */ -+ -+/* Use fast ack mode ? */ -+static int bbr_fast_ack_mode = 1; /* default: rwnd check off */ -+ -+/* How much to additively increase inflight_hi when entering REFILL? */ -+static u32 bbr_refill_add_inc; /* default: disabled */ -+ -+module_param_named(beta, bbr_beta, uint, 0644); -+module_param_named(ecn_alpha_gain, bbr_ecn_alpha_gain, uint, 0644); -+module_param_named(ecn_alpha_init, bbr_ecn_alpha_init, uint, 0644); -+module_param_named(ecn_factor, bbr_ecn_factor, uint, 0644); -+module_param_named(ecn_thresh, bbr_ecn_thresh, uint, 0644); -+module_param_named(ecn_max_rtt_us, bbr_ecn_max_rtt_us, uint, 0644); -+module_param_named(ecn_reprobe_gain, bbr_ecn_reprobe_gain, uint, 0644); -+module_param_named(loss_thresh, bbr_loss_thresh, uint, 0664); -+module_param_named(full_loss_cnt, bbr_full_loss_cnt, uint, 0664); -+module_param_named(full_ecn_cnt, bbr_full_ecn_cnt, uint, 0664); -+module_param_named(inflight_headroom, bbr_inflight_headroom, uint, 0664); -+module_param_named(bw_probe_pif_gain, bbr_bw_probe_pif_gain, uint, 0664); -+module_param_named(bw_probe_reno_gain, bbr_bw_probe_reno_gain, uint, 0664); -+module_param_named(bw_probe_max_rounds, bbr_bw_probe_max_rounds, uint, 0664); -+module_param_named(bw_probe_rand_rounds, bbr_bw_probe_rand_rounds, uint, 0664); -+module_param_named(bw_probe_base_us, bbr_bw_probe_base_us, uint, 0664); -+module_param_named(bw_probe_rand_us, bbr_bw_probe_rand_us, uint, 0664); -+module_param_named(undo, bbr_undo, bool, 0664); -+module_param_named(fast_path, bbr_fast_path, bool, 0664); -+module_param_named(fast_ack_mode, bbr_fast_ack_mode, uint, 0664); -+module_param_named(refill_add_inc, bbr_refill_add_inc, uint, 0664); -+ -+static void bbr2_init(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_init(sk); /* run shared init code for v1 and v2 */ -+ -+ /* BBR v2 parameters: */ -+ bbr->params.beta = min_t(u32, 0xFFU, bbr_beta); -+ bbr->params.ecn_alpha_gain = min_t(u32, 0xFFU, bbr_ecn_alpha_gain); -+ bbr->params.ecn_alpha_init = min_t(u32, BBR_UNIT, bbr_ecn_alpha_init); -+ bbr->params.ecn_factor = min_t(u32, 0xFFU, bbr_ecn_factor); -+ bbr->params.ecn_thresh = min_t(u32, 0xFFU, bbr_ecn_thresh); -+ bbr->params.ecn_max_rtt_us = min_t(u32, 0x7ffffU, bbr_ecn_max_rtt_us); -+ bbr->params.ecn_reprobe_gain = min_t(u32, 0x1FF, bbr_ecn_reprobe_gain); -+ bbr->params.loss_thresh = min_t(u32, 0xFFU, bbr_loss_thresh); -+ bbr->params.full_loss_cnt = min_t(u32, 0xFU, bbr_full_loss_cnt); -+ bbr->params.full_ecn_cnt = min_t(u32, 0x3U, bbr_full_ecn_cnt); -+ bbr->params.inflight_headroom = -+ min_t(u32, 0xFFU, bbr_inflight_headroom); -+ bbr->params.bw_probe_pif_gain = -+ min_t(u32, 0x1FFU, bbr_bw_probe_pif_gain); -+ bbr->params.bw_probe_reno_gain = -+ min_t(u32, 0x1FFU, bbr_bw_probe_reno_gain); -+ bbr->params.bw_probe_max_rounds = -+ min_t(u32, 0xFFU, bbr_bw_probe_max_rounds); -+ bbr->params.bw_probe_rand_rounds = -+ min_t(u32, 0xFU, bbr_bw_probe_rand_rounds); -+ bbr->params.bw_probe_base_us = -+ min_t(u32, (1 << 26) - 1, bbr_bw_probe_base_us); -+ bbr->params.bw_probe_rand_us = -+ min_t(u32, (1 << 26) - 1, bbr_bw_probe_rand_us); -+ bbr->params.undo = bbr_undo; -+ bbr->params.fast_path = bbr_fast_path ? 
1 : 0; -+ bbr->params.refill_add_inc = min_t(u32, 0x3U, bbr_refill_add_inc); -+ -+ /* BBR v2 state: */ -+ bbr->initialized = 1; -+ /* Start sampling ECN mark rate after first full flight is ACKed: */ -+ bbr->loss_round_delivered = tp->delivered + 1; -+ bbr->loss_round_start = 0; -+ bbr->undo_bw_lo = 0; -+ bbr->undo_inflight_lo = 0; -+ bbr->undo_inflight_hi = 0; -+ bbr->loss_events_in_round = 0; -+ bbr->startup_ecn_rounds = 0; -+ bbr2_reset_congestion_signals(sk); -+ bbr->bw_lo = ~0U; -+ bbr->bw_hi[0] = 0; -+ bbr->bw_hi[1] = 0; -+ bbr->inflight_lo = ~0U; -+ bbr->inflight_hi = ~0U; -+ bbr->bw_probe_up_cnt = ~0U; -+ bbr->bw_probe_up_acks = 0; -+ bbr->bw_probe_up_rounds = 0; -+ bbr->probe_wait_us = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ bbr->rounds_since_probe = 0; -+ bbr->bw_probe_samples = 0; -+ bbr->prev_probe_too_high = 0; -+ bbr->ecn_eligible = 0; -+ bbr->ecn_alpha = bbr->params.ecn_alpha_init; -+ bbr->alpha_last_delivered = 0; -+ bbr->alpha_last_delivered_ce = 0; -+ -+ tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode); -+ -+ if ((tp->ecn_flags & TCP_ECN_OK) && bbr_ecn_enable) -+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; -+} -+ -+/* Core TCP stack informs us that the given skb was just marked lost. */ -+static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb); -+ struct rate_sample rs; -+ -+ /* Capture "current" data over the full round trip of loss, -+ * to have a better chance to see the full capacity of the path. -+ */ -+ if (!bbr->loss_in_round) /* first loss in this round trip? */ -+ bbr->loss_round_delivered = tp->delivered; /* set round trip */ -+ bbr->loss_in_round = 1; -+ bbr->loss_in_cycle = 1; -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not an skb sent while probing for bandwidth */ -+ if (unlikely(!scb->tx.delivered_mstamp)) -+ return; /* skb was SACKed, reneged, marked lost; ignore it */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this lost skb, -+ * then see if the loss rate went too high, and if so at which packet. -+ */ -+ memset(&rs, 0, sizeof(rs)); -+ rs.tx_in_flight = scb->tx.in_flight; -+ rs.lost = tp->lost - scb->tx.lost; -+ rs.is_app_limited = scb->tx.is_app_limited; -+ if (bbr2_is_inflight_too_high(sk, &rs)) { -+ rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb); -+ bbr2_handle_inflight_too_high(sk, &rs); -+ } -+} -+ -+/* Revert short-term model if current loss recovery event was spurious. */ -+static u32 bbr2_undo_cwnd(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->debug.undo = 1; -+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ -+ bbr->full_bw_cnt = 0; -+ bbr->loss_in_round = 0; -+ -+ if (!bbr->params.undo) -+ return tp->snd_cwnd; -+ -+ /* Revert to cwnd and other state saved before loss episode. */ -+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); -+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); -+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); -+ return bbr->prior_cwnd; -+} -+ -+/* Entering loss recovery, so save state for when we undo recovery. */ -+static u32 bbr2_ssthresh(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_save_cwnd(sk); -+ /* For undo, save state that adapts based on loss signal. 
*/ -+ bbr->undo_bw_lo = bbr->bw_lo; -+ bbr->undo_inflight_lo = bbr->inflight_lo; -+ bbr->undo_inflight_hi = bbr->inflight_hi; -+ return tcp_sk(sk)->snd_ssthresh; -+} -+ -+static enum tcp_bbr2_phase bbr2_get_phase(struct bbr *bbr) -+{ -+ switch (bbr->mode) { -+ case BBR_STARTUP: -+ return BBR2_PHASE_STARTUP; -+ case BBR_DRAIN: -+ return BBR2_PHASE_DRAIN; -+ case BBR_PROBE_BW: -+ break; -+ case BBR_PROBE_RTT: -+ return BBR2_PHASE_PROBE_RTT; -+ default: -+ return BBR2_PHASE_INVALID; -+ } -+ switch (bbr->cycle_idx) { -+ case BBR_BW_PROBE_UP: -+ return BBR2_PHASE_PROBE_BW_UP; -+ case BBR_BW_PROBE_DOWN: -+ return BBR2_PHASE_PROBE_BW_DOWN; -+ case BBR_BW_PROBE_CRUISE: -+ return BBR2_PHASE_PROBE_BW_CRUISE; -+ case BBR_BW_PROBE_REFILL: -+ return BBR2_PHASE_PROBE_BW_REFILL; -+ default: -+ return BBR2_PHASE_INVALID; -+ } -+} -+ -+static size_t bbr2_get_info(struct sock *sk, u32 ext, int *attr, -+ union tcp_cc_info *info) -+{ -+ if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || -+ ext & (1 << (INET_DIAG_VEGASINFO - 1))) { -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); -+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); -+ u64 bw_lo = bbr->bw_lo == ~0U ? -+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); -+ -+ memset(&info->bbr2, 0, sizeof(info->bbr2)); -+ info->bbr2.bbr_bw_lsb = (u32)bw; -+ info->bbr2.bbr_bw_msb = (u32)(bw >> 32); -+ info->bbr2.bbr_min_rtt = bbr->min_rtt_us; -+ info->bbr2.bbr_pacing_gain = bbr->pacing_gain; -+ info->bbr2.bbr_cwnd_gain = bbr->cwnd_gain; -+ info->bbr2.bbr_bw_hi_lsb = (u32)bw_hi; -+ info->bbr2.bbr_bw_hi_msb = (u32)(bw_hi >> 32); -+ info->bbr2.bbr_bw_lo_lsb = (u32)bw_lo; -+ info->bbr2.bbr_bw_lo_msb = (u32)(bw_lo >> 32); -+ info->bbr2.bbr_mode = bbr->mode; -+ info->bbr2.bbr_phase = (__u8)bbr2_get_phase(bbr); -+ info->bbr2.bbr_version = (__u8)2; -+ info->bbr2.bbr_inflight_lo = bbr->inflight_lo; -+ info->bbr2.bbr_inflight_hi = bbr->inflight_hi; -+ info->bbr2.bbr_extra_acked = bbr_extra_acked(sk); -+ *attr = INET_DIAG_BBRINFO; -+ return sizeof(info->bbr2); -+ } -+ return 0; -+} -+ -+static void bbr2_set_state(struct sock *sk, u8 new_state) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (new_state == TCP_CA_Loss) { -+ struct rate_sample rs = { .losses = 1 }; -+ struct bbr_context ctx = { 0 }; -+ -+ bbr->prev_ca_state = TCP_CA_Loss; -+ bbr->full_bw = 0; -+ if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { -+ /* bbr_adapt_lower_bounds() needs cwnd before -+ * we suffered an RTO, to update inflight_lo: -+ */ -+ bbr->inflight_lo = -+ max(tp->snd_cwnd, bbr->prior_cwnd); -+ } -+ bbr_debug(sk, 0, &rs, &ctx); -+ } else if (bbr->prev_ca_state == TCP_CA_Loss && -+ new_state != TCP_CA_Loss) { -+ tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); -+ bbr->try_fast_path = 0; /* bound cwnd using latest model */ -+ } -+} -+ -+static struct tcp_congestion_ops tcp_bbr2_cong_ops __read_mostly = { -+ .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, -+ .name = "bbr2", -+ .owner = THIS_MODULE, -+ .init = bbr2_init, -+ .cong_control = bbr2_main, -+ .sndbuf_expand = bbr_sndbuf_expand, -+ .skb_marked_lost = bbr2_skb_marked_lost, -+ .undo_cwnd = bbr2_undo_cwnd, -+ .cwnd_event = bbr_cwnd_event, -+ .ssthresh = bbr2_ssthresh, -+ .tso_segs = bbr_tso_segs, -+ .get_info = bbr2_get_info, -+ .set_state = bbr2_set_state, -+}; -+ -+static int __init bbr_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); -+ return tcp_register_congestion_control(&tcp_bbr2_cong_ops); -+} -+ -+static 
void __exit bbr_unregister(void) -+{ -+ tcp_unregister_congestion_control(&tcp_bbr2_cong_ops); -+} -+ -+module_init(bbr_register); -+module_exit(bbr_unregister); -+ -+MODULE_AUTHOR("Van Jacobson "); -+MODULE_AUTHOR("Neal Cardwell "); -+MODULE_AUTHOR("Yuchung Cheng "); -+MODULE_AUTHOR("Soheil Hassas Yeganeh "); -+MODULE_AUTHOR("Priyaranjan Jha "); -+MODULE_AUTHOR("Yousuk Seung "); -+MODULE_AUTHOR("Kevin Yang "); -+MODULE_AUTHOR("Arjun Roy "); -+ -+MODULE_LICENSE("Dual BSD/GPL"); -+MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); -diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c -index d3cae40749e8..0f268f2ff2e9 100644 ---- a/net/ipv4/tcp_cong.c -+++ b/net/ipv4/tcp_cong.c -@@ -189,6 +189,7 @@ void tcp_init_congestion_control(struct sock *sk) - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_sk(sk)->prior_ssthresh = 0; -+ tcp_sk(sk)->fast_ack_mode = 0; - if (icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - if (tcp_ca_needs_ecn(sk)) -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 0640453fce54..8a455eb0c552 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) - tcp_enter_quickack_mode(sk, 2); - break; - case INET_ECN_CE: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); - - if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { -@@ -360,7 +360,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) - tp->ecn_flags |= TCP_ECN_SEEN; - break; - default: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); - tp->ecn_flags |= TCP_ECN_SEEN; - break; -@@ -1079,7 +1079,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) - */ - static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) - { -+ struct sock *sk = (struct sock *)tp; -+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -+ - tp->lost += tcp_skb_pcount(skb); -+ if (ca_ops->skb_marked_lost) -+ ca_ops->skb_marked_lost(sk, skb); - } - - void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) -@@ -1460,6 +1465,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, - WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); - tcp_skb_pcount_add(skb, -pcount); - -+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */ -+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, -+ "prev in_flight: %u skb in_flight: %u pcount: %u", -+ TCP_SKB_CB(prev)->tx.in_flight, -+ TCP_SKB_CB(skb)->tx.in_flight, -+ pcount)) -+ TCP_SKB_CB(skb)->tx.in_flight = 0; -+ else -+ TCP_SKB_CB(skb)->tx.in_flight -= pcount; -+ TCP_SKB_CB(prev)->tx.in_flight += pcount; -+ - /* When we're adding to gso_segs == 1, gso_size will be zero, - * in theory this shouldn't be necessary but as long as DSACK - * code can come after this skb later on it's better to keep -@@ -3812,6 +3828,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - - prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; - rs.prior_in_flight = tcp_packets_in_flight(tp); -+ tcp_rate_check_app_limited(sk); - - /* ts_recent update must be made after we are sure that the packet - * is in window. 
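As context for this part of the tkg-BBRv2 patch: the tcp_bbr2_cong_ops table registered above makes the algorithm selectable under the name "bbr2", either system-wide through the existing net.ipv4.tcp_congestion_control sysctl or per socket through the standard TCP_CONGESTION socket option. A minimal userspace sketch (illustrative only, not part of the patch; it assumes a kernel built with this patch, with bbr2 built in or the tcp_bbr2 module loaded):

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

int main(void)
{
	const char *cc = "bbr2";
	char buf[16] = "";
	socklen_t len = sizeof(buf);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Ask the kernel to attach the bbr2 congestion ops to this socket. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, cc, strlen(cc)) < 0)
		perror("setsockopt(TCP_CONGESTION)");
	/* Read back which congestion control is actually in effect. */
	if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, buf, &len) == 0)
		printf("congestion control: %s\n", buf);
	return 0;
}

The TCP_CONG_WANTS_CE_EVENTS flag in the ops table is what the tcp_input.c hunks below key off: __tcp_ecn_check_ce() is switched from tcp_ca_needs_ecn() to tcp_ca_wants_ce_events() so bbr2 receives CE/no-CE events without taking over ECN signalling entirely.
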
-@@ -3910,6 +3927,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - delivered = tcp_newly_delivered(sk, delivered, flag); - lost = tp->lost - lost; /* freshly marked lost */ - rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); -+ rs.is_ece = !!(flag & FLAG_ECE); - tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); - tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); - tcp_xmit_recovery(sk, rexmit); -@@ -5509,13 +5527,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) - - /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && -+ (tp->fast_ack_mode == 1 || - /* ... and right edge of window advances far enough. - * (tcp_recvmsg() will send ACK otherwise). - * If application uses SO_RCVLOWAT, we want send ack now if - * we have not received enough bytes to satisfy the condition. - */ -- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -- __tcp_select_window(sk) >= tp->rcv_wnd)) || -+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -+ __tcp_select_window(sk) >= tp->rcv_wnd))) || - /* We ACK each frame or... */ - tcp_in_quickack_mode(sk) || - /* Protocol state mandates a one-time immediate ACK */ -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index c69f4d966024..a9ceec2702b2 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -375,7 +375,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, - th->cwr = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - } -- } else if (!tcp_ca_needs_ecn(sk)) { -+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && -+ !tcp_ca_needs_ecn(sk)) { - /* ACK or retransmitted segment: clear ECT|CE */ - INET_ECN_dontxmit(sk); - } -@@ -1533,7 +1533,7 @@ - { - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *buff; -- int old_factor; -+ int old_factor, inflight_prev; - long limit; - int nlen; - u8 flags; -@@ -1610,6 +1611,15 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - - if (diff) - tcp_adjust_pcount(sk, skb, diff); -+ -+ /* Set buff tx.in_flight as if buff were sent by itself. */ -+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; -+ if (WARN_ONCE(inflight_prev < 0, -+ "inconsistent: tx.in_flight: %u old_factor: %d", -+ TCP_SKB_CB(skb)->tx.in_flight, old_factor)) -+ inflight_prev = 0; -+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + -+ tcp_skb_pcount(buff); - } - - /* Link BUFF into the send queue. */ -@@ -1993,13 +2003,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) - { - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -- u32 min_tso, tso_segs; -- -- min_tso = ca_ops->min_tso_segs ? -- ca_ops->min_tso_segs(sk) : -- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); -+ u32 tso_segs; - -- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); -+ tso_segs = ca_ops->tso_segs ? 
-+ ca_ops->tso_segs(sk, mss_now) : -+ tcp_tso_autosize(sk, mss_now, -+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs)); - return min_t(u32, tso_segs, sk->sk_gso_max_segs); - } - -@@ -2635,6 +2644,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); - list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); - tcp_init_tso_segs(skb, mss_now); -+ tcp_set_tx_in_flight(sk, skb); - goto repair; /* Skip network transmission */ - } - -diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c -index a8f6d9d06f2e..a8b4c9504570 100644 ---- a/net/ipv4/tcp_rate.c -+++ b/net/ipv4/tcp_rate.c -@@ -34,6 +34,24 @@ - * ready to send in the write queue. - */ - -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ u32 in_flight; -+ -+ /* Check, sanitize, and record packets in flight after skb was sent. */ -+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); -+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, -+ "insane in_flight %u cc %s mss %u " -+ "cwnd %u pif %u %u %u %u\n", -+ in_flight, inet_csk(sk)->icsk_ca_ops->name, -+ tp->mss_cache, tp->snd_cwnd, -+ tp->packets_out, tp->retrans_out, -+ tp->sacked_out, tp->lost_out)) -+ in_flight = TCPCB_IN_FLIGHT_MAX; -+ TCP_SKB_CB(skb)->tx.in_flight = in_flight; -+} -+ - /* Snapshot the current delivery information in the skb, to generate - * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). - */ -@@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) - TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; - TCP_SKB_CB(skb)->tx.delivered = tp->delivered; - TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; -+ TCP_SKB_CB(skb)->tx.lost = tp->lost; - TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; -+ tcp_set_tx_in_flight(sk, skb); - } - - /* When an skb is sacked or acked, we fill in the rate sample with the (prior) -@@ -91,18 +111,21 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - if (!rs->prior_delivered || - tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, - scb->end_seq, rs->last_end_seq)) { -+ rs->prior_lost = scb->tx.lost; - rs->prior_delivered_ce = scb->tx.delivered_ce; - rs->prior_delivered = scb->tx.delivered; - rs->prior_mstamp = scb->tx.delivered_mstamp; - rs->is_app_limited = scb->tx.is_app_limited; - rs->is_retrans = scb->sacked & TCPCB_RETRANS; - rs->last_end_seq = scb->end_seq; -+ rs->tx_in_flight = scb->tx.in_flight; - - /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tx_tstamp; - /* Find the duration of the "send phase" of this window: */ -- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, -- scb->tx.first_tx_mstamp); -+ rs->interval_us = tcp_stamp32_us_delta( -+ tp->first_tx_mstamp, -+ scb->tx.first_tx_mstamp); - - } - /* Mark off the skb delivered once it's sacked to avoid being -@@ -144,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - return; - } - rs->delivered = tp->delivered - rs->prior_delivered; -+ rs->lost = tp->lost - rs->prior_lost; - - rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; - /* delivered_ce occupies less than 32 bits in the skb control block */ -@@ -155,7 +179,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - * longer phase. 
- */ - snd_us = rs->interval_us; /* send phase */ -- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, -+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, - rs->prior_mstamp); /* ack phase */ - rs->interval_us = max(snd_us, ack_us); - -diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index cb79127f45c3..70e4de876a7f 100644 ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -605,6 +605,7 @@ void tcp_write_timer_handler(struct sock *sk) - return; - } - -+ tcp_rate_check_app_limited(sk); - tcp_mstamp_refresh(tcp_sk(sk)); - event = icsk->icsk_pending; - ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -287,7 +287,7 @@ - icsk->icsk_ack.quick = quickacks; - } - -+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) --static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) - { - struct inet_connection_sock *icsk = inet_csk(sk); - -@@ -295,6 +299,7 @@ - inet_csk_exit_pingpong_mode(sk); - icsk->icsk_ack.ato = TCP_ATO_MIN; - } -+EXPORT_SYMBOL(tcp_enter_quickack_mode); - - /* Send ACKs quickly, if "quick" count is not exhausted - * and the session is not interactive. ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -350,6 +350,7 @@ - struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp, - bool force_schedule); - -+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); - static inline void tcp_dec_quickack_mode(struct sock *sk) - { - struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/SOURCES/tkg-bcachefs.patch b/SOURCES/tkg-bcachefs.patch deleted file mode 100644 index aca2622..0000000 --- a/SOURCES/tkg-bcachefs.patch +++ /dev/null @@ -1,98955 +0,0 @@ -From 0a195912e89bd49e868e7e4879d137091f0762c8 Mon Sep 17 00:00:00 2001 -From: Piotr Gorski -Date: Wed, 8 Nov 2023 08:07:19 +0100 -Subject: [PATCH] bcachefs - -Signed-off-by: Piotr Gorski ---- - MAINTAINERS | 23 + - drivers/md/bcache/Kconfig | 10 +- - drivers/md/bcache/Makefile | 4 +- - drivers/md/bcache/bcache.h | 2 +- - drivers/md/bcache/super.c | 1 - - drivers/md/bcache/util.h | 3 +- - fs/Kconfig | 1 + - fs/Makefile | 1 + - fs/bcachefs/Kconfig | 83 + - fs/bcachefs/Makefile | 89 + - fs/bcachefs/acl.c | 463 +++ - fs/bcachefs/acl.h | 60 + - fs/bcachefs/alloc_background.c | 2159 +++++++++++ - fs/bcachefs/alloc_background.h | 259 ++ - fs/bcachefs/alloc_foreground.c | 1600 ++++++++ - fs/bcachefs/alloc_foreground.h | 224 ++ - fs/bcachefs/alloc_types.h | 126 + - fs/bcachefs/backpointers.c | 860 +++++ - fs/bcachefs/backpointers.h | 140 + - fs/bcachefs/bbpos.h | 37 + - fs/bcachefs/bbpos_types.h | 18 + - fs/bcachefs/bcachefs.h | 1161 ++++++ - fs/bcachefs/bcachefs_format.h | 2425 ++++++++++++ - fs/bcachefs/bcachefs_ioctl.h | 368 ++ - fs/bcachefs/bkey.c | 1120 ++++++ - fs/bcachefs/bkey.h | 778 ++++ - fs/bcachefs/bkey_buf.h | 61 + - fs/bcachefs/bkey_cmp.h | 129 + - fs/bcachefs/bkey_methods.c | 459 +++ - fs/bcachefs/bkey_methods.h | 179 + - fs/bcachefs/bkey_sort.c | 201 + - fs/bcachefs/bkey_sort.h | 54 + - fs/bcachefs/bset.c | 1592 ++++++++ - fs/bcachefs/bset.h | 541 +++ - fs/bcachefs/btree_cache.c | 1215 ++++++ - fs/bcachefs/btree_cache.h | 131 + - fs/bcachefs/btree_gc.c | 2145 +++++++++++ - fs/bcachefs/btree_gc.h | 114 + - fs/bcachefs/btree_io.c | 2298 ++++++++++++ - fs/bcachefs/btree_io.h | 228 ++ - fs/bcachefs/btree_iter.c | 3242 +++++++++++++++++ - fs/bcachefs/btree_iter.h | 943 +++++ - fs/bcachefs/btree_journal_iter.c | 531 +++ - fs/bcachefs/btree_journal_iter.h | 57 + - fs/bcachefs/btree_key_cache.c | 1072 ++++++ - fs/bcachefs/btree_key_cache.h | 
48 + - fs/bcachefs/btree_locking.c | 817 +++++ - fs/bcachefs/btree_locking.h | 433 +++ - fs/bcachefs/btree_trans_commit.c | 1145 ++++++ - fs/bcachefs/btree_types.h | 756 ++++ - fs/bcachefs/btree_update.c | 933 +++++ - fs/bcachefs/btree_update.h | 340 ++ - fs/bcachefs/btree_update_interior.c | 2474 +++++++++++++ - fs/bcachefs/btree_update_interior.h | 337 ++ - fs/bcachefs/btree_write_buffer.c | 375 ++ - fs/bcachefs/btree_write_buffer.h | 14 + - fs/bcachefs/btree_write_buffer_types.h | 44 + - fs/bcachefs/buckets.c | 2168 +++++++++++ - fs/bcachefs/buckets.h | 458 +++ - fs/bcachefs/buckets_types.h | 92 + - fs/bcachefs/buckets_waiting_for_journal.c | 166 + - fs/bcachefs/buckets_waiting_for_journal.h | 15 + - .../buckets_waiting_for_journal_types.h | 23 + - fs/bcachefs/chardev.c | 784 ++++ - fs/bcachefs/chardev.h | 31 + - fs/bcachefs/checksum.c | 804 ++++ - fs/bcachefs/checksum.h | 213 ++ - fs/bcachefs/clock.c | 193 + - fs/bcachefs/clock.h | 38 + - fs/bcachefs/clock_types.h | 37 + - fs/bcachefs/compress.c | 728 ++++ - fs/bcachefs/compress.h | 73 + - fs/bcachefs/counters.c | 107 + - fs/bcachefs/counters.h | 17 + - fs/bcachefs/darray.h | 93 + - fs/bcachefs/data_update.c | 551 +++ - fs/bcachefs/data_update.h | 44 + - fs/bcachefs/debug.c | 954 +++++ - fs/bcachefs/debug.h | 32 + - fs/bcachefs/dirent.c | 577 +++ - fs/bcachefs/dirent.h | 70 + - fs/bcachefs/disk_groups.c | 620 ++++ - fs/bcachefs/disk_groups.h | 111 + - fs/bcachefs/disk_groups_types.h | 18 + - fs/bcachefs/ec.c | 1969 ++++++++++ - fs/bcachefs/ec.h | 260 ++ - fs/bcachefs/ec_types.h | 41 + - fs/bcachefs/errcode.c | 68 + - fs/bcachefs/errcode.h | 269 ++ - fs/bcachefs/error.c | 299 ++ - fs/bcachefs/error.h | 242 ++ - fs/bcachefs/extent_update.c | 173 + - fs/bcachefs/extent_update.h | 12 + - fs/bcachefs/extents.c | 1516 ++++++++ - fs/bcachefs/extents.h | 765 ++++ - fs/bcachefs/extents_types.h | 40 + - fs/bcachefs/eytzinger.h | 281 ++ - fs/bcachefs/fifo.h | 127 + - fs/bcachefs/fs-common.c | 501 +++ - fs/bcachefs/fs-common.h | 43 + - fs/bcachefs/fs-io-buffered.c | 1106 ++++++ - fs/bcachefs/fs-io-buffered.h | 27 + - fs/bcachefs/fs-io-direct.c | 680 ++++ - fs/bcachefs/fs-io-direct.h | 16 + - fs/bcachefs/fs-io-pagecache.c | 791 ++++ - fs/bcachefs/fs-io-pagecache.h | 176 + - fs/bcachefs/fs-io.c | 1072 ++++++ - fs/bcachefs/fs-io.h | 184 + - fs/bcachefs/fs-ioctl.c | 572 +++ - fs/bcachefs/fs-ioctl.h | 81 + - fs/bcachefs/fs.c | 1977 ++++++++++ - fs/bcachefs/fs.h | 209 ++ - fs/bcachefs/fsck.c | 2490 +++++++++++++ - fs/bcachefs/fsck.h | 15 + - fs/bcachefs/inode.c | 1198 ++++++ - fs/bcachefs/inode.h | 217 ++ - fs/bcachefs/io_misc.c | 524 +++ - fs/bcachefs/io_misc.h | 34 + - fs/bcachefs/io_read.c | 1210 ++++++ - fs/bcachefs/io_read.h | 158 + - fs/bcachefs/io_write.c | 1675 +++++++++ - fs/bcachefs/io_write.h | 110 + - fs/bcachefs/io_write_types.h | 96 + - fs/bcachefs/journal.c | 1468 ++++++++ - fs/bcachefs/journal.h | 549 +++ - fs/bcachefs/journal_io.c | 1947 ++++++++++ - fs/bcachefs/journal_io.h | 65 + - fs/bcachefs/journal_reclaim.c | 876 +++++ - fs/bcachefs/journal_reclaim.h | 87 + - fs/bcachefs/journal_sb.c | 219 ++ - fs/bcachefs/journal_sb.h | 24 + - fs/bcachefs/journal_seq_blacklist.c | 320 ++ - fs/bcachefs/journal_seq_blacklist.h | 22 + - fs/bcachefs/journal_types.h | 345 ++ - fs/bcachefs/keylist.c | 52 + - fs/bcachefs/keylist.h | 74 + - fs/bcachefs/keylist_types.h | 16 + - fs/bcachefs/logged_ops.c | 112 + - fs/bcachefs/logged_ops.h | 20 + - fs/bcachefs/lru.c | 164 + - fs/bcachefs/lru.h | 69 + - fs/bcachefs/mean_and_variance.c | 159 + - 
fs/bcachefs/mean_and_variance.h | 198 + - fs/bcachefs/mean_and_variance_test.c | 240 ++ - fs/bcachefs/migrate.c | 179 + - fs/bcachefs/migrate.h | 7 + - fs/bcachefs/move.c | 1198 ++++++ - fs/bcachefs/move.h | 139 + - fs/bcachefs/move_types.h | 36 + - fs/bcachefs/movinggc.c | 431 +++ - fs/bcachefs/movinggc.h | 12 + - fs/bcachefs/nocow_locking.c | 144 + - fs/bcachefs/nocow_locking.h | 50 + - fs/bcachefs/nocow_locking_types.h | 20 + - fs/bcachefs/opts.c | 602 +++ - fs/bcachefs/opts.h | 564 +++ - fs/bcachefs/printbuf.c | 425 +++ - fs/bcachefs/printbuf.h | 284 ++ - fs/bcachefs/quota.c | 979 +++++ - fs/bcachefs/quota.h | 74 + - fs/bcachefs/quota_types.h | 43 + - fs/bcachefs/rebalance.c | 464 +++ - fs/bcachefs/rebalance.h | 27 + - fs/bcachefs/rebalance_types.h | 37 + - fs/bcachefs/recovery.c | 1057 ++++++ - fs/bcachefs/recovery.h | 33 + - fs/bcachefs/recovery_types.h | 53 + - fs/bcachefs/reflink.c | 406 +++ - fs/bcachefs/reflink.h | 81 + - fs/bcachefs/replicas.c | 1050 ++++++ - fs/bcachefs/replicas.h | 91 + - fs/bcachefs/replicas_types.h | 27 + - fs/bcachefs/sb-clean.c | 398 ++ - fs/bcachefs/sb-clean.h | 16 + - fs/bcachefs/sb-errors.c | 172 + - fs/bcachefs/sb-errors.h | 270 ++ - fs/bcachefs/sb-errors_types.h | 16 + - fs/bcachefs/sb-members.c | 420 +++ - fs/bcachefs/sb-members.h | 227 ++ - fs/bcachefs/seqmutex.h | 48 + - fs/bcachefs/siphash.c | 173 + - fs/bcachefs/siphash.h | 87 + - fs/bcachefs/six.c | 917 +++++ - fs/bcachefs/six.h | 393 ++ - fs/bcachefs/snapshot.c | 1713 +++++++++ - fs/bcachefs/snapshot.h | 268 ++ - fs/bcachefs/str_hash.h | 370 ++ - fs/bcachefs/subvolume.c | 437 +++ - fs/bcachefs/subvolume.h | 35 + - fs/bcachefs/subvolume_types.h | 31 + - fs/bcachefs/super-io.c | 1266 +++++++ - fs/bcachefs/super-io.h | 94 + - fs/bcachefs/super.c | 2017 ++++++++++ - fs/bcachefs/super.h | 52 + - fs/bcachefs/super_types.h | 40 + - fs/bcachefs/sysfs.c | 1034 ++++++ - fs/bcachefs/sysfs.h | 48 + - fs/bcachefs/tests.c | 919 +++++ - fs/bcachefs/tests.h | 15 + - fs/bcachefs/trace.c | 17 + - fs/bcachefs/trace.h | 1334 +++++++ - fs/bcachefs/two_state_shared_lock.c | 8 + - fs/bcachefs/two_state_shared_lock.h | 59 + - fs/bcachefs/util.c | 1159 ++++++ - fs/bcachefs/util.h | 833 +++++ - fs/bcachefs/varint.c | 129 + - fs/bcachefs/varint.h | 11 + - fs/bcachefs/vstructs.h | 63 + - fs/bcachefs/xattr.c | 643 ++++ - fs/bcachefs/xattr.h | 50 + - fs/dcache.c | 12 +- - .../md/bcache => include/linux}/closure.h | 56 +- - include/linux/dcache.h | 1 + - include/linux/exportfs.h | 6 + - include/linux/generic-radix-tree.h | 68 +- - include/linux/sched.h | 1 + - include/linux/string_helpers.h | 4 +- - init/init_task.c | 1 + - kernel/locking/mutex.c | 3 + - kernel/stacktrace.c | 2 + - lib/Kconfig | 3 + - lib/Kconfig.debug | 9 + - lib/Makefile | 2 + - {drivers/md/bcache => lib}/closure.c | 45 +- - lib/errname.c | 1 + - lib/generic-radix-tree.c | 76 +- - lib/string_helpers.c | 10 +- - tools/objtool/noreturns.h | 2 + - 228 files changed, 96727 insertions(+), 60 deletions(-) - create mode 100644 fs/bcachefs/Kconfig - create mode 100644 fs/bcachefs/Makefile - create mode 100644 fs/bcachefs/acl.c - create mode 100644 fs/bcachefs/acl.h - create mode 100644 fs/bcachefs/alloc_background.c - create mode 100644 fs/bcachefs/alloc_background.h - create mode 100644 fs/bcachefs/alloc_foreground.c - create mode 100644 fs/bcachefs/alloc_foreground.h - create mode 100644 fs/bcachefs/alloc_types.h - create mode 100644 fs/bcachefs/backpointers.c - create mode 100644 fs/bcachefs/backpointers.h - create mode 100644 fs/bcachefs/bbpos.h - create mode 
100644 fs/bcachefs/bbpos_types.h - create mode 100644 fs/bcachefs/bcachefs.h - create mode 100644 fs/bcachefs/bcachefs_format.h - create mode 100644 fs/bcachefs/bcachefs_ioctl.h - create mode 100644 fs/bcachefs/bkey.c - create mode 100644 fs/bcachefs/bkey.h - create mode 100644 fs/bcachefs/bkey_buf.h - create mode 100644 fs/bcachefs/bkey_cmp.h - create mode 100644 fs/bcachefs/bkey_methods.c - create mode 100644 fs/bcachefs/bkey_methods.h - create mode 100644 fs/bcachefs/bkey_sort.c - create mode 100644 fs/bcachefs/bkey_sort.h - create mode 100644 fs/bcachefs/bset.c - create mode 100644 fs/bcachefs/bset.h - create mode 100644 fs/bcachefs/btree_cache.c - create mode 100644 fs/bcachefs/btree_cache.h - create mode 100644 fs/bcachefs/btree_gc.c - create mode 100644 fs/bcachefs/btree_gc.h - create mode 100644 fs/bcachefs/btree_io.c - create mode 100644 fs/bcachefs/btree_io.h - create mode 100644 fs/bcachefs/btree_iter.c - create mode 100644 fs/bcachefs/btree_iter.h - create mode 100644 fs/bcachefs/btree_journal_iter.c - create mode 100644 fs/bcachefs/btree_journal_iter.h - create mode 100644 fs/bcachefs/btree_key_cache.c - create mode 100644 fs/bcachefs/btree_key_cache.h - create mode 100644 fs/bcachefs/btree_locking.c - create mode 100644 fs/bcachefs/btree_locking.h - create mode 100644 fs/bcachefs/btree_trans_commit.c - create mode 100644 fs/bcachefs/btree_types.h - create mode 100644 fs/bcachefs/btree_update.c - create mode 100644 fs/bcachefs/btree_update.h - create mode 100644 fs/bcachefs/btree_update_interior.c - create mode 100644 fs/bcachefs/btree_update_interior.h - create mode 100644 fs/bcachefs/btree_write_buffer.c - create mode 100644 fs/bcachefs/btree_write_buffer.h - create mode 100644 fs/bcachefs/btree_write_buffer_types.h - create mode 100644 fs/bcachefs/buckets.c - create mode 100644 fs/bcachefs/buckets.h - create mode 100644 fs/bcachefs/buckets_types.h - create mode 100644 fs/bcachefs/buckets_waiting_for_journal.c - create mode 100644 fs/bcachefs/buckets_waiting_for_journal.h - create mode 100644 fs/bcachefs/buckets_waiting_for_journal_types.h - create mode 100644 fs/bcachefs/chardev.c - create mode 100644 fs/bcachefs/chardev.h - create mode 100644 fs/bcachefs/checksum.c - create mode 100644 fs/bcachefs/checksum.h - create mode 100644 fs/bcachefs/clock.c - create mode 100644 fs/bcachefs/clock.h - create mode 100644 fs/bcachefs/clock_types.h - create mode 100644 fs/bcachefs/compress.c - create mode 100644 fs/bcachefs/compress.h - create mode 100644 fs/bcachefs/counters.c - create mode 100644 fs/bcachefs/counters.h - create mode 100644 fs/bcachefs/darray.h - create mode 100644 fs/bcachefs/data_update.c - create mode 100644 fs/bcachefs/data_update.h - create mode 100644 fs/bcachefs/debug.c - create mode 100644 fs/bcachefs/debug.h - create mode 100644 fs/bcachefs/dirent.c - create mode 100644 fs/bcachefs/dirent.h - create mode 100644 fs/bcachefs/disk_groups.c - create mode 100644 fs/bcachefs/disk_groups.h - create mode 100644 fs/bcachefs/disk_groups_types.h - create mode 100644 fs/bcachefs/ec.c - create mode 100644 fs/bcachefs/ec.h - create mode 100644 fs/bcachefs/ec_types.h - create mode 100644 fs/bcachefs/errcode.c - create mode 100644 fs/bcachefs/errcode.h - create mode 100644 fs/bcachefs/error.c - create mode 100644 fs/bcachefs/error.h - create mode 100644 fs/bcachefs/extent_update.c - create mode 100644 fs/bcachefs/extent_update.h - create mode 100644 fs/bcachefs/extents.c - create mode 100644 fs/bcachefs/extents.h - create mode 100644 fs/bcachefs/extents_types.h - create mode 
100644 fs/bcachefs/eytzinger.h - create mode 100644 fs/bcachefs/fifo.h - create mode 100644 fs/bcachefs/fs-common.c - create mode 100644 fs/bcachefs/fs-common.h - create mode 100644 fs/bcachefs/fs-io-buffered.c - create mode 100644 fs/bcachefs/fs-io-buffered.h - create mode 100644 fs/bcachefs/fs-io-direct.c - create mode 100644 fs/bcachefs/fs-io-direct.h - create mode 100644 fs/bcachefs/fs-io-pagecache.c - create mode 100644 fs/bcachefs/fs-io-pagecache.h - create mode 100644 fs/bcachefs/fs-io.c - create mode 100644 fs/bcachefs/fs-io.h - create mode 100644 fs/bcachefs/fs-ioctl.c - create mode 100644 fs/bcachefs/fs-ioctl.h - create mode 100644 fs/bcachefs/fs.c - create mode 100644 fs/bcachefs/fs.h - create mode 100644 fs/bcachefs/fsck.c - create mode 100644 fs/bcachefs/fsck.h - create mode 100644 fs/bcachefs/inode.c - create mode 100644 fs/bcachefs/inode.h - create mode 100644 fs/bcachefs/io_misc.c - create mode 100644 fs/bcachefs/io_misc.h - create mode 100644 fs/bcachefs/io_read.c - create mode 100644 fs/bcachefs/io_read.h - create mode 100644 fs/bcachefs/io_write.c - create mode 100644 fs/bcachefs/io_write.h - create mode 100644 fs/bcachefs/io_write_types.h - create mode 100644 fs/bcachefs/journal.c - create mode 100644 fs/bcachefs/journal.h - create mode 100644 fs/bcachefs/journal_io.c - create mode 100644 fs/bcachefs/journal_io.h - create mode 100644 fs/bcachefs/journal_reclaim.c - create mode 100644 fs/bcachefs/journal_reclaim.h - create mode 100644 fs/bcachefs/journal_sb.c - create mode 100644 fs/bcachefs/journal_sb.h - create mode 100644 fs/bcachefs/journal_seq_blacklist.c - create mode 100644 fs/bcachefs/journal_seq_blacklist.h - create mode 100644 fs/bcachefs/journal_types.h - create mode 100644 fs/bcachefs/keylist.c - create mode 100644 fs/bcachefs/keylist.h - create mode 100644 fs/bcachefs/keylist_types.h - create mode 100644 fs/bcachefs/logged_ops.c - create mode 100644 fs/bcachefs/logged_ops.h - create mode 100644 fs/bcachefs/lru.c - create mode 100644 fs/bcachefs/lru.h - create mode 100644 fs/bcachefs/mean_and_variance.c - create mode 100644 fs/bcachefs/mean_and_variance.h - create mode 100644 fs/bcachefs/mean_and_variance_test.c - create mode 100644 fs/bcachefs/migrate.c - create mode 100644 fs/bcachefs/migrate.h - create mode 100644 fs/bcachefs/move.c - create mode 100644 fs/bcachefs/move.h - create mode 100644 fs/bcachefs/move_types.h - create mode 100644 fs/bcachefs/movinggc.c - create mode 100644 fs/bcachefs/movinggc.h - create mode 100644 fs/bcachefs/nocow_locking.c - create mode 100644 fs/bcachefs/nocow_locking.h - create mode 100644 fs/bcachefs/nocow_locking_types.h - create mode 100644 fs/bcachefs/opts.c - create mode 100644 fs/bcachefs/opts.h - create mode 100644 fs/bcachefs/printbuf.c - create mode 100644 fs/bcachefs/printbuf.h - create mode 100644 fs/bcachefs/quota.c - create mode 100644 fs/bcachefs/quota.h - create mode 100644 fs/bcachefs/quota_types.h - create mode 100644 fs/bcachefs/rebalance.c - create mode 100644 fs/bcachefs/rebalance.h - create mode 100644 fs/bcachefs/rebalance_types.h - create mode 100644 fs/bcachefs/recovery.c - create mode 100644 fs/bcachefs/recovery.h - create mode 100644 fs/bcachefs/recovery_types.h - create mode 100644 fs/bcachefs/reflink.c - create mode 100644 fs/bcachefs/reflink.h - create mode 100644 fs/bcachefs/replicas.c - create mode 100644 fs/bcachefs/replicas.h - create mode 100644 fs/bcachefs/replicas_types.h - create mode 100644 fs/bcachefs/sb-clean.c - create mode 100644 fs/bcachefs/sb-clean.h - create mode 100644 
fs/bcachefs/sb-errors.c - create mode 100644 fs/bcachefs/sb-errors.h - create mode 100644 fs/bcachefs/sb-errors_types.h - create mode 100644 fs/bcachefs/sb-members.c - create mode 100644 fs/bcachefs/sb-members.h - create mode 100644 fs/bcachefs/seqmutex.h - create mode 100644 fs/bcachefs/siphash.c - create mode 100644 fs/bcachefs/siphash.h - create mode 100644 fs/bcachefs/six.c - create mode 100644 fs/bcachefs/six.h - create mode 100644 fs/bcachefs/snapshot.c - create mode 100644 fs/bcachefs/snapshot.h - create mode 100644 fs/bcachefs/str_hash.h - create mode 100644 fs/bcachefs/subvolume.c - create mode 100644 fs/bcachefs/subvolume.h - create mode 100644 fs/bcachefs/subvolume_types.h - create mode 100644 fs/bcachefs/super-io.c - create mode 100644 fs/bcachefs/super-io.h - create mode 100644 fs/bcachefs/super.c - create mode 100644 fs/bcachefs/super.h - create mode 100644 fs/bcachefs/super_types.h - create mode 100644 fs/bcachefs/sysfs.c - create mode 100644 fs/bcachefs/sysfs.h - create mode 100644 fs/bcachefs/tests.c - create mode 100644 fs/bcachefs/tests.h - create mode 100644 fs/bcachefs/trace.c - create mode 100644 fs/bcachefs/trace.h - create mode 100644 fs/bcachefs/two_state_shared_lock.c - create mode 100644 fs/bcachefs/two_state_shared_lock.h - create mode 100644 fs/bcachefs/util.c - create mode 100644 fs/bcachefs/util.h - create mode 100644 fs/bcachefs/varint.c - create mode 100644 fs/bcachefs/varint.h - create mode 100644 fs/bcachefs/vstructs.h - create mode 100644 fs/bcachefs/xattr.c - create mode 100644 fs/bcachefs/xattr.h - rename {drivers/md/bcache => include/linux}/closure.h (91%) - rename {drivers/md/bcache => lib}/closure.c (83%) - -diff --git a/MAINTAINERS b/MAINTAINERS -index 2894f0777537..ce1c7073f40c 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -3482,6 +3482,14 @@ W: http://bcache.evilpiepirate.org - C: irc://irc.oftc.net/bcache - F: drivers/md/bcache/ - -+BCACHEFS -+M: Kent Overstreet -+R: Brian Foster -+L: linux-bcachefs@vger.kernel.org -+S: Supported -+C: irc://irc.oftc.net/bcache -+F: fs/bcachefs/ -+ - BDISP ST MEDIA DRIVER - M: Fabien Dessenne - L: linux-media@vger.kernel.org -@@ -5068,6 +5076,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core - F: Documentation/devicetree/bindings/timer/ - F: drivers/clocksource/ - -+CLOSURES -+M: Kent Overstreet -+L: linux-bcachefs@vger.kernel.org -+S: Supported -+C: irc://irc.oftc.net/bcache -+F: include/linux/closure.h -+F: lib/closure.c -+ - CMPC ACPI DRIVER - M: Thadeu Lima de Souza Cascardo - M: Daniel Oliveira Nascimento -@@ -8748,6 +8764,13 @@ S: Supported - T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm.git - F: drivers/pmdomain/ - -+GENERIC RADIX TREE -+M: Kent Overstreet -+S: Supported -+C: irc://irc.oftc.net/bcache -+F: include/linux/generic-radix-tree.h -+F: lib/generic-radix-tree.c -+ - GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER - M: Eugen Hristev - L: linux-input@vger.kernel.org -diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig -index 529c9d04e9a4..b2d10063d35f 100644 ---- a/drivers/md/bcache/Kconfig -+++ b/drivers/md/bcache/Kconfig -@@ -4,6 +4,7 @@ config BCACHE - tristate "Block device as cache" - select BLOCK_HOLDER_DEPRECATED if SYSFS - select CRC64 -+ select CLOSURES - help - Allows a block device to be used as cache for other devices; uses - a btree for indexing and the layout is optimized for SSDs. -@@ -19,15 +20,6 @@ config BCACHE_DEBUG - Enables extra debugging tools, allows expensive runtime checks to be - turned on. 
- --config BCACHE_CLOSURES_DEBUG -- bool "Debug closures" -- depends on BCACHE -- select DEBUG_FS -- help -- Keeps all active closures in a linked list and provides a debugfs -- interface to list them, which makes it possible to see asynchronous -- operations that get stuck. -- - config BCACHE_ASYNC_REGISTRATION - bool "Asynchronous device registration" - depends on BCACHE -diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile -index 5b87e59676b8..054e8a33a7ab 100644 ---- a/drivers/md/bcache/Makefile -+++ b/drivers/md/bcache/Makefile -@@ -2,6 +2,6 @@ - - obj-$(CONFIG_BCACHE) += bcache.o - --bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ -- io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ -+bcache-y := alloc.o bset.o btree.o debug.o extents.o io.o\ -+ journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ - util.o writeback.o features.o -diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h -index 5a79bb3c272f..7c0d00432612 100644 ---- a/drivers/md/bcache/bcache.h -+++ b/drivers/md/bcache/bcache.h -@@ -179,6 +179,7 @@ - #define pr_fmt(fmt) "bcache: %s() " fmt, __func__ - - #include -+#include - #include - #include - #include -@@ -192,7 +193,6 @@ - #include "bcache_ondisk.h" - #include "bset.h" - #include "util.h" --#include "closure.h" - - struct bucket { - atomic_t pin; -diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c -index 0ae2b3676293..4affe587586c 100644 ---- a/drivers/md/bcache/super.c -+++ b/drivers/md/bcache/super.c -@@ -2905,7 +2905,6 @@ static int __init bcache_init(void) - goto err; - - bch_debug_init(); -- closure_debug_init(); - - bcache_is_reboot = false; - -diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h -index 6f3cb7c92130..f61ab1bada6c 100644 ---- a/drivers/md/bcache/util.h -+++ b/drivers/md/bcache/util.h -@@ -4,6 +4,7 @@ - #define _BCACHE_UTIL_H - - #include -+#include - #include - #include - #include -@@ -13,8 +14,6 @@ - #include - #include - --#include "closure.h" -- - struct closure; - - #ifdef CONFIG_BCACHE_DEBUG -diff --git a/fs/Kconfig b/fs/Kconfig -index aa7e03cc1941..0d6cb927872a 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -48,6 +48,7 @@ source "fs/ocfs2/Kconfig" - source "fs/btrfs/Kconfig" - source "fs/nilfs2/Kconfig" - source "fs/f2fs/Kconfig" -+source "fs/bcachefs/Kconfig" - source "fs/zonefs/Kconfig" - - endif # BLOCK -diff --git a/fs/Makefile b/fs/Makefile -index f9541f40be4e..75522f88e763 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -123,6 +123,7 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/ - obj-$(CONFIG_BTRFS_FS) += btrfs/ - obj-$(CONFIG_GFS2_FS) += gfs2/ - obj-$(CONFIG_F2FS_FS) += f2fs/ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs/ - obj-$(CONFIG_CEPH_FS) += ceph/ - obj-$(CONFIG_PSTORE) += pstore/ - obj-$(CONFIG_EFIVAR_FS) += efivarfs/ -diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig -new file mode 100644 -index 000000000000..c08c2c7d6fbb ---- /dev/null -+++ b/fs/bcachefs/Kconfig -@@ -0,0 +1,83 @@ -+ -+config BCACHEFS_FS -+ tristate "bcachefs filesystem support (EXPERIMENTAL)" -+ depends on BLOCK -+ select EXPORTFS -+ select CLOSURES -+ select LIBCRC32C -+ select CRC64 -+ select FS_POSIX_ACL -+ select LZ4_COMPRESS -+ select LZ4_DECOMPRESS -+ select LZ4HC_COMPRESS -+ select LZ4HC_DECOMPRESS -+ select ZLIB_DEFLATE -+ select ZLIB_INFLATE -+ select ZSTD_COMPRESS -+ select ZSTD_DECOMPRESS -+ select CRYPTO_SHA256 -+ select CRYPTO_CHACHA20 -+ select CRYPTO_POLY1305 -+ select KEYS -+ select RAID6_PQ -+ select XOR_BLOCKS -+ select XXHASH -+ select SRCU -+ 
select SYMBOLIC_ERRNAME -+ help -+ The bcachefs filesystem - a modern, copy on write filesystem, with -+ support for multiple devices, compression, checksumming, etc. -+ -+config BCACHEFS_QUOTA -+ bool "bcachefs quota support" -+ depends on BCACHEFS_FS -+ select QUOTACTL -+ -+config BCACHEFS_POSIX_ACL -+ bool "bcachefs POSIX ACL support" -+ depends on BCACHEFS_FS -+ select FS_POSIX_ACL -+ -+config BCACHEFS_DEBUG_TRANSACTIONS -+ bool "bcachefs runtime info" -+ depends on BCACHEFS_FS -+ help -+ This makes the list of running btree transactions available in debugfs. -+ -+ This is a highly useful debugging feature but does add a small amount of overhead. -+ -+config BCACHEFS_DEBUG -+ bool "bcachefs debugging" -+ depends on BCACHEFS_FS -+ help -+ Enables many extra debugging checks and assertions. -+ -+ The resulting code will be significantly slower than normal; you -+ probably shouldn't select this option unless you're a developer. -+ -+config BCACHEFS_TESTS -+ bool "bcachefs unit and performance tests" -+ depends on BCACHEFS_FS -+ help -+ Include some unit and performance tests for the core btree code -+ -+config BCACHEFS_LOCK_TIME_STATS -+ bool "bcachefs lock time statistics" -+ depends on BCACHEFS_FS -+ help -+ Expose statistics for how long we held a lock in debugfs -+ -+config BCACHEFS_NO_LATENCY_ACCT -+ bool "disable latency accounting and time stats" -+ depends on BCACHEFS_FS -+ help -+ This disables device latency tracking and time stats, only for performance testing -+ -+config MEAN_AND_VARIANCE_UNIT_TEST -+ tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS -+ depends on KUNIT -+ depends on BCACHEFS_FS -+ default KUNIT_ALL_TESTS -+ help -+ This option enables the kunit tests for mean_and_variance module. -+ If unsure, say N. -diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile -new file mode 100644 -index 000000000000..45b64f89258c ---- /dev/null -+++ b/fs/bcachefs/Makefile -@@ -0,0 +1,89 @@ -+ -+obj-$(CONFIG_BCACHEFS_FS) += bcachefs.o -+ -+bcachefs-y := \ -+ acl.o \ -+ alloc_background.o \ -+ alloc_foreground.o \ -+ backpointers.o \ -+ bkey.o \ -+ bkey_methods.o \ -+ bkey_sort.o \ -+ bset.o \ -+ btree_cache.o \ -+ btree_gc.o \ -+ btree_io.o \ -+ btree_iter.o \ -+ btree_journal_iter.o \ -+ btree_key_cache.o \ -+ btree_locking.o \ -+ btree_trans_commit.o \ -+ btree_update.o \ -+ btree_update_interior.o \ -+ btree_write_buffer.o \ -+ buckets.o \ -+ buckets_waiting_for_journal.o \ -+ chardev.o \ -+ checksum.o \ -+ clock.o \ -+ compress.o \ -+ counters.o \ -+ debug.o \ -+ dirent.o \ -+ disk_groups.o \ -+ data_update.o \ -+ ec.o \ -+ errcode.o \ -+ error.o \ -+ extents.o \ -+ extent_update.o \ -+ fs.o \ -+ fs-common.o \ -+ fs-ioctl.o \ -+ fs-io.o \ -+ fs-io-buffered.o \ -+ fs-io-direct.o \ -+ fs-io-pagecache.o \ -+ fsck.o \ -+ inode.o \ -+ io_read.o \ -+ io_misc.o \ -+ io_write.o \ -+ journal.o \ -+ journal_io.o \ -+ journal_reclaim.o \ -+ journal_sb.o \ -+ journal_seq_blacklist.o \ -+ keylist.o \ -+ logged_ops.o \ -+ lru.o \ -+ mean_and_variance.o \ -+ migrate.o \ -+ move.o \ -+ movinggc.o \ -+ nocow_locking.o \ -+ opts.o \ -+ printbuf.o \ -+ quota.o \ -+ rebalance.o \ -+ recovery.o \ -+ reflink.o \ -+ replicas.o \ -+ sb-clean.o \ -+ sb-errors.o \ -+ sb-members.o \ -+ siphash.o \ -+ six.o \ -+ snapshot.o \ -+ subvolume.o \ -+ super.o \ -+ super-io.o \ -+ sysfs.o \ -+ tests.o \ -+ trace.o \ -+ two_state_shared_lock.o \ -+ util.o \ -+ varint.o \ -+ xattr.o -+ -+obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o -diff --git a/fs/bcachefs/acl.c 
b/fs/bcachefs/acl.c -new file mode 100644 -index 000000000000..f3809897f00a ---- /dev/null -+++ b/fs/bcachefs/acl.c -@@ -0,0 +1,463 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+ -+#include "acl.h" -+#include "xattr.h" -+ -+#include -+ -+static const char * const acl_types[] = { -+ [ACL_USER_OBJ] = "user_obj", -+ [ACL_USER] = "user", -+ [ACL_GROUP_OBJ] = "group_obj", -+ [ACL_GROUP] = "group", -+ [ACL_MASK] = "mask", -+ [ACL_OTHER] = "other", -+ NULL, -+}; -+ -+void bch2_acl_to_text(struct printbuf *out, const void *value, size_t size) -+{ -+ const void *p, *end = value + size; -+ -+ if (!value || -+ size < sizeof(bch_acl_header) || -+ ((bch_acl_header *)value)->a_version != cpu_to_le32(BCH_ACL_VERSION)) -+ return; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *in = p; -+ unsigned tag = le16_to_cpu(in->e_tag); -+ -+ prt_str(out, acl_types[tag]); -+ -+ switch (tag) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ prt_printf(out, " uid %u", le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ prt_printf(out, " gid %u", le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ } -+ -+ prt_printf(out, " %o", le16_to_cpu(in->e_perm)); -+ -+ if (p != end) -+ prt_char(out, ' '); -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+#include "fs.h" -+ -+#include -+#include -+#include -+#include -+ -+static inline size_t bch2_acl_size(unsigned nr_short, unsigned nr_long) -+{ -+ return sizeof(bch_acl_header) + -+ sizeof(bch_acl_entry_short) * nr_short + -+ sizeof(bch_acl_entry) * nr_long; -+} -+ -+static inline int acl_to_xattr_type(int type) -+{ -+ switch (type) { -+ case ACL_TYPE_ACCESS: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; -+ case ACL_TYPE_DEFAULT: -+ return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; -+ default: -+ BUG(); -+ } -+} -+ -+/* -+ * Convert from filesystem to in-memory representation. 
-+ */ -+static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, -+ const void *value, size_t size) -+{ -+ const void *p, *end = value + size; -+ struct posix_acl *acl; -+ struct posix_acl_entry *out; -+ unsigned count = 0; -+ int ret; -+ -+ if (!value) -+ return NULL; -+ if (size < sizeof(bch_acl_header)) -+ goto invalid; -+ if (((bch_acl_header *)value)->a_version != -+ cpu_to_le32(BCH_ACL_VERSION)) -+ goto invalid; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *entry = p; -+ -+ if (p + sizeof(bch_acl_entry_short) > end) -+ goto invalid; -+ -+ switch (le16_to_cpu(entry->e_tag)) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ case ACL_GROUP: -+ p += sizeof(bch_acl_entry); -+ break; -+ default: -+ goto invalid; -+ } -+ -+ count++; -+ } -+ -+ if (p > end) -+ goto invalid; -+ -+ if (!count) -+ return NULL; -+ -+ acl = allocate_dropping_locks(trans, ret, -+ posix_acl_alloc(count, _gfp)); -+ if (!acl) -+ return ERR_PTR(-ENOMEM); -+ if (ret) { -+ kfree(acl); -+ return ERR_PTR(ret); -+ } -+ -+ out = acl->a_entries; -+ -+ p = value + sizeof(bch_acl_header); -+ while (p < end) { -+ const bch_acl_entry *in = p; -+ -+ out->e_tag = le16_to_cpu(in->e_tag); -+ out->e_perm = le16_to_cpu(in->e_perm); -+ -+ switch (out->e_tag) { -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ p += sizeof(bch_acl_entry_short); -+ break; -+ case ACL_USER: -+ out->e_uid = make_kuid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ out->e_gid = make_kgid(&init_user_ns, -+ le32_to_cpu(in->e_id)); -+ p += sizeof(bch_acl_entry); -+ break; -+ } -+ -+ out++; -+ } -+ -+ BUG_ON(out != acl->a_entries + acl->a_count); -+ -+ return acl; -+invalid: -+ pr_err("invalid acl entry"); -+ return ERR_PTR(-EINVAL); -+} -+ -+#define acl_for_each_entry(acl, acl_e) \ -+ for (acl_e = acl->a_entries; \ -+ acl_e < acl->a_entries + acl->a_count; \ -+ acl_e++) -+ -+/* -+ * Convert from in-memory to filesystem representation. 
-+ */ -+static struct bkey_i_xattr * -+bch2_acl_to_xattr(struct btree_trans *trans, -+ const struct posix_acl *acl, -+ int type) -+{ -+ struct bkey_i_xattr *xattr; -+ bch_acl_header *acl_header; -+ const struct posix_acl_entry *acl_e; -+ void *outptr; -+ unsigned nr_short = 0, nr_long = 0, acl_len, u64s; -+ -+ acl_for_each_entry(acl, acl_e) { -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ case ACL_GROUP: -+ nr_long++; -+ break; -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ nr_short++; -+ break; -+ default: -+ return ERR_PTR(-EINVAL); -+ } -+ } -+ -+ acl_len = bch2_acl_size(nr_short, nr_long); -+ u64s = BKEY_U64s + xattr_val_u64s(0, acl_len); -+ -+ if (u64s > U8_MAX) -+ return ERR_PTR(-E2BIG); -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return xattr; -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = acl_to_xattr_type(type); -+ xattr->v.x_name_len = 0; -+ xattr->v.x_val_len = cpu_to_le16(acl_len); -+ -+ acl_header = xattr_val(&xattr->v); -+ acl_header->a_version = cpu_to_le32(BCH_ACL_VERSION); -+ -+ outptr = (void *) acl_header + sizeof(*acl_header); -+ -+ acl_for_each_entry(acl, acl_e) { -+ bch_acl_entry *entry = outptr; -+ -+ entry->e_tag = cpu_to_le16(acl_e->e_tag); -+ entry->e_perm = cpu_to_le16(acl_e->e_perm); -+ switch (acl_e->e_tag) { -+ case ACL_USER: -+ entry->e_id = cpu_to_le32( -+ from_kuid(&init_user_ns, acl_e->e_uid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ case ACL_GROUP: -+ entry->e_id = cpu_to_le32( -+ from_kgid(&init_user_ns, acl_e->e_gid)); -+ outptr += sizeof(bch_acl_entry); -+ break; -+ -+ case ACL_USER_OBJ: -+ case ACL_GROUP_OBJ: -+ case ACL_MASK: -+ case ACL_OTHER: -+ outptr += sizeof(bch_acl_entry_short); -+ break; -+ } -+ } -+ -+ BUG_ON(outptr != xattr_val(&xattr->v) + acl_len); -+ -+ return xattr; -+} -+ -+struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, -+ struct dentry *dentry, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); -+ struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c_xattr xattr; -+ struct posix_acl *acl = NULL; -+ struct bkey_s_c k; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, -+ &hash, inode_inum(inode), &search, 0); -+ if (ret) { -+ if (!bch2_err_matches(ret, ENOENT)) -+ acl = ERR_PTR(ret); -+ goto out; -+ } -+ -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) { -+ acl = ERR_PTR(ret); -+ goto out; -+ } -+ -+ xattr = bkey_s_c_to_xattr(k); -+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ -+ if (!IS_ERR(acl)) -+ set_cached_acl(&inode->v, type, acl); -+out: -+ if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return acl; -+} -+ -+int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, -+ struct bch_inode_unpacked *inode_u, -+ struct posix_acl *acl, int type) -+{ -+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode_u); -+ int ret; -+ -+ if (type == ACL_TYPE_DEFAULT && -+ !S_ISDIR(inode_u->bi_mode)) -+ return acl ? 
-EACCES : 0; -+ -+ if (acl) { -+ struct bkey_i_xattr *xattr = -+ bch2_acl_to_xattr(trans, acl, type); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, &hash_info, -+ inum, &xattr->k_i, 0); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(acl_to_xattr_type(type), "", 0); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, &hash_info, -+ inum, &search); -+ } -+ -+ return bch2_err_matches(ret, ENOENT) ? 0 : ret; -+} -+ -+int bch2_set_acl(struct mnt_idmap *idmap, -+ struct dentry *dentry, -+ struct posix_acl *_acl, int type) -+{ -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter inode_iter = { NULL }; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl; -+ umode_t mode; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+retry: -+ bch2_trans_begin(trans); -+ acl = _acl; -+ -+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto btree_err; -+ -+ mode = inode_u.bi_mode; -+ -+ if (type == ACL_TYPE_ACCESS) { -+ ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl); -+ if (ret) -+ goto btree_err; -+ } -+ -+ ret = bch2_set_acl_trans(trans, inode_inum(inode), &inode_u, acl, type); -+ if (ret) -+ goto btree_err; -+ -+ inode_u.bi_ctime = bch2_current_time(c); -+ inode_u.bi_mode = mode; -+ -+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); -+btree_err: -+ bch2_trans_iter_exit(trans, &inode_iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ if (unlikely(ret)) -+ goto err; -+ -+ bch2_inode_update_after_write(trans, inode, &inode_u, -+ ATTR_CTIME|ATTR_MODE); -+ -+ set_cached_acl(&inode->v, type, acl); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ bch2_trans_put(trans); -+ -+ return ret; -+} -+ -+int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, -+ struct bch_inode_unpacked *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode); -+ struct xattr_search_key search = X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0); -+ struct btree_iter iter; -+ struct bkey_s_c_xattr xattr; -+ struct bkey_i_xattr *new; -+ struct posix_acl *acl = NULL; -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, -+ &hash_info, inum, &search, BTREE_ITER_INTENT); -+ if (ret) -+ return bch2_err_matches(ret, ENOENT) ? 
0 : ret; -+ -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ xattr = bkey_s_c_to_xattr(k); -+ -+ acl = bch2_acl_from_disk(trans, xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ ret = PTR_ERR_OR_ZERO(acl); -+ if (IS_ERR_OR_NULL(acl)) -+ goto err; -+ -+ ret = allocate_dropping_locks_errcode(trans, -+ __posix_acl_chmod(&acl, _gfp, mode)); -+ if (ret) -+ goto err; -+ -+ new = bch2_acl_to_xattr(trans, acl, ACL_TYPE_ACCESS); -+ if (IS_ERR(new)) { -+ ret = PTR_ERR(new); -+ goto err; -+ } -+ -+ new->k.p = iter.pos; -+ ret = bch2_trans_update(trans, &iter, &new->k_i, 0); -+ *new_acl = acl; -+ acl = NULL; -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ if (!IS_ERR_OR_NULL(acl)) -+ kfree(acl); -+ return ret; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h -new file mode 100644 -index 000000000000..27e7eec0f278 ---- /dev/null -+++ b/fs/bcachefs/acl.h -@@ -0,0 +1,60 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ACL_H -+#define _BCACHEFS_ACL_H -+ -+struct bch_inode_unpacked; -+struct bch_hash_info; -+struct bch_inode_info; -+struct posix_acl; -+ -+#define BCH_ACL_VERSION 0x0001 -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+ __le32 e_id; -+} bch_acl_entry; -+ -+typedef struct { -+ __le16 e_tag; -+ __le16 e_perm; -+} bch_acl_entry_short; -+ -+typedef struct { -+ __le32 a_version; -+} bch_acl_header; -+ -+void bch2_acl_to_text(struct printbuf *, const void *, size_t); -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ -+struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); -+ -+int bch2_set_acl_trans(struct btree_trans *, subvol_inum, -+ struct bch_inode_unpacked *, -+ struct posix_acl *, int); -+int bch2_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); -+int bch2_acl_chmod(struct btree_trans *, subvol_inum, -+ struct bch_inode_unpacked *, -+ umode_t, struct posix_acl **); -+ -+#else -+ -+static inline int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, -+ struct bch_inode_unpacked *inode_u, -+ struct posix_acl *acl, int type) -+{ -+ return 0; -+} -+ -+static inline int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, -+ struct bch_inode_unpacked *inode, -+ umode_t mode, -+ struct posix_acl **new_acl) -+{ -+ return 0; -+} -+ -+#endif /* CONFIG_BCACHEFS_POSIX_ACL */ -+ -+#endif /* _BCACHEFS_ACL_H */ -diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c -new file mode 100644 -index 000000000000..1fec0e67891f ---- /dev/null -+++ b/fs/bcachefs/alloc_background.c -@@ -0,0 +1,2159 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "backpointers.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_gc.h" -+#include "btree_write_buffer.h" -+#include "buckets.h" -+#include "buckets_waiting_for_journal.h" -+#include "clock.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "lru.h" -+#include "recovery.h" -+#include "trace.h" -+#include "varint.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* Persistent alloc info: */ -+ -+static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { -+#define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8, -+ BCH_ALLOC_FIELDS_V1() -+#undef x -+}; -+ -+struct bkey_alloc_unpacked { -+ u64 journal_seq; -+ u8 
gen; -+ u8 oldest_gen; -+ u8 data_type; -+ bool need_discard:1; -+ bool need_inc_gen:1; -+#define x(_name, _bits) u##_bits _name; -+ BCH_ALLOC_FIELDS_V2() -+#undef x -+}; -+ -+static inline u64 alloc_field_v1_get(const struct bch_alloc *a, -+ const void **p, unsigned field) -+{ -+ unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field]; -+ u64 v; -+ -+ if (!(a->fields & (1 << field))) -+ return 0; -+ -+ switch (bytes) { -+ case 1: -+ v = *((const u8 *) *p); -+ break; -+ case 2: -+ v = le16_to_cpup(*p); -+ break; -+ case 4: -+ v = le32_to_cpup(*p); -+ break; -+ case 8: -+ v = le64_to_cpup(*p); -+ break; -+ default: -+ BUG(); -+ } -+ -+ *p += bytes; -+ return v; -+} -+ -+static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out, -+ struct bkey_s_c k) -+{ -+ const struct bch_alloc *in = bkey_s_c_to_alloc(k).v; -+ const void *d = in->data; -+ unsigned idx = 0; -+ -+ out->gen = in->gen; -+ -+#define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++); -+ BCH_ALLOC_FIELDS_V1() -+#undef x -+} -+ -+static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k); -+ const u8 *in = a.v->data; -+ const u8 *end = bkey_val_end(a); -+ unsigned fieldnr = 0; -+ int ret; -+ u64 v; -+ -+ out->gen = a.v->gen; -+ out->oldest_gen = a.v->oldest_gen; -+ out->data_type = a.v->data_type; -+ -+#define x(_name, _bits) \ -+ if (fieldnr < a.v->nr_fields) { \ -+ ret = bch2_varint_decode_fast(in, end, &v); \ -+ if (ret < 0) \ -+ return ret; \ -+ in += ret; \ -+ } else { \ -+ v = 0; \ -+ } \ -+ out->_name = v; \ -+ if (v != out->_name) \ -+ return -1; \ -+ fieldnr++; -+ -+ BCH_ALLOC_FIELDS_V2() -+#undef x -+ return 0; -+} -+ -+static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k); -+ const u8 *in = a.v->data; -+ const u8 *end = bkey_val_end(a); -+ unsigned fieldnr = 0; -+ int ret; -+ u64 v; -+ -+ out->gen = a.v->gen; -+ out->oldest_gen = a.v->oldest_gen; -+ out->data_type = a.v->data_type; -+ out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v); -+ out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v); -+ out->journal_seq = le64_to_cpu(a.v->journal_seq); -+ -+#define x(_name, _bits) \ -+ if (fieldnr < a.v->nr_fields) { \ -+ ret = bch2_varint_decode_fast(in, end, &v); \ -+ if (ret < 0) \ -+ return ret; \ -+ in += ret; \ -+ } else { \ -+ v = 0; \ -+ } \ -+ out->_name = v; \ -+ if (v != out->_name) \ -+ return -1; \ -+ fieldnr++; -+ -+ BCH_ALLOC_FIELDS_V2() -+#undef x -+ return 0; -+} -+ -+static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) -+{ -+ struct bkey_alloc_unpacked ret = { .gen = 0 }; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_alloc: -+ bch2_alloc_unpack_v1(&ret, k); -+ break; -+ case KEY_TYPE_alloc_v2: -+ bch2_alloc_unpack_v2(&ret, k); -+ break; -+ case KEY_TYPE_alloc_v3: -+ bch2_alloc_unpack_v3(&ret, k); -+ break; -+ } -+ -+ return ret; -+} -+ -+static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) -+{ -+ unsigned i, bytes = offsetof(struct bch_alloc, data); -+ -+ for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++) -+ if (a->fields & (1 << i)) -+ bytes += BCH_ALLOC_V1_FIELD_BYTES[i]; -+ -+ return DIV_ROUND_UP(bytes, sizeof(u64)); -+} -+ -+int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); -+ int ret = 0; -+ -+ /* allow for unknown fields */ -+ 
bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, -+ alloc_v1_val_size_bad, -+ "incorrect value size (%zu < %u)", -+ bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); -+fsck_err: -+ return ret; -+} -+ -+int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_alloc_unpacked u; -+ int ret = 0; -+ -+ bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, -+ alloc_v2_unpack_error, -+ "unpack error"); -+fsck_err: -+ return ret; -+} -+ -+int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_alloc_unpacked u; -+ int ret = 0; -+ -+ bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, -+ alloc_v2_unpack_error, -+ "unpack error"); -+fsck_err: -+ return ret; -+} -+ -+int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, struct printbuf *err) -+{ -+ struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); -+ int ret = 0; -+ -+ bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err, -+ alloc_v4_val_size_bad, -+ "bad val size (%u > %zu)", -+ alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); -+ -+ bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && -+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, -+ alloc_v4_backpointers_start_bad, -+ "invalid backpointers_start"); -+ -+ bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, -+ alloc_key_data_type_bad, -+ "invalid data type (got %u should be %u)", -+ a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); -+ -+ switch (a.v->data_type) { -+ case BCH_DATA_free: -+ case BCH_DATA_need_gc_gens: -+ case BCH_DATA_need_discard: -+ bkey_fsck_err_on(a.v->dirty_sectors || -+ a.v->cached_sectors || -+ a.v->stripe, c, err, -+ alloc_key_empty_but_have_data, -+ "empty data type free but have data"); -+ break; -+ case BCH_DATA_sb: -+ case BCH_DATA_journal: -+ case BCH_DATA_btree: -+ case BCH_DATA_user: -+ case BCH_DATA_parity: -+ bkey_fsck_err_on(!a.v->dirty_sectors, c, err, -+ alloc_key_dirty_sectors_0, -+ "data_type %s but dirty_sectors==0", -+ bch2_data_types[a.v->data_type]); -+ break; -+ case BCH_DATA_cached: -+ bkey_fsck_err_on(!a.v->cached_sectors || -+ a.v->dirty_sectors || -+ a.v->stripe, c, err, -+ alloc_key_cached_inconsistency, -+ "data type inconsistency"); -+ -+ bkey_fsck_err_on(!a.v->io_time[READ] && -+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, -+ c, err, -+ alloc_key_cached_but_read_time_zero, -+ "cached bucket with read_time == 0"); -+ break; -+ case BCH_DATA_stripe: -+ break; -+ } -+fsck_err: -+ return ret; -+} -+ -+void bch2_alloc_v4_swab(struct bkey_s k) -+{ -+ struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; -+ struct bch_backpointer *bp, *bps; -+ -+ a->journal_seq = swab64(a->journal_seq); -+ a->flags = swab32(a->flags); -+ a->dirty_sectors = swab32(a->dirty_sectors); -+ a->cached_sectors = swab32(a->cached_sectors); -+ a->io_time[0] = swab64(a->io_time[0]); -+ a->io_time[1] = swab64(a->io_time[1]); -+ a->stripe = swab32(a->stripe); -+ a->nr_external_backpointers = swab32(a->nr_external_backpointers); -+ a->fragmentation_lru = swab64(a->fragmentation_lru); -+ -+ bps = alloc_v4_backpointers(a); -+ for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { -+ bp->bucket_offset = swab40(bp->bucket_offset); -+ bp->bucket_len = swab32(bp->bucket_len); -+ bch2_bpos_swab(&bp->pos); -+ } -+} -+ -+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, 
struct bkey_s_c k) -+{ -+ struct bch_alloc_v4 _a; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); -+ unsigned i; -+ -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ prt_printf(out, "gen %u oldest_gen %u data_type %s", -+ a->gen, a->oldest_gen, -+ a->data_type < BCH_DATA_NR -+ ? bch2_data_types[a->data_type] -+ : "(invalid data type)"); -+ prt_newline(out); -+ prt_printf(out, "journal_seq %llu", a->journal_seq); -+ prt_newline(out); -+ prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a)); -+ prt_newline(out); -+ prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a)); -+ prt_newline(out); -+ prt_printf(out, "dirty_sectors %u", a->dirty_sectors); -+ prt_newline(out); -+ prt_printf(out, "cached_sectors %u", a->cached_sectors); -+ prt_newline(out); -+ prt_printf(out, "stripe %u", a->stripe); -+ prt_newline(out); -+ prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy); -+ prt_newline(out); -+ prt_printf(out, "io_time[READ] %llu", a->io_time[READ]); -+ prt_newline(out); -+ prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]); -+ prt_newline(out); -+ prt_printf(out, "fragmentation %llu", a->fragmentation_lru); -+ prt_newline(out); -+ prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a)); -+ prt_newline(out); -+ -+ if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) { -+ struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k); -+ const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v); -+ -+ prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v)); -+ printbuf_indent_add(out, 2); -+ -+ for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) { -+ prt_newline(out); -+ bch2_backpointer_to_text(out, &bps[i]); -+ } -+ -+ printbuf_indent_sub(out, 2); -+ } -+ -+ printbuf_indent_sub(out, 2); -+} -+ -+void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) -+{ -+ if (k.k->type == KEY_TYPE_alloc_v4) { -+ void *src, *dst; -+ -+ *out = *bkey_s_c_to_alloc_v4(k).v; -+ -+ src = alloc_v4_backpointers(out); -+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); -+ dst = alloc_v4_backpointers(out); -+ -+ if (src < dst) -+ memset(src, 0, dst - src); -+ -+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0); -+ } else { -+ struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); -+ -+ *out = (struct bch_alloc_v4) { -+ .journal_seq = u.journal_seq, -+ .flags = u.need_discard, -+ .gen = u.gen, -+ .oldest_gen = u.oldest_gen, -+ .data_type = u.data_type, -+ .stripe_redundancy = u.stripe_redundancy, -+ .dirty_sectors = u.dirty_sectors, -+ .cached_sectors = u.cached_sectors, -+ .io_time[READ] = u.read_time, -+ .io_time[WRITE] = u.write_time, -+ .stripe = u.stripe, -+ }; -+ -+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); -+ } -+} -+ -+static noinline struct bkey_i_alloc_v4 * -+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ struct bkey_i_alloc_v4 *ret; -+ -+ ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4))); -+ if (IS_ERR(ret)) -+ return ret; -+ -+ if (k.k->type == KEY_TYPE_alloc_v4) { -+ void *src, *dst; -+ -+ bkey_reassemble(&ret->k_i, k); -+ -+ src = alloc_v4_backpointers(&ret->v); -+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s); -+ dst = alloc_v4_backpointers(&ret->v); -+ -+ if (src < dst) -+ memset(src, 0, dst - src); -+ -+ SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0); -+ set_alloc_v4_u64s(ret); -+ } else { -+ bkey_alloc_v4_init(&ret->k_i); -+ ret->k.p = k.k->p; -+ bch2_alloc_to_v4(k, &ret->v); -+ } -+ return ret; -+} 
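
Worth pausing on the v1 decode that the conversion above relies on: bch_alloc v1 stores a presence bitmap in a->fields, and each present field occupies its fixed byte width back-to-back in data[], so absent fields cost nothing on disk and read back as zero. A standalone sketch of that scheme (hypothetical widths table and names, little-endian host assumed — this is not the kernel's code):

	#include <stdint.h>
	#include <string.h>

	/* bit i of `fields` set => field i present, widths[i] bytes, little-endian */
	static const unsigned widths[] = { 2, 2, 1, 1, 1, 1, 8, 8 };

	static uint64_t field_get(const uint8_t **p, unsigned fields, unsigned i)
	{
		uint64_t v = 0;

		if (!(fields & (1U << i)))
			return 0;		/* absent field => implicit zero */

		memcpy(&v, *p, widths[i]);	/* stands in for le*_to_cpup() above */
		*p += widths[i];
		return v;
	}

	/* usage: walk the fields in declaration order, as bch2_alloc_unpack_v1() does */

The v2/v3 formats shown above drop the fixed widths in favour of varints (bch2_varint_decode_fast), which is why their unpackers can also detect truncation: a field that decodes wider than its destination fails the `v != out->_name` check.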
-+ -+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ struct bkey_s_c_alloc_v4 a; -+ -+ if (likely(k.k->type == KEY_TYPE_alloc_v4) && -+ ((a = bkey_s_c_to_alloc_v4(k), true) && -+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0)) -+ return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4); -+ -+ return __bch2_alloc_to_v4_mut(trans, k); -+} -+ -+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ return bch2_alloc_to_v4_mut_inlined(trans, k); -+} -+ -+struct bkey_i_alloc_v4 * -+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, -+ struct bpos pos) -+{ -+ struct bkey_s_c k; -+ struct bkey_i_alloc_v4 *a; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos, -+ BTREE_ITER_WITH_UPDATES| -+ BTREE_ITER_CACHED| -+ BTREE_ITER_INTENT); -+ ret = bkey_err(k); -+ if (unlikely(ret)) -+ return ERR_PTR(ret); -+ -+ a = bch2_alloc_to_v4_mut_inlined(trans, k); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (unlikely(ret)) -+ goto err; -+ return a; -+err: -+ bch2_trans_iter_exit(trans, iter); -+ return ERR_PTR(ret); -+} -+ -+static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset) -+{ -+ *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK; -+ -+ pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS; -+ return pos; -+} -+ -+static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset) -+{ -+ pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS; -+ pos.offset += offset; -+ return pos; -+} -+ -+static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) -+{ -+ return k.k->type == KEY_TYPE_bucket_gens -+ ? bkey_s_c_to_bucket_gens(k).v->gens[offset] -+ : 0; -+} -+ -+int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, -+ bucket_gens_val_size_bad, -+ "bad val size (%zu != %zu)", -+ bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); -+fsck_err: -+ return ret; -+} -+ -+void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k); -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) { -+ if (i) -+ prt_char(out, ' '); -+ prt_printf(out, "%u", g.v->gens[i]); -+ } -+} -+ -+int bch2_bucket_gens_init(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_alloc_v4 a; -+ struct bkey_i_bucket_gens g; -+ bool have_bucket_gens_key = false; -+ unsigned offset; -+ struct bpos pos; -+ u8 gen; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ /* -+ * Not a fsck error because this is checked/repaired by -+ * bch2_check_alloc_key() which runs later: -+ */ -+ if (!bch2_dev_bucket_exists(c, k.k->p)) -+ continue; -+ -+ gen = bch2_alloc_to_v4(k, &a)->gen; -+ pos = alloc_gens_pos(iter.pos, &offset); -+ -+ if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); -+ if (ret) -+ break; -+ have_bucket_gens_key = false; -+ } -+ -+ if (!have_bucket_gens_key) { -+ bkey_bucket_gens_init(&g.k_i); -+ g.k.p = pos; -+ have_bucket_gens_key = true; -+ } -+ -+ g.v.gens[offset] = gen; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ 
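
The position arithmetic driving this loop (alloc_gens_pos() and bucket_gens_pos_to_alloc() above) packs many buckets' generations into one key: the low KEY_TYPE_BUCKET_GENS_BITS of the bucket offset index into gens[], and the remaining bits select the key. A toy restatement, assuming a hypothetical 8-bit shift (the real constant lives in the on-disk format headers, which are not part of this hunk):

	#include <stdint.h>

	#define GENS_BITS 8			/* assumed; => 256 gens per key */
	#define GENS_MASK ((1U << GENS_BITS) - 1)

	struct toy_bpos { uint64_t inode, offset; };

	static struct toy_bpos gens_pos(struct toy_bpos bucket, unsigned *offset)
	{
		*offset = bucket.offset & GENS_MASK;	/* slot within gens[] */
		bucket.offset >>= GENS_BITS;		/* which bucket_gens key */
		return bucket;
	}

	/* e.g. with GENS_BITS == 8, bucket 0:1000 lands in key 0:3 at gens[232];
	 * bucket_gens_pos_to_alloc() inverts this: (3 << 8) + 232 == 1000. */

That packing is also why the loop below flushes the accumulated key whenever the computed position changes: all buckets sharing one position must be written as a single bucket_gens key.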
-+ if (have_bucket_gens_key && !ret) -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); -+ -+ bch2_trans_put(trans); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+int bch2_alloc_read(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ int ret; -+ -+ down_read(&c->gc_lock); -+ -+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) { -+ const struct bch_bucket_gens *g; -+ u64 b; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; -+ u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; -+ -+ if (k.k->type != KEY_TYPE_bucket_gens) -+ continue; -+ -+ g = bkey_s_c_to_bucket_gens(k).v; -+ -+ /* -+ * Not a fsck error because this is checked/repaired by -+ * bch2_check_alloc_key() which runs later: -+ */ -+ if (!bch2_dev_exists2(c, k.k->p.inode)) -+ continue; -+ -+ ca = bch_dev_bkey_exists(c, k.k->p.inode); -+ -+ for (b = max_t(u64, ca->mi.first_bucket, start); -+ b < min_t(u64, ca->mi.nbuckets, end); -+ b++) -+ *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ } else { -+ struct bch_alloc_v4 a; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ /* -+ * Not a fsck error because this is checked/repaired by -+ * bch2_check_alloc_key() which runs later: -+ */ -+ if (!bch2_dev_bucket_exists(c, k.k->p)) -+ continue; -+ -+ ca = bch_dev_bkey_exists(c, k.k->p.inode); -+ -+ *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ } -+ -+ bch2_trans_put(trans); -+ up_read(&c->gc_lock); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ -+ return ret; -+} -+ -+/* Free space/discard btree: */ -+ -+static int bch2_bucket_do_index(struct btree_trans *trans, -+ struct bkey_s_c alloc_k, -+ const struct bch_alloc_v4 *a, -+ bool set) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode); -+ struct btree_iter iter; -+ struct bkey_s_c old; -+ struct bkey_i *k; -+ enum btree_id btree; -+ enum bch_bkey_type old_type = !set ? KEY_TYPE_set : KEY_TYPE_deleted; -+ enum bch_bkey_type new_type = set ? KEY_TYPE_set : KEY_TYPE_deleted; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ if (a->data_type != BCH_DATA_free && -+ a->data_type != BCH_DATA_need_discard) -+ return 0; -+ -+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); -+ if (IS_ERR(k)) -+ return PTR_ERR(k); -+ -+ bkey_init(&k->k); -+ k->k.type = new_type; -+ -+ switch (a->data_type) { -+ case BCH_DATA_free: -+ btree = BTREE_ID_freespace; -+ k->k.p = alloc_freespace_pos(alloc_k.k->p, *a); -+ bch2_key_resize(&k->k, 1); -+ break; -+ case BCH_DATA_need_discard: -+ btree = BTREE_ID_need_discard; -+ k->k.p = alloc_k.k->p; -+ break; -+ default: -+ return 0; -+ } -+ -+ old = bch2_bkey_get_iter(trans, &iter, btree, -+ bkey_start_pos(&k->k), -+ BTREE_ITER_INTENT); -+ ret = bkey_err(old); -+ if (ret) -+ return ret; -+ -+ if (ca->mi.freespace_initialized && -+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info && -+ bch2_trans_inconsistent_on(old.k->type != old_type, trans, -+ "incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n" -+ " for %s", -+ set ? 
"setting" : "clearing", -+ bch2_btree_id_str(btree), -+ iter.pos.inode, -+ iter.pos.offset, -+ bch2_bkey_types[old.k->type], -+ bch2_bkey_types[old_type], -+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -+ ret = -EIO; -+ goto err; -+ } -+ -+ ret = bch2_trans_update(trans, &iter, k, 0); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static noinline int bch2_bucket_gen_update(struct btree_trans *trans, -+ struct bpos bucket, u8 gen) -+{ -+ struct btree_iter iter; -+ unsigned offset; -+ struct bpos pos = alloc_gens_pos(bucket, &offset); -+ struct bkey_i_bucket_gens *g; -+ struct bkey_s_c k; -+ int ret; -+ -+ g = bch2_trans_kmalloc(trans, sizeof(*g)); -+ ret = PTR_ERR_OR_ZERO(g); -+ if (ret) -+ return ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos, -+ BTREE_ITER_INTENT| -+ BTREE_ITER_WITH_UPDATES); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (k.k->type != KEY_TYPE_bucket_gens) { -+ bkey_bucket_gens_init(&g->k_i); -+ g->k.p = iter.pos; -+ } else { -+ bkey_reassemble(&g->k_i, k); -+ } -+ -+ g->v.gens[offset] = gen; -+ -+ ret = bch2_trans_update(trans, &iter, &g->k_i, 0); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_trans_mark_alloc(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_i *new, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_alloc_v4 old_a_convert, *new_a; -+ const struct bch_alloc_v4 *old_a; -+ u64 old_lru, new_lru; -+ int ret = 0; -+ -+ /* -+ * Deletion only happens in the device removal path, with -+ * BTREE_TRIGGER_NORUN: -+ */ -+ BUG_ON(new->k.type != KEY_TYPE_alloc_v4); -+ -+ old_a = bch2_alloc_to_v4(old, &old_a_convert); -+ new_a = &bkey_i_to_alloc_v4(new)->v; -+ -+ new_a->data_type = alloc_data_type(*new_a, new_a->data_type); -+ -+ if (new_a->dirty_sectors > old_a->dirty_sectors || -+ new_a->cached_sectors > old_a->cached_sectors) { -+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); -+ new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); -+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); -+ SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); -+ } -+ -+ if (data_type_is_empty(new_a->data_type) && -+ BCH_ALLOC_V4_NEED_INC_GEN(new_a) && -+ !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) { -+ new_a->gen++; -+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); -+ } -+ -+ if (old_a->data_type != new_a->data_type || -+ (new_a->data_type == BCH_DATA_free && -+ alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { -+ ret = bch2_bucket_do_index(trans, old, old_a, false) ?: -+ bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true); -+ if (ret) -+ return ret; -+ } -+ -+ if (new_a->data_type == BCH_DATA_cached && -+ !new_a->io_time[READ]) -+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); -+ -+ old_lru = alloc_lru_idx_read(*old_a); -+ new_lru = alloc_lru_idx_read(*new_a); -+ -+ if (old_lru != new_lru) { -+ ret = bch2_lru_change(trans, new->k.p.inode, -+ bucket_to_u64(new->k.p), -+ old_lru, new_lru); -+ if (ret) -+ return ret; -+ } -+ -+ new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, -+ bch_dev_bkey_exists(c, new->k.p.inode)); -+ -+ if (old_a->fragmentation_lru != new_a->fragmentation_lru) { -+ ret = bch2_lru_change(trans, -+ BCH_LRU_FRAGMENTATION_START, -+ bucket_to_u64(new->k.p), -+ old_a->fragmentation_lru, new_a->fragmentation_lru); -+ if (ret) -+ return ret; -+ } -+ -+ 
if (old_a->gen != new_a->gen) { -+ ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* -+ * This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for -+ * extents style btrees, but works on non-extents btrees: -+ */ -+static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole) -+{ -+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); -+ -+ if (bkey_err(k)) -+ return k; -+ -+ if (k.k->type) { -+ return k; -+ } else { -+ struct btree_iter iter2; -+ struct bpos next; -+ -+ bch2_trans_copy_iter(&iter2, iter); -+ -+ if (!bpos_eq(iter->path->l[0].b->key.k.p, SPOS_MAX)) -+ end = bkey_min(end, bpos_nosnap_successor(iter->path->l[0].b->key.k.p)); -+ -+ end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)); -+ -+ /* -+ * btree node min/max is a closed interval, upto takes a half -+ * open interval: -+ */ -+ k = bch2_btree_iter_peek_upto(&iter2, end); -+ next = iter2.pos; -+ bch2_trans_iter_exit(iter->trans, &iter2); -+ -+ BUG_ON(next.offset >= iter->pos.offset + U32_MAX); -+ -+ if (bkey_err(k)) -+ return k; -+ -+ bkey_init(hole); -+ hole->p = iter->pos; -+ -+ bch2_key_resize(hole, next.offset - iter->pos.offset); -+ return (struct bkey_s_c) { hole, NULL }; -+ } -+} -+ -+static bool next_bucket(struct bch_fs *c, struct bpos *bucket) -+{ -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ if (bch2_dev_bucket_exists(c, *bucket)) -+ return true; -+ -+ if (bch2_dev_exists2(c, bucket->inode)) { -+ ca = bch_dev_bkey_exists(c, bucket->inode); -+ -+ if (bucket->offset < ca->mi.first_bucket) { -+ bucket->offset = ca->mi.first_bucket; -+ return true; -+ } -+ -+ bucket->inode++; -+ bucket->offset = 0; -+ } -+ -+ rcu_read_lock(); -+ iter = bucket->inode; -+ ca = __bch2_next_dev(c, &iter, NULL); -+ if (ca) -+ *bucket = POS(ca->dev_idx, ca->mi.first_bucket); -+ rcu_read_unlock(); -+ -+ return ca != NULL; -+} -+ -+static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, struct bkey *hole) -+{ -+ struct bch_fs *c = iter->trans->c; -+ struct bkey_s_c k; -+again: -+ k = bch2_get_key_or_hole(iter, POS_MAX, hole); -+ if (bkey_err(k)) -+ return k; -+ -+ if (!k.k->type) { -+ struct bpos bucket = bkey_start_pos(k.k); -+ -+ if (!bch2_dev_bucket_exists(c, bucket)) { -+ if (!next_bucket(c, &bucket)) -+ return bkey_s_c_null; -+ -+ bch2_btree_iter_set_pos(iter, bucket); -+ goto again; -+ } -+ -+ if (!bch2_dev_bucket_exists(c, k.k->p)) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); -+ -+ bch2_key_resize(hole, ca->mi.nbuckets - bucket.offset); -+ } -+ } -+ -+ return k; -+} -+ -+static noinline_for_stack -+int bch2_check_alloc_key(struct btree_trans *trans, -+ struct bkey_s_c alloc_k, -+ struct btree_iter *alloc_iter, -+ struct btree_iter *discard_iter, -+ struct btree_iter *freespace_iter, -+ struct btree_iter *bucket_gens_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ unsigned discard_key_type, freespace_key_type; -+ unsigned gens_offset; -+ struct bkey_s_c k; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c, -+ alloc_key_to_missing_dev_bucket, -+ "alloc key for invalid device:bucket %llu:%llu", -+ alloc_k.k->p.inode, alloc_k.k->p.offset)) -+ return bch2_btree_delete_at(trans, alloc_iter, 0); -+ -+ ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode); -+ if (!ca->mi.freespace_initialized) -+ return 
0; -+ -+ a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ -+ discard_key_type = a->data_type == BCH_DATA_need_discard ? KEY_TYPE_set : 0; -+ bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); -+ k = bch2_btree_iter_peek_slot(discard_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != discard_key_type && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, need_discard_key_wrong, -+ "incorrect key in need_discard btree (got %s should be %s)\n" -+ " %s", -+ bch2_bkey_types[k.k->type], -+ bch2_bkey_types[discard_key_type], -+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) { -+ struct bkey_i *update = -+ bch2_trans_kmalloc(trans, sizeof(*update)); -+ -+ ret = PTR_ERR_OR_ZERO(update); -+ if (ret) -+ goto err; -+ -+ bkey_init(&update->k); -+ update->k.type = discard_key_type; -+ update->k.p = discard_iter->pos; -+ -+ ret = bch2_trans_update(trans, discard_iter, update, 0); -+ if (ret) -+ goto err; -+ } -+ -+ freespace_key_type = a->data_type == BCH_DATA_free ? KEY_TYPE_set : 0; -+ bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); -+ k = bch2_btree_iter_peek_slot(freespace_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != freespace_key_type && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, freespace_key_wrong, -+ "incorrect key in freespace btree (got %s should be %s)\n" -+ " %s", -+ bch2_bkey_types[k.k->type], -+ bch2_bkey_types[freespace_key_type], -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) { -+ struct bkey_i *update = -+ bch2_trans_kmalloc(trans, sizeof(*update)); -+ -+ ret = PTR_ERR_OR_ZERO(update); -+ if (ret) -+ goto err; -+ -+ bkey_init(&update->k); -+ update->k.type = freespace_key_type; -+ update->k.p = freespace_iter->pos; -+ bch2_key_resize(&update->k, 1); -+ -+ ret = bch2_trans_update(trans, freespace_iter, update, 0); -+ if (ret) -+ goto err; -+ } -+ -+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); -+ k = bch2_btree_iter_peek_slot(bucket_gens_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (a->gen != alloc_gen(k, gens_offset) && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, bucket_gens_key_wrong, -+ "incorrect gen in bucket_gens btree (got %u should be %u)\n" -+ " %s", -+ alloc_gen(k, gens_offset), a->gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) { -+ struct bkey_i_bucket_gens *g = -+ bch2_trans_kmalloc(trans, sizeof(*g)); -+ -+ ret = PTR_ERR_OR_ZERO(g); -+ if (ret) -+ goto err; -+ -+ if (k.k->type == KEY_TYPE_bucket_gens) { -+ bkey_reassemble(&g->k_i, k); -+ } else { -+ bkey_bucket_gens_init(&g->k_i); -+ g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset); -+ } -+ -+ g->v.gens[gens_offset] = a->gen; -+ -+ ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0); -+ if (ret) -+ goto err; -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static noinline_for_stack -+int bch2_check_alloc_hole_freespace(struct btree_trans *trans, -+ struct bpos start, -+ struct bpos *end, -+ struct btree_iter *freespace_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca; -+ struct bkey_s_c k; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ ca = bch_dev_bkey_exists(c, start.inode); -+ if (!ca->mi.freespace_initialized) -+ return 0; -+ -+ bch2_btree_iter_set_pos(freespace_iter, start); -+ -+ k = bch2_btree_iter_peek_slot(freespace_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ *end = bkey_min(k.k->p, *end); -+ -+ if (k.k->type != 
KEY_TYPE_set && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, freespace_hole_missing, -+ "hole in alloc btree missing in freespace btree\n" -+ " device %llu buckets %llu-%llu", -+ freespace_iter->pos.inode, -+ freespace_iter->pos.offset, -+ end->offset))) { -+ struct bkey_i *update = -+ bch2_trans_kmalloc(trans, sizeof(*update)); -+ -+ ret = PTR_ERR_OR_ZERO(update); -+ if (ret) -+ goto err; -+ -+ bkey_init(&update->k); -+ update->k.type = KEY_TYPE_set; -+ update->k.p = freespace_iter->pos; -+ bch2_key_resize(&update->k, -+ min_t(u64, U32_MAX, end->offset - -+ freespace_iter->pos.offset)); -+ -+ ret = bch2_trans_update(trans, freespace_iter, update, 0); -+ if (ret) -+ goto err; -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static noinline_for_stack -+int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, -+ struct bpos start, -+ struct bpos *end, -+ struct btree_iter *bucket_gens_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ struct printbuf buf = PRINTBUF; -+ unsigned i, gens_offset, gens_end_offset; -+ int ret; -+ -+ if (c->sb.version < bcachefs_metadata_version_bucket_gens) -+ return 0; -+ -+ bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); -+ -+ k = bch2_btree_iter_peek_slot(bucket_gens_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (bkey_cmp(alloc_gens_pos(start, &gens_offset), -+ alloc_gens_pos(*end, &gens_end_offset))) -+ gens_end_offset = KEY_TYPE_BUCKET_GENS_NR; -+ -+ if (k.k->type == KEY_TYPE_bucket_gens) { -+ struct bkey_i_bucket_gens g; -+ bool need_update = false; -+ -+ bkey_reassemble(&g.k_i, k); -+ -+ for (i = gens_offset; i < gens_end_offset; i++) { -+ if (fsck_err_on(g.v.gens[i], c, -+ bucket_gens_hole_wrong, -+ "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", -+ bucket_gens_pos_to_alloc(k.k->p, i).inode, -+ bucket_gens_pos_to_alloc(k.k->p, i).offset, -+ g.v.gens[i])) { -+ g.v.gens[i] = 0; -+ need_update = true; -+ } -+ } -+ -+ if (need_update) { -+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); -+ -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto err; -+ -+ memcpy(u, &g, sizeof(g)); -+ -+ ret = bch2_trans_update(trans, bucket_gens_iter, u, 0); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0)); -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter alloc_iter; -+ struct bkey_s_c alloc_k; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ u64 genbits; -+ struct bpos pos; -+ enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard -+ ? 
BCH_DATA_need_discard -+ : BCH_DATA_free; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ pos = iter->pos; -+ pos.offset &= ~(~0ULL << 56); -+ genbits = iter->pos.offset & (~0ULL << 56); -+ -+ alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, pos, 0); -+ ret = bkey_err(alloc_k); -+ if (ret) -+ return ret; -+ -+ if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c, -+ need_discard_freespace_key_to_invalid_dev_bucket, -+ "entry in %s btree for nonexistant dev:bucket %llu:%llu", -+ bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) -+ goto delete; -+ -+ a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ -+ if (fsck_err_on(a->data_type != state || -+ (state == BCH_DATA_free && -+ genbits != alloc_freespace_genbits(*a)), c, -+ need_discard_freespace_key_bad, -+ "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", -+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), -+ bch2_btree_id_str(iter->btree_id), -+ iter->pos.inode, -+ iter->pos.offset, -+ a->data_type == state, -+ genbits >> 56, alloc_freespace_genbits(*a) >> 56)) -+ goto delete; -+out: -+fsck_err: -+ set_btree_iter_dontneed(&alloc_iter); -+ bch2_trans_iter_exit(trans, &alloc_iter); -+ printbuf_exit(&buf); -+ return ret; -+delete: -+ ret = bch2_btree_delete_extent_at(trans, iter, -+ iter->btree_id == BTREE_ID_freespace ? 1 : 0, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW); -+ goto out; -+} -+ -+static int bch2_check_discard_freespace_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end) -+{ -+ if (!btree_id_is_extents(iter->btree_id)) { -+ return __bch2_check_discard_freespace_key(trans, iter); -+ } else { -+ int ret = 0; -+ -+ while (!bkey_eq(iter->pos, end) && -+ !(ret = btree_trans_too_many_iters(trans) ?: -+ __bch2_check_discard_freespace_key(trans, iter))) -+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); -+ -+ return ret; -+ } -+} -+ -+/* -+ * We've already checked that generation numbers in the bucket_gens btree are -+ * valid for buckets that exist; this just checks for keys for nonexistent -+ * buckets. 
-+ */ -+static noinline_for_stack -+int bch2_check_bucket_gens_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_bucket_gens g; -+ struct bch_dev *ca; -+ u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; -+ u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; -+ u64 b; -+ bool need_update = false, dev_exists; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ BUG_ON(k.k->type != KEY_TYPE_bucket_gens); -+ bkey_reassemble(&g.k_i, k); -+ -+ /* if no bch_dev, skip out whether we repair or not */ -+ dev_exists = bch2_dev_exists2(c, k.k->p.inode); -+ if (!dev_exists) { -+ if (fsck_err_on(!dev_exists, c, -+ bucket_gens_to_invalid_dev, -+ "bucket_gens key for invalid device:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, 0); -+ } -+ goto out; -+ } -+ -+ ca = bch_dev_bkey_exists(c, k.k->p.inode); -+ if (fsck_err_on(end <= ca->mi.first_bucket || -+ start >= ca->mi.nbuckets, c, -+ bucket_gens_to_invalid_buckets, -+ "bucket_gens key for invalid buckets:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, 0); -+ goto out; -+ } -+ -+ for (b = start; b < ca->mi.first_bucket; b++) -+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, -+ bucket_gens_nonzero_for_invalid_buckets, -+ "bucket_gens key has nonzero gen for invalid bucket")) { -+ g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; -+ need_update = true; -+ } -+ -+ for (b = ca->mi.nbuckets; b < end; b++) -+ if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, -+ bucket_gens_nonzero_for_invalid_buckets, -+ "bucket_gens key has nonzero gen for invalid bucket")) { -+ g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; -+ need_update = true; -+ } -+ -+ if (need_update) { -+ struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); -+ -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto out; -+ -+ memcpy(u, &g, sizeof(g)); -+ ret = bch2_trans_update(trans, iter, u, 0); -+ } -+out: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_check_alloc_info(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; -+ struct bkey hole; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN, -+ BTREE_ITER_PREFETCH); -+ -+ while (1) { -+ struct bpos next; -+ -+ bch2_trans_begin(trans); -+ -+ k = bch2_get_key_or_real_bucket_hole(&iter, &hole); -+ ret = bkey_err(k); -+ if (ret) -+ goto bkey_err; -+ -+ if (!k.k) -+ break; -+ -+ if (k.k->type) { -+ next = bpos_nosnap_successor(k.k->p); -+ -+ ret = bch2_check_alloc_key(trans, -+ k, &iter, -+ &discard_iter, -+ &freespace_iter, -+ &bucket_gens_iter); -+ if (ret) -+ goto bkey_err; -+ } else { -+ next = k.k->p; -+ -+ ret = bch2_check_alloc_hole_freespace(trans, -+ bkey_start_pos(k.k), -+ &next, -+ &freespace_iter) ?: -+ bch2_check_alloc_hole_bucket_gens(trans, -+ bkey_start_pos(k.k), -+ &next, -+ &bucket_gens_iter); -+ if (ret) -+ goto bkey_err; -+ } -+ -+ ret = bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); 
-+ if (ret) -+ goto bkey_err; -+ -+ bch2_btree_iter_set_pos(&iter, next); -+bkey_err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &bucket_gens_iter); -+ bch2_trans_iter_exit(trans, &freespace_iter); -+ bch2_trans_iter_exit(trans, &discard_iter); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret < 0) -+ goto err; -+ -+ ret = for_each_btree_key2(trans, iter, -+ BTREE_ID_need_discard, POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?: -+ for_each_btree_key2(trans, iter, -+ BTREE_ID_freespace, POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?: -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_bucket_gens, POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ bch2_check_bucket_gens_key(trans, &iter, k)); -+err: -+ bch2_trans_put(trans); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, -+ struct btree_iter *alloc_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter lru_iter; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ struct bkey_s_c alloc_k, lru_k; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ alloc_k = bch2_btree_iter_peek(alloc_iter); -+ if (!alloc_k.k) -+ return 0; -+ -+ ret = bkey_err(alloc_k); -+ if (ret) -+ return ret; -+ -+ a = bch2_alloc_to_v4(alloc_k, &a_convert); -+ -+ if (a->data_type != BCH_DATA_cached) -+ return 0; -+ -+ lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, -+ lru_pos(alloc_k.k->p.inode, -+ bucket_to_u64(alloc_k.k->p), -+ a->io_time[READ]), 0); -+ ret = bkey_err(lru_k); -+ if (ret) -+ return ret; -+ -+ if (fsck_err_on(!a->io_time[READ], c, -+ alloc_key_cached_but_read_time_zero, -+ "cached bucket with read_time 0\n" -+ " %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) || -+ fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, -+ alloc_key_to_missing_lru_entry, -+ "missing lru entry\n" -+ " %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { -+ u64 read_time = a->io_time[READ] ?: -+ atomic64_read(&c->io_clock[READ].now); -+ -+ ret = bch2_lru_set(trans, -+ alloc_k.k->p.inode, -+ bucket_to_u64(alloc_k.k->p), -+ read_time); -+ if (ret) -+ goto err; -+ -+ if (a->io_time[READ] != read_time) { -+ struct bkey_i_alloc_v4 *a_mut = -+ bch2_alloc_to_v4_mut(trans, alloc_k); -+ ret = PTR_ERR_OR_ZERO(a_mut); -+ if (ret) -+ goto err; -+ -+ a_mut->v.io_time[READ] = read_time; -+ ret = bch2_trans_update(trans, alloc_iter, -+ &a_mut->k_i, BTREE_TRIGGER_NORUN); -+ if (ret) -+ goto err; -+ } -+ } -+err: -+fsck_err: -+ bch2_trans_iter_exit(trans, &lru_iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_check_alloc_to_lru_refs(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, -+ POS_MIN, BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ bch2_check_alloc_to_lru_ref(trans, &iter))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int bch2_discard_one_bucket(struct btree_trans *trans, -+ struct btree_iter *need_discard_iter, -+ struct bpos *discard_pos_done, -+ u64 *seen, -+ u64 *open, -+ u64 *need_journal_commit, -+ u64 *discarded) -+{ -+ struct bch_fs *c = trans->c; -+ struct bpos pos = 
need_discard_iter->pos; -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ struct bkey_i_alloc_v4 *a; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ ca = bch_dev_bkey_exists(c, pos.inode); -+ if (!percpu_ref_tryget(&ca->io_ref)) { -+ bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0)); -+ return 0; -+ } -+ -+ if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { -+ (*open)++; -+ goto out; -+ } -+ -+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, -+ pos.inode, pos.offset)) { -+ (*need_journal_commit)++; -+ goto out; -+ } -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, -+ need_discard_iter->pos, -+ BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (ret) -+ goto out; -+ -+ a = bch2_alloc_to_v4_mut(trans, k); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto out; -+ -+ if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { -+ a->v.gen++; -+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); -+ goto write; -+ } -+ -+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { -+ bch2_trans_inconsistent(trans, -+ "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" -+ "%s", -+ a->v.journal_seq, -+ c->journal.flushed_seq_ondisk, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ } -+ goto out; -+ } -+ -+ if (a->v.data_type != BCH_DATA_need_discard) { -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { -+ bch2_trans_inconsistent(trans, -+ "bucket incorrectly set in need_discard btree\n" -+ "%s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ } -+ -+ goto out; -+ } -+ -+ if (!bkey_eq(*discard_pos_done, iter.pos) && -+ ca->mi.discard && !c->opts.nochanges) { -+ /* -+ * This works without any other locks because this is the only -+ * thread that removes items from the need_discard tree -+ */ -+ bch2_trans_unlock(trans); -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ k.k->p.offset * ca->mi.bucket_size, -+ ca->mi.bucket_size, -+ GFP_KERNEL); -+ *discard_pos_done = iter.pos; -+ -+ ret = bch2_trans_relock_notrace(trans); -+ if (ret) -+ goto out; -+ } -+ -+ SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); -+ a->v.data_type = alloc_data_type(a->v, a->v.data_type); -+write: -+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BCH_WATERMARK_btree| -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ goto out; -+ -+ this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]); -+ (*discarded)++; -+out: -+ (*seen)++; -+ bch2_trans_iter_exit(trans, &iter); -+ percpu_ref_put(&ca->io_ref); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static void bch2_do_discards_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, discard_work); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; -+ struct bpos discard_pos_done = POS_MAX; -+ int ret; -+ -+ /* -+ * We're doing the commit in bch2_discard_one_bucket instead of using -+ * for_each_btree_key_commit() so that we can increment counters after -+ * successful commit: -+ */ -+ ret = bch2_trans_run(c, -+ for_each_btree_key2(trans, iter, -+ BTREE_ID_need_discard, POS_MIN, 0, k, -+ bch2_discard_one_bucket(trans, &iter, &discard_pos_done, -+ &seen, -+ &open, -+ &need_journal_commit, -+ &discarded))); -+ -+ if (need_journal_commit * 2 > seen) -+ bch2_journal_flush_async(&c->journal, NULL); -+ -+ bch2_write_ref_put(c, 
BCH_WRITE_REF_discard); -+ -+ trace_discard_buckets(c, seen, open, need_journal_commit, discarded, -+ bch2_err_str(ret)); -+} -+ -+void bch2_do_discards(struct bch_fs *c) -+{ -+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) && -+ !queue_work(c->write_ref_wq, &c->discard_work)) -+ bch2_write_ref_put(c, BCH_WRITE_REF_discard); -+} -+ -+static int invalidate_one_bucket(struct btree_trans *trans, -+ struct btree_iter *lru_iter, -+ struct bkey_s_c lru_k, -+ s64 *nr_to_invalidate) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter alloc_iter = { NULL }; -+ struct bkey_i_alloc_v4 *a = NULL; -+ struct printbuf buf = PRINTBUF; -+ struct bpos bucket = u64_to_bucket(lru_k.k->p.offset); -+ unsigned cached_sectors; -+ int ret = 0; -+ -+ if (*nr_to_invalidate <= 0) -+ return 1; -+ -+ if (!bch2_dev_bucket_exists(c, bucket)) { -+ prt_str(&buf, "lru entry points to invalid bucket"); -+ goto err; -+ } -+ -+ if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) -+ return 0; -+ -+ a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ goto out; -+ -+ /* We expect harmless races here due to the btree write buffer: */ -+ if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v)) -+ goto out; -+ -+ BUG_ON(a->v.data_type != BCH_DATA_cached); -+ -+ if (!a->v.cached_sectors) -+ bch_err(c, "invalidating empty bucket, confused"); -+ -+ cached_sectors = a->v.cached_sectors; -+ -+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); -+ a->v.gen++; -+ a->v.data_type = 0; -+ a->v.dirty_sectors = 0; -+ a->v.cached_sectors = 0; -+ a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now); -+ a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now); -+ -+ ret = bch2_trans_update(trans, &alloc_iter, &a->k_i, -+ BTREE_TRIGGER_BUCKET_INVALIDATE) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BCH_WATERMARK_btree| -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ goto out; -+ -+ trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors); -+ --*nr_to_invalidate; -+out: -+ bch2_trans_iter_exit(trans, &alloc_iter); -+ printbuf_exit(&buf); -+ return ret; -+err: -+ prt_str(&buf, "\n lru key: "); -+ bch2_bkey_val_to_text(&buf, c, lru_k); -+ -+ prt_str(&buf, "\n lru entry: "); -+ bch2_lru_pos_to_text(&buf, lru_iter->pos); -+ -+ prt_str(&buf, "\n alloc key: "); -+ if (!a) -+ bch2_bpos_to_text(&buf, bucket); -+ else -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); -+ -+ bch_err(c, "%s", buf.buf); -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) { -+ bch2_inconsistent_error(c); -+ ret = -EINVAL; -+ } -+ -+ goto out; -+} -+ -+static void bch2_do_invalidates_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); -+ struct bch_dev *ca; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ unsigned i; -+ int ret = 0; -+ -+ ret = bch2_btree_write_buffer_flush(trans); -+ if (ret) -+ goto err; -+ -+ for_each_member_device(ca, c, i) { -+ s64 nr_to_invalidate = -+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, -+ lru_pos(ca->dev_idx, 0, 0), -+ lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), -+ BTREE_ITER_INTENT, k, -+ invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate)); -+ -+ if (ret < 0) { -+ percpu_ref_put(&ca->ref); -+ break; -+ } -+ } -+err: -+ bch2_trans_put(trans); -+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); -+} -+ -+void bch2_do_invalidates(struct 
bch_fs *c) -+{ -+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) && -+ !queue_work(c->write_ref_wq, &c->invalidate_work)) -+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); -+} -+ -+int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, -+ u64 bucket_start, u64 bucket_end) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey hole; -+ struct bpos end = POS(ca->dev_idx, bucket_end); -+ struct bch_member *m; -+ unsigned long last_updated = jiffies; -+ int ret; -+ -+ BUG_ON(bucket_start > bucket_end); -+ BUG_ON(bucket_end > ca->mi.nbuckets); -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, -+ POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)), -+ BTREE_ITER_PREFETCH); -+ /* -+ * Scan the alloc btree for every bucket on @ca, and add buckets to the -+ * freespace/need_discard/need_gc_gens btrees as needed: -+ */ -+ while (1) { -+ if (last_updated + HZ * 10 < jiffies) { -+ bch_info(ca, "%s: currently at %llu/%llu", -+ __func__, iter.pos.offset, ca->mi.nbuckets); -+ last_updated = jiffies; -+ } -+ -+ bch2_trans_begin(trans); -+ -+ if (bkey_ge(iter.pos, end)) { -+ ret = 0; -+ break; -+ } -+ -+ k = bch2_get_key_or_hole(&iter, end, &hole); -+ ret = bkey_err(k); -+ if (ret) -+ goto bkey_err; -+ -+ if (k.k->type) { -+ /* -+ * We process live keys in the alloc btree one at a -+ * time: -+ */ -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); -+ -+ ret = bch2_bucket_do_index(trans, k, a, true) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ goto bkey_err; -+ -+ bch2_btree_iter_advance(&iter); -+ } else { -+ struct bkey_i *freespace; -+ -+ freespace = bch2_trans_kmalloc(trans, sizeof(*freespace)); -+ ret = PTR_ERR_OR_ZERO(freespace); -+ if (ret) -+ goto bkey_err; -+ -+ bkey_init(&freespace->k); -+ freespace->k.type = KEY_TYPE_set; -+ freespace->k.p = k.k->p; -+ freespace->k.size = k.k->size; -+ -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_NOFAIL); -+ if (ret) -+ goto bkey_err; -+ -+ bch2_btree_iter_set_pos(&iter, k.k->p); -+ } -+bkey_err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ -+ if (ret < 0) { -+ bch_err_msg(ca, ret, "initializing free space"); -+ return ret; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_fs_freespace_init(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ bool doing_init = false; -+ -+ /* -+ * We can crash during the device add path, so we need to check this on -+ * every mount: -+ */ -+ -+ for_each_member_device(ca, c, i) { -+ if (ca->mi.freespace_initialized) -+ continue; -+ -+ if (!doing_init) { -+ bch_info(c, "initializing freespace"); -+ doing_init = true; -+ } -+ -+ ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); -+ if (ret) { -+ percpu_ref_put(&ca->ref); -+ bch_err_fn(c, ret); -+ return ret; -+ } -+ } -+ -+ if (doing_init) { -+ mutex_lock(&c->sb_lock); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ bch_verbose(c, "done initializing freespace"); -+ } -+ -+ return 0; -+} -+ -+/* Bucket IO clocks: */ -+ -+int 
bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, -+ size_t bucket_nr, int rw) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_i_alloc_v4 *a; -+ u64 now; -+ int ret = 0; -+ -+ a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr)); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ return ret; -+ -+ now = atomic64_read(&c->io_clock[rw].now); -+ if (a->v.io_time[rw] == now) -+ goto out; -+ -+ a->v.io_time[rw] = now; -+ -+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* Startup/shutdown (ro/rw): */ -+ -+void bch2_recalc_capacity(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ u64 capacity = 0, reserved_sectors = 0, gc_reserve; -+ unsigned bucket_size_max = 0; -+ unsigned long ra_pages = 0; -+ unsigned i; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ for_each_online_member(ca, c, i) { -+ struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi; -+ -+ ra_pages += bdi->ra_pages; -+ } -+ -+ bch2_set_ra_pages(c, ra_pages); -+ -+ for_each_rw_member(ca, c, i) { -+ u64 dev_reserve = 0; -+ -+ /* -+ * We need to reserve buckets (from the number -+ * of currently available buckets) against -+ * foreground writes so that mainly copygc can -+ * make forward progress. -+ * -+ * We need enough to refill the various reserves -+ * from scratch - copygc will use its entire -+ * reserve all at once, then run against when -+ * its reserve is refilled (from the formerly -+ * available buckets). -+ * -+ * This reserve is just used when considering if -+ * allocations for foreground writes must wait - -+ * not -ENOSPC calculations. -+ */ -+ -+ dev_reserve += ca->nr_btree_reserve * 2; -+ dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */ -+ -+ dev_reserve += 1; /* btree write point */ -+ dev_reserve += 1; /* copygc write point */ -+ dev_reserve += 1; /* rebalance write point */ -+ -+ dev_reserve *= ca->mi.bucket_size; -+ -+ capacity += bucket_to_sector(ca, ca->mi.nbuckets - -+ ca->mi.first_bucket); -+ -+ reserved_sectors += dev_reserve * 2; -+ -+ bucket_size_max = max_t(unsigned, bucket_size_max, -+ ca->mi.bucket_size); -+ } -+ -+ gc_reserve = c->opts.gc_reserve_bytes -+ ? 
c->opts.gc_reserve_bytes >> 9 -+ : div64_u64(capacity * c->opts.gc_reserve_percent, 100); -+ -+ reserved_sectors = max(gc_reserve, reserved_sectors); -+ -+ reserved_sectors = min(reserved_sectors, capacity); -+ -+ c->capacity = capacity - reserved_sectors; -+ -+ c->bucket_size_max = bucket_size_max; -+ -+ /* Wake up case someone was waiting for buckets */ -+ closure_wake_up(&c->freelist_wait); -+} -+ -+u64 bch2_min_rw_member_capacity(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ u64 ret = U64_MAX; -+ -+ for_each_rw_member(ca, c, i) -+ ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size); -+ return ret; -+} -+ -+static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct open_bucket *ob; -+ bool ret = false; -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid && !ob->on_partial_list && -+ ob->dev == ca->dev_idx) -+ ret = true; -+ spin_unlock(&ob->lock); -+ } -+ -+ return ret; -+} -+ -+/* device goes ro: */ -+void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ /* First, remove device from allocation groups: */ -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ clear_bit(ca->dev_idx, c->rw_devs[i].d); -+ -+ /* -+ * Capacity is calculated based off of devices in allocation groups: -+ */ -+ bch2_recalc_capacity(c); -+ -+ bch2_open_buckets_stop(c, ca, false); -+ -+ /* -+ * Wake up threads that were blocked on allocation, so they can notice -+ * the device can no longer be removed and the capacity has changed: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ -+ /* -+ * journal_res_get() can block waiting for free space in the journal - -+ * it needs to notice there may not be devices to allocate from anymore: -+ */ -+ wake_up(&c->journal.wait); -+ -+ /* Now wait for any in flight writes: */ -+ -+ closure_wait_event(&c->open_buckets_wait, -+ !bch2_dev_has_open_write_point(c, ca)); -+} -+ -+/* device goes rw: */ -+void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->rw_devs); i++) -+ if (ca->mi.data_allowed & (1 << i)) -+ set_bit(ca->dev_idx, c->rw_devs[i].d); -+} -+ -+void bch2_fs_allocator_background_init(struct bch_fs *c) -+{ -+ spin_lock_init(&c->freelist_lock); -+ INIT_WORK(&c->discard_work, bch2_do_discards_work); -+ INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work); -+} -diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h -new file mode 100644 -index 000000000000..73faf99a222a ---- /dev/null -+++ b/fs/bcachefs/alloc_background.h -@@ -0,0 +1,259 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H -+#define _BCACHEFS_ALLOC_BACKGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "buckets.h" -+#include "debug.h" -+#include "super.h" -+ -+enum bkey_invalid_flags; -+ -+/* How out of date a pointer gen is allowed to be: */ -+#define BUCKET_GC_GEN_MAX 96U -+ -+static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos) -+{ -+ struct bch_dev *ca; -+ -+ if (!bch2_dev_exists2(c, pos.inode)) -+ return false; -+ -+ ca = bch_dev_bkey_exists(c, pos.inode); -+ return pos.offset >= ca->mi.first_bucket && -+ pos.offset < ca->mi.nbuckets; -+} -+ -+static inline u64 bucket_to_u64(struct bpos bucket) -+{ -+ return (bucket.inode << 48) | bucket.offset; -+} -+ -+static inline struct bpos u64_to_bucket(u64 bucket) -+{ -+ return POS(bucket >> 48, bucket & ~(~0ULL << 
48)); -+} -+ -+static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) -+{ -+ return a.gen - a.oldest_gen; -+} -+ -+static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors, -+ u32 cached_sectors, -+ u32 stripe, -+ struct bch_alloc_v4 a, -+ enum bch_data_type data_type) -+{ -+ if (stripe) -+ return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe; -+ if (dirty_sectors) -+ return data_type; -+ if (cached_sectors) -+ return BCH_DATA_cached; -+ if (BCH_ALLOC_V4_NEED_DISCARD(&a)) -+ return BCH_DATA_need_discard; -+ if (alloc_gc_gen(a) >= BUCKET_GC_GEN_MAX) -+ return BCH_DATA_need_gc_gens; -+ return BCH_DATA_free; -+} -+ -+static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a, -+ enum bch_data_type data_type) -+{ -+ return __alloc_data_type(a.dirty_sectors, a.cached_sectors, -+ a.stripe, a, data_type); -+} -+ -+static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type) -+{ -+ return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type; -+} -+ -+static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) -+{ -+ return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; -+} -+ -+#define DATA_TYPES_MOVABLE \ -+ ((1U << BCH_DATA_btree)| \ -+ (1U << BCH_DATA_user)| \ -+ (1U << BCH_DATA_stripe)) -+ -+static inline bool data_type_movable(enum bch_data_type type) -+{ -+ return (1U << type) & DATA_TYPES_MOVABLE; -+} -+ -+static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, -+ struct bch_dev *ca) -+{ -+ if (!data_type_movable(a.data_type) || -+ a.dirty_sectors >= ca->mi.bucket_size) -+ return 0; -+ -+ return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); -+} -+ -+static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) -+{ -+ return ((u64) alloc_gc_gen(a) >> 4) << 56; -+} -+ -+static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_v4 a) -+{ -+ pos.offset |= alloc_freespace_genbits(a); -+ return pos; -+} -+ -+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a) -+{ -+ unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: -+ BCH_ALLOC_V4_U64s_V0) + -+ BCH_ALLOC_V4_NR_BACKPOINTERS(a) * -+ (sizeof(struct bch_backpointer) / sizeof(u64)); -+ -+ BUG_ON(ret > U8_MAX - BKEY_U64s); -+ return ret; -+} -+ -+static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a) -+{ -+ set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v)); -+} -+ -+struct bkey_i_alloc_v4 * -+bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos); -+ -+void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *); -+ -+static inline const struct bch_alloc_v4 *bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *convert) -+{ -+ const struct bch_alloc_v4 *ret; -+ -+ if (unlikely(k.k->type != KEY_TYPE_alloc_v4)) -+ goto slowpath; -+ -+ ret = bkey_s_c_to_alloc_v4(k).v; -+ if (BCH_ALLOC_V4_BACKPOINTERS_START(ret) != BCH_ALLOC_V4_U64s) -+ goto slowpath; -+ -+ return ret; -+slowpath: -+ __bch2_alloc_to_v4(k, convert); -+ return convert; -+} -+ -+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c); -+ -+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -+ -+int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+int bch2_alloc_v4_invalid(struct bch_fs 
*, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_alloc_v4_swab(struct bkey_s); -+void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_alloc ((struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_v1_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+ .trans_trigger = bch2_trans_mark_alloc, \ -+ .atomic_trigger = bch2_mark_alloc, \ -+ .min_val_size = 8, \ -+}) -+ -+#define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_v2_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+ .trans_trigger = bch2_trans_mark_alloc, \ -+ .atomic_trigger = bch2_mark_alloc, \ -+ .min_val_size = 8, \ -+}) -+ -+#define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_v3_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+ .trans_trigger = bch2_trans_mark_alloc, \ -+ .atomic_trigger = bch2_mark_alloc, \ -+ .min_val_size = 16, \ -+}) -+ -+#define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ -+ .key_invalid = bch2_alloc_v4_invalid, \ -+ .val_to_text = bch2_alloc_to_text, \ -+ .swab = bch2_alloc_v4_swab, \ -+ .trans_trigger = bch2_trans_mark_alloc, \ -+ .atomic_trigger = bch2_mark_alloc, \ -+ .min_val_size = 48, \ -+}) -+ -+int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ -+ .key_invalid = bch2_bucket_gens_invalid, \ -+ .val_to_text = bch2_bucket_gens_to_text, \ -+}) -+ -+int bch2_bucket_gens_init(struct bch_fs *); -+ -+static inline bool bkey_is_alloc(const struct bkey *k) -+{ -+ return k->type == KEY_TYPE_alloc || -+ k->type == KEY_TYPE_alloc_v2 || -+ k->type == KEY_TYPE_alloc_v3; -+} -+ -+int bch2_alloc_read(struct bch_fs *); -+ -+int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_i *, unsigned); -+int bch2_check_alloc_info(struct bch_fs *); -+int bch2_check_alloc_to_lru_refs(struct bch_fs *); -+void bch2_do_discards(struct bch_fs *); -+ -+static inline u64 should_invalidate_buckets(struct bch_dev *ca, -+ struct bch_dev_usage u) -+{ -+ u64 want_free = ca->mi.nbuckets >> 7; -+ u64 free = max_t(s64, 0, -+ u.d[BCH_DATA_free].buckets -+ + u.d[BCH_DATA_need_discard].buckets -+ - bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe)); -+ -+ return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); -+} -+ -+void bch2_do_invalidates(struct bch_fs *); -+ -+static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) -+{ -+ return (void *) ((u64 *) &a->v + -+ (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: -+ BCH_ALLOC_V4_U64s_V0)); -+} -+ -+static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a) -+{ -+ return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); -+} -+ -+int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); -+int bch2_fs_freespace_init(struct bch_fs *); -+ -+void bch2_recalc_capacity(struct bch_fs *); -+u64 bch2_min_rw_member_capacity(struct bch_fs *); -+ -+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); -+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); -+ -+void bch2_fs_allocator_background_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */ -diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c -new file mode 100644 -index 
000000000000..b85c7765272f ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.c -@@ -0,0 +1,1600 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright 2012 Google, Inc. -+ * -+ * Foreground allocator code: allocate buckets from freelist, and allocate in -+ * sector granularity from writepoints. -+ * -+ * bch2_bucket_alloc() allocates a single bucket from a specific device. -+ * -+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices -+ * in a given filesystem. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "backpointers.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "btree_gc.h" -+#include "buckets.h" -+#include "buckets_waiting_for_journal.h" -+#include "clock.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io_write.h" -+#include "journal.h" -+#include "movinggc.h" -+#include "nocow_locking.h" -+#include "trace.h" -+ -+#include <linux/math64.h> -+#include <linux/rculist.h> -+#include <linux/rcupdate.h> -+ -+static void bch2_trans_mutex_lock_norelock(struct btree_trans *trans, -+ struct mutex *lock) -+{ -+ if (!mutex_trylock(lock)) { -+ bch2_trans_unlock(trans); -+ mutex_lock(lock); -+ } -+} -+ -+const char * const bch2_watermarks[] = { -+#define x(t) #t, -+ BCH_WATERMARKS() -+#undef x -+ NULL -+}; -+ -+/* -+ * Open buckets represent a bucket that's currently being allocated from. They -+ * serve two purposes: -+ * -+ * - They track buckets that have been partially allocated, allowing for -+ * sub-bucket sized allocations - they're used by the sector allocator below -+ * -+ * - They provide a reference to the buckets they own that mark and sweep GC -+ * can find, until the new allocation has a pointer to it inserted into the -+ * btree -+ * -+ * When allocating some space with the sector allocator, the allocation comes -+ * with a reference to an open bucket - the caller is required to put that -+ * reference _after_ doing the index update that makes its allocation reachable.
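-+ * -+ * For example: the write path allocates space, submits its data write, does -+ * the btree update that adds the new extent key, and only then puts its -+ * open_bucket references.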
-+ */ -+ -+void bch2_reset_alloc_cursors(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, NULL) -+ ca->alloc_cursor = 0; -+ rcu_read_unlock(); -+} -+ -+static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob) -+{ -+ open_bucket_idx_t idx = ob - c->open_buckets; -+ open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket); -+ -+ ob->hash = *slot; -+ *slot = idx; -+} -+ -+static void bch2_open_bucket_hash_remove(struct bch_fs *c, struct open_bucket *ob) -+{ -+ open_bucket_idx_t idx = ob - c->open_buckets; -+ open_bucket_idx_t *slot = open_bucket_hashslot(c, ob->dev, ob->bucket); -+ -+ while (*slot != idx) { -+ BUG_ON(!*slot); -+ slot = &c->open_buckets[*slot].hash; -+ } -+ -+ *slot = ob->hash; -+ ob->hash = 0; -+} -+ -+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ -+ if (ob->ec) { -+ ec_stripe_new_put(c, ob->ec, STRIPE_REF_io); -+ return; -+ } -+ -+ percpu_down_read(&c->mark_lock); -+ spin_lock(&ob->lock); -+ -+ ob->valid = false; -+ ob->data_type = 0; -+ -+ spin_unlock(&ob->lock); -+ percpu_up_read(&c->mark_lock); -+ -+ spin_lock(&c->freelist_lock); -+ bch2_open_bucket_hash_remove(c, ob); -+ -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ -+ c->open_buckets_nr_free++; -+ ca->nr_open_buckets--; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *c, -+ struct open_buckets *obs, -+ unsigned dev) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->dev == dev && ob->ec) -+ bch2_ec_bucket_cancel(c, ob); -+} -+ -+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ -+ BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free); -+ -+ ob = c->open_buckets + c->open_buckets_freelist; -+ c->open_buckets_freelist = ob->freelist; -+ atomic_set(&ob->pin, 1); -+ ob->data_type = 0; -+ -+ c->open_buckets_nr_free--; -+ return ob; -+} -+ -+static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) -+{ -+ BUG_ON(c->open_buckets_partial_nr >= -+ ARRAY_SIZE(c->open_buckets_partial)); -+ -+ spin_lock(&c->freelist_lock); -+ ob->on_partial_list = true; -+ c->open_buckets_partial[c->open_buckets_partial_nr++] = -+ ob - c->open_buckets; -+ spin_unlock(&c->freelist_lock); -+ -+ closure_wake_up(&c->open_buckets_wait); -+ closure_wake_up(&c->freelist_wait); -+} -+ -+/* _only_ for allocating the journal on a new device: */ -+long bch2_bucket_alloc_new_fs(struct bch_dev *ca) -+{ -+ while (ca->new_fs_bucket_idx < ca->mi.nbuckets) { -+ u64 b = ca->new_fs_bucket_idx++; -+ -+ if (!is_superblock_bucket(ca, b) && -+ (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse))) -+ return b; -+ } -+ -+ return -1; -+} -+ -+static inline unsigned open_buckets_reserved(enum bch_watermark watermark) -+{ -+ switch (watermark) { -+ case BCH_WATERMARK_reclaim: -+ return 0; -+ case BCH_WATERMARK_btree: -+ case BCH_WATERMARK_btree_copygc: -+ return OPEN_BUCKETS_COUNT / 4; -+ case BCH_WATERMARK_copygc: -+ return OPEN_BUCKETS_COUNT / 3; -+ default: -+ return OPEN_BUCKETS_COUNT / 2; -+ } -+} -+ -+static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, -+ u64 bucket, -+ enum bch_watermark watermark, -+ const struct bch_alloc_v4 *a, -+ struct bucket_alloc_state *s, -+ struct 
closure *cl) -+{ -+ struct open_bucket *ob; -+ -+ if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) { -+ s->skipped_nouse++; -+ return NULL; -+ } -+ -+ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { -+ s->skipped_open++; -+ return NULL; -+ } -+ -+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) { -+ s->skipped_need_journal_commit++; -+ return NULL; -+ } -+ -+ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) { -+ s->skipped_nocow++; -+ return NULL; -+ } -+ -+ spin_lock(&c->freelist_lock); -+ -+ if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) { -+ if (cl) -+ closure_wait(&c->open_buckets_wait, cl); -+ -+ if (!c->blocked_allocate_open_bucket) -+ c->blocked_allocate_open_bucket = local_clock(); -+ -+ spin_unlock(&c->freelist_lock); -+ return ERR_PTR(-BCH_ERR_open_buckets_empty); -+ } -+ -+ /* Recheck under lock: */ -+ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { -+ spin_unlock(&c->freelist_lock); -+ s->skipped_open++; -+ return NULL; -+ } -+ -+ ob = bch2_open_bucket_alloc(c); -+ -+ spin_lock(&ob->lock); -+ -+ ob->valid = true; -+ ob->sectors_free = ca->mi.bucket_size; -+ ob->dev = ca->dev_idx; -+ ob->gen = a->gen; -+ ob->bucket = bucket; -+ spin_unlock(&ob->lock); -+ -+ ca->nr_open_buckets++; -+ bch2_open_bucket_hash_add(c, ob); -+ -+ if (c->blocked_allocate_open_bucket) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate_open_bucket], -+ c->blocked_allocate_open_bucket); -+ c->blocked_allocate_open_bucket = 0; -+ } -+ -+ if (c->blocked_allocate) { -+ bch2_time_stats_update( -+ &c->times[BCH_TIME_blocked_allocate], -+ c->blocked_allocate); -+ c->blocked_allocate = 0; -+ } -+ -+ spin_unlock(&c->freelist_lock); -+ return ob; -+} -+ -+static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, -+ enum bch_watermark watermark, u64 free_entry, -+ struct bucket_alloc_state *s, -+ struct bkey_s_c freespace_k, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c k; -+ struct open_bucket *ob; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ u64 b = free_entry & ~(~0ULL << 56); -+ unsigned genbits = free_entry >> 56; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) { -+ prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n" -+ " freespace key ", -+ ca->mi.first_bucket, ca->mi.nbuckets); -+ bch2_bkey_val_to_text(&buf, c, freespace_k); -+ bch2_trans_inconsistent(trans, "%s", buf.buf); -+ ob = ERR_PTR(-EIO); -+ goto err; -+ } -+ -+ k = bch2_bkey_get_iter(trans, &iter, -+ BTREE_ID_alloc, POS(ca->dev_idx, b), -+ BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (ret) { -+ ob = ERR_PTR(ret); -+ goto err; -+ } -+ -+ a = bch2_alloc_to_v4(k, &a_convert); -+ -+ if (a->data_type != BCH_DATA_free) { -+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { -+ ob = NULL; -+ goto err; -+ } -+ -+ prt_printf(&buf, "non free bucket in freespace btree\n" -+ " freespace key "); -+ bch2_bkey_val_to_text(&buf, c, freespace_k); -+ prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ bch2_trans_inconsistent(trans, "%s", buf.buf); -+ ob = ERR_PTR(-EIO); -+ goto err; -+ } -+ -+ if (genbits != (alloc_freespace_genbits(*a) >> 56) && -+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { -+ prt_printf(&buf, "bucket in freespace 
btree with wrong genbits (got %u should be %llu)\n" -+ " freespace key ", -+ genbits, alloc_freespace_genbits(*a) >> 56); -+ bch2_bkey_val_to_text(&buf, c, freespace_k); -+ prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ bch2_trans_inconsistent(trans, "%s", buf.buf); -+ ob = ERR_PTR(-EIO); -+ goto err; -+ } -+ -+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) { -+ struct bch_backpointer bp; -+ struct bpos bp_pos = POS_MIN; -+ -+ ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1, -+ &bp_pos, &bp, -+ BTREE_ITER_NOPRESERVE); -+ if (ret) { -+ ob = ERR_PTR(ret); -+ goto err; -+ } -+ -+ if (!bkey_eq(bp_pos, POS_MAX)) { -+ /* -+ * Bucket may have data in it - we don't call -+ * bch2_trans_inconsistent() because fsck hasn't -+ * finished yet -+ */ -+ ob = NULL; -+ goto err; -+ } -+ } -+ -+ ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl); -+ if (!ob) -+ iter.path->preserve = false; -+err: -+ if (iter.trans && iter.path) -+ set_btree_iter_dontneed(&iter); -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ob; -+} -+ -+/* -+ * This path is for before the freespace btree is initialized: -+ * -+ * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock & -+ * journal buckets - journal buckets will be < ca->new_fs_bucket_idx -+ */ -+static noinline struct open_bucket * -+bch2_bucket_alloc_early(struct btree_trans *trans, -+ struct bch_dev *ca, -+ enum bch_watermark watermark, -+ struct bucket_alloc_state *s, -+ struct closure *cl) -+{ -+ struct btree_iter iter, citer; -+ struct bkey_s_c k, ck; -+ struct open_bucket *ob = NULL; -+ u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); -+ u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor)); -+ u64 alloc_cursor = alloc_start; -+ int ret; -+ -+ /* -+ * Scan with an uncached iterator to avoid polluting the key cache. An -+ * uncached iter will return a cached key if one exists, but if not -+ * there is no other underlying protection for the associated key cache -+ * slot. To avoid racing bucket allocations, look up the cached key slot -+ * of any likely allocation candidate before attempting to proceed with -+ * the allocation. This provides proper exclusion on the associated -+ * bucket.
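-+ * -+ * (Without the cached-key recheck, two racing threads could both see the -+ * uncached key as BCH_DATA_free and try to hand out the same bucket.)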
-+ */ -+again: -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), -+ BTREE_ITER_SLOTS, k, ret) { -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ -+ if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets))) -+ break; -+ -+ if (ca->new_fs_bucket_idx && -+ is_superblock_bucket(ca, k.k->p.offset)) -+ continue; -+ -+ a = bch2_alloc_to_v4(k, &a_convert); -+ if (a->data_type != BCH_DATA_free) -+ continue; -+ -+ /* now check the cached key to serialize concurrent allocs of the bucket */ -+ ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED); -+ ret = bkey_err(ck); -+ if (ret) -+ break; -+ -+ a = bch2_alloc_to_v4(ck, &a_convert); -+ if (a->data_type != BCH_DATA_free) -+ goto next; -+ -+ s->buckets_seen++; -+ -+ ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); -+next: -+ citer.path->preserve = false; -+ bch2_trans_iter_exit(trans, &citer); -+ if (ob) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ alloc_cursor = iter.pos.offset; -+ ca->alloc_cursor = alloc_cursor; -+ -+ if (!ob && ret) -+ ob = ERR_PTR(ret); -+ -+ if (!ob && alloc_start > first_bucket) { -+ alloc_cursor = alloc_start = first_bucket; -+ goto again; -+ } -+ -+ return ob; -+} -+ -+static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, -+ struct bch_dev *ca, -+ enum bch_watermark watermark, -+ struct bucket_alloc_state *s, -+ struct closure *cl) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct open_bucket *ob = NULL; -+ u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor)); -+ u64 alloc_cursor = alloc_start; -+ int ret; -+ -+ BUG_ON(ca->new_fs_bucket_idx); -+again: -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, -+ POS(ca->dev_idx, alloc_cursor), 0, k, ret) { -+ if (k.k->p.inode != ca->dev_idx) -+ break; -+ -+ for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); -+ alloc_cursor < k.k->p.offset; -+ alloc_cursor++) { -+ ret = btree_trans_too_many_iters(trans); -+ if (ret) { -+ ob = ERR_PTR(ret); -+ break; -+ } -+ -+ s->buckets_seen++; -+ -+ ob = try_alloc_bucket(trans, ca, watermark, -+ alloc_cursor, s, k, cl); -+ if (ob) { -+ iter.path->preserve = false; -+ break; -+ } -+ } -+ -+ if (ob || ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ ca->alloc_cursor = alloc_cursor; -+ -+ if (!ob && ret) -+ ob = ERR_PTR(ret); -+ -+ if (!ob && alloc_start > ca->mi.first_bucket) { -+ alloc_cursor = alloc_start = ca->mi.first_bucket; -+ goto again; -+ } -+ -+ return ob; -+} -+ -+/** -+ * bch2_bucket_alloc_trans - allocate a single bucket from a specific device -+ * @trans: transaction object -+ * @ca: device to allocate from -+ * @watermark: how important is this allocation? -+ * @cl: if not NULL, closure to be used to wait if buckets not available -+ * @usage: out parameter through which the current device usage is returned -+ * -+ * Returns: an open_bucket on success, or an ERR_PTR() on failure.
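-+ * -+ * The returned open_bucket holds a pin: the caller drops it with -+ * bch2_open_bucket_put() after the index update that makes the allocation -+ * reachable.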
-+ */ -+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, -+ struct bch_dev *ca, -+ enum bch_watermark watermark, -+ struct closure *cl, -+ struct bch_dev_usage *usage) -+{ -+ struct bch_fs *c = trans->c; -+ struct open_bucket *ob = NULL; -+ bool freespace = READ_ONCE(ca->mi.freespace_initialized); -+ u64 avail; -+ struct bucket_alloc_state s = { 0 }; -+ bool waiting = false; -+again: -+ bch2_dev_usage_read_fast(ca, usage); -+ avail = dev_buckets_free(ca, *usage, watermark); -+ -+ if (usage->d[BCH_DATA_need_discard].buckets > avail) -+ bch2_do_discards(c); -+ -+ if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) -+ bch2_do_gc_gens(c); -+ -+ if (should_invalidate_buckets(ca, *usage)) -+ bch2_do_invalidates(c); -+ -+ if (!avail) { -+ if (cl && !waiting) { -+ closure_wait(&c->freelist_wait, cl); -+ waiting = true; -+ goto again; -+ } -+ -+ if (!c->blocked_allocate) -+ c->blocked_allocate = local_clock(); -+ -+ ob = ERR_PTR(-BCH_ERR_freelist_empty); -+ goto err; -+ } -+ -+ if (waiting) -+ closure_wake_up(&c->freelist_wait); -+alloc: -+ ob = likely(freespace) -+ ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl) -+ : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl); -+ -+ if (s.skipped_need_journal_commit * 2 > avail) -+ bch2_journal_flush_async(&c->journal, NULL); -+ -+ if (!ob && freespace && c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) { -+ freespace = false; -+ goto alloc; -+ } -+err: -+ if (!ob) -+ ob = ERR_PTR(-BCH_ERR_no_buckets_found); -+ -+ if (!IS_ERR(ob)) -+ trace_and_count(c, bucket_alloc, ca, -+ bch2_watermarks[watermark], -+ ob->bucket, -+ usage->d[BCH_DATA_free].buckets, -+ avail, -+ bch2_copygc_wait_amount(c), -+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), -+ &s, -+ cl == NULL, -+ ""); -+ else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart)) -+ trace_and_count(c, bucket_alloc_fail, ca, -+ bch2_watermarks[watermark], -+ 0, -+ usage->d[BCH_DATA_free].buckets, -+ avail, -+ bch2_copygc_wait_amount(c), -+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), -+ &s, -+ cl == NULL, -+ bch2_err_str(PTR_ERR(ob))); -+ -+ return ob; -+} -+ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, -+ enum bch_watermark watermark, -+ struct closure *cl) -+{ -+ struct bch_dev_usage usage; -+ struct open_bucket *ob; -+ -+ bch2_trans_do(c, NULL, NULL, 0, -+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark, -+ cl, &usage))); -+ return ob; -+} -+ -+static int __dev_stripe_cmp(struct dev_stripe_state *stripe, -+ unsigned l, unsigned r) -+{ -+ return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - -+ (stripe->next_alloc[l] < stripe->next_alloc[r])); -+} -+ -+#define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs) -+{ -+ struct dev_alloc_list ret = { .nr = 0 }; -+ unsigned i; -+ -+ for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) -+ ret.devs[ret.nr++] = i; -+ -+ bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); -+ return ret; -+} -+ -+static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca, -+ struct dev_stripe_state *stripe, -+ struct bch_dev_usage *usage) -+{ -+ u64 *v = stripe->next_alloc + ca->dev_idx; -+ u64 free_space = dev_buckets_available(ca, BCH_WATERMARK_normal); -+ u64 free_space_inv = free_space -+ ? 
div64_u64(1ULL << 48, free_space) -+ : 1ULL << 48; -+ u64 scale = *v / 4; -+ -+ if (*v + free_space_inv >= *v) -+ *v += free_space_inv; -+ else -+ *v = U64_MAX; -+ -+ for (v = stripe->next_alloc; -+ v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) -+ *v = *v < scale ? 0 : *v - scale; -+} -+ -+void bch2_dev_stripe_increment(struct bch_dev *ca, -+ struct dev_stripe_state *stripe) -+{ -+ struct bch_dev_usage usage; -+ -+ bch2_dev_usage_read_fast(ca, &usage); -+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage); -+} -+ -+static int add_new_bucket(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ struct open_bucket *ob) -+{ -+ unsigned durability = -+ bch_dev_bkey_exists(c, ob->dev)->mi.durability; -+ -+ BUG_ON(*nr_effective >= nr_replicas); -+ BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); -+ -+ __clear_bit(ob->dev, devs_may_alloc->d); -+ *nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) -+ ? durability : 1; -+ *have_cache |= !durability; -+ -+ ob_push(c, ptrs, ob); -+ -+ if (*nr_effective >= nr_replicas) -+ return 1; -+ if (ob->ec) -+ return 1; -+ return 0; -+} -+ -+int bch2_bucket_alloc_set_trans(struct btree_trans *trans, -+ struct open_buckets *ptrs, -+ struct dev_stripe_state *stripe, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ unsigned flags, -+ enum bch_data_type data_type, -+ enum bch_watermark watermark, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct dev_alloc_list devs_sorted = -+ bch2_dev_alloc_list(c, stripe, devs_may_alloc); -+ unsigned dev; -+ struct bch_dev *ca; -+ int ret = -BCH_ERR_insufficient_devices; -+ unsigned i; -+ -+ BUG_ON(*nr_effective >= nr_replicas); -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ struct bch_dev_usage usage; -+ struct open_bucket *ob; -+ -+ dev = devs_sorted.devs[i]; -+ -+ rcu_read_lock(); -+ ca = rcu_dereference(c->devs[dev]); -+ if (ca) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ if (!ca) -+ continue; -+ -+ if (!ca->mi.durability && *have_cache) { -+ percpu_ref_put(&ca->ref); -+ continue; -+ } -+ -+ ob = bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage); -+ if (!IS_ERR(ob)) -+ bch2_dev_stripe_increment_inlined(ca, stripe, &usage); -+ percpu_ref_put(&ca->ref); -+ -+ if (IS_ERR(ob)) { -+ ret = PTR_ERR(ob); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) -+ break; -+ continue; -+ } -+ -+ ob->data_type = data_type; -+ -+ if (add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_replicas, nr_effective, -+ have_cache, flags, ob)) { -+ ret = 0; -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+/* Allocate from stripes: */ -+ -+/* -+ * if we can't allocate a new stripe because there are already too many -+ * partially filled stripes, force allocating from an existing stripe even when -+ * it's to a device we don't want: -+ */ -+ -+static int bucket_alloc_from_stripe(struct btree_trans *trans, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ u16 target, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum bch_watermark watermark, -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct dev_alloc_list devs_sorted; -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i, ec_idx; -+ int ret = 0; -+ -+ if (nr_replicas < 2) -+ return 0; -+ -+ if (ec_open_bucket(c, 
ptrs)) -+ return 0; -+ -+ h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); -+ if (IS_ERR(h)) -+ return PTR_ERR(h); -+ if (!h) -+ return 0; -+ -+ devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); -+ -+ for (i = 0; i < devs_sorted.nr; i++) -+ for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { -+ if (!h->s->blocks[ec_idx]) -+ continue; -+ -+ ob = c->open_buckets + h->s->blocks[ec_idx]; -+ if (ob->dev == devs_sorted.devs[i] && -+ !test_and_set_bit(ec_idx, h->s->blocks_allocated)) -+ goto got_bucket; -+ } -+ goto out_put_head; -+got_bucket: -+ ob->ec_idx = ec_idx; -+ ob->ec = h->s; -+ ec_stripe_new_get(h->s, STRIPE_REF_io); -+ -+ ret = add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_replicas, nr_effective, -+ have_cache, flags, ob); -+out_put_head: -+ bch2_ec_stripe_head_put(c, h); -+ return ret; -+} -+ -+/* Sector allocator */ -+ -+static bool want_bucket(struct bch_fs *c, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ bool *have_cache, bool ec, -+ struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ -+ if (!test_bit(ob->dev, devs_may_alloc->d)) -+ return false; -+ -+ if (ob->data_type != wp->data_type) -+ return false; -+ -+ if (!ca->mi.durability && -+ (wp->data_type == BCH_DATA_btree || ec || *have_cache)) -+ return false; -+ -+ if (ec != (ob->ec != NULL)) -+ return false; -+ -+ return true; -+} -+ -+static int bucket_alloc_set_writepoint(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ bool ec, unsigned flags) -+{ -+ struct open_buckets ptrs_skip = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ int ret = 0; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ if (!ret && want_bucket(c, wp, devs_may_alloc, -+ have_cache, ec, ob)) -+ ret = add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_replicas, nr_effective, -+ have_cache, flags, ob); -+ else -+ ob_push(c, &ptrs_skip, ob); -+ } -+ wp->ptrs = ptrs_skip; -+ -+ return ret; -+} -+ -+static int bucket_alloc_set_partial(struct bch_fs *c, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_mask *devs_may_alloc, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, bool ec, -+ enum bch_watermark watermark, -+ unsigned flags) -+{ -+ int i, ret = 0; -+ -+ if (!c->open_buckets_partial_nr) -+ return 0; -+ -+ spin_lock(&c->freelist_lock); -+ -+ if (!c->open_buckets_partial_nr) -+ goto unlock; -+ -+ for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) { -+ struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i]; -+ -+ if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ struct bch_dev_usage usage; -+ u64 avail; -+ -+ bch2_dev_usage_read_fast(ca, &usage); -+ avail = dev_buckets_free(ca, usage, watermark); -+ if (!avail) -+ continue; -+ -+ array_remove_item(c->open_buckets_partial, -+ c->open_buckets_partial_nr, -+ i); -+ ob->on_partial_list = false; -+ -+ ret = add_new_bucket(c, ptrs, devs_may_alloc, -+ nr_replicas, nr_effective, -+ have_cache, flags, ob); -+ if (ret) -+ break; -+ } -+ } -+unlock: -+ spin_unlock(&c->freelist_lock); -+ return ret; -+} -+ -+static int __open_bucket_add_buckets(struct btree_trans *trans, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_list *devs_have, -+ u16 target, -+ bool erasure_code, -+ unsigned nr_replicas, -+ 
unsigned *nr_effective, -+ bool *have_cache, -+ enum bch_watermark watermark, -+ unsigned flags, -+ struct closure *_cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_devs_mask devs; -+ struct open_bucket *ob; -+ struct closure *cl = NULL; -+ unsigned i; -+ int ret; -+ -+ devs = target_rw_devs(c, wp->data_type, target); -+ -+ /* Don't allocate from devices we already have pointers to: */ -+ for (i = 0; i < devs_have->nr; i++) -+ __clear_bit(devs_have->devs[i], devs.d); -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ __clear_bit(ob->dev, devs.d); -+ -+ if (erasure_code && ec_open_bucket(c, ptrs)) -+ return 0; -+ -+ ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, erasure_code, flags); -+ if (ret) -+ return ret; -+ -+ ret = bucket_alloc_set_partial(c, ptrs, wp, &devs, -+ nr_replicas, nr_effective, -+ have_cache, erasure_code, watermark, flags); -+ if (ret) -+ return ret; -+ -+ if (erasure_code) { -+ ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs, -+ target, -+ nr_replicas, nr_effective, -+ have_cache, -+ watermark, flags, _cl); -+ } else { -+retry_blocking: -+ /* -+ * Try nonblocking first, so that if one device is full we'll try from -+ * other devices: -+ */ -+ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs, -+ nr_replicas, nr_effective, have_cache, -+ flags, wp->data_type, watermark, cl); -+ if (ret && -+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) && -+ !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && -+ !cl && _cl) { -+ cl = _cl; -+ goto retry_blocking; -+ } -+ } -+ -+ return ret; -+} -+ -+static int open_bucket_add_buckets(struct btree_trans *trans, -+ struct open_buckets *ptrs, -+ struct write_point *wp, -+ struct bch_devs_list *devs_have, -+ u16 target, -+ unsigned erasure_code, -+ unsigned nr_replicas, -+ unsigned *nr_effective, -+ bool *have_cache, -+ enum bch_watermark watermark, -+ unsigned flags, -+ struct closure *cl) -+{ -+ int ret; -+ -+ if (erasure_code) { -+ ret = __open_bucket_add_buckets(trans, ptrs, wp, -+ devs_have, target, erasure_code, -+ nr_replicas, nr_effective, have_cache, -+ watermark, flags, cl); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || -+ bch2_err_matches(ret, BCH_ERR_operation_blocked) || -+ bch2_err_matches(ret, BCH_ERR_freelist_empty) || -+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) -+ return ret; -+ if (*nr_effective >= nr_replicas) -+ return 0; -+ } -+ -+ ret = __open_bucket_add_buckets(trans, ptrs, wp, -+ devs_have, target, false, -+ nr_replicas, nr_effective, have_cache, -+ watermark, flags, cl); -+ return ret < 0 ? 
ret : 0; -+} -+ -+/** -+ * should_drop_bucket - check if this open_bucket should go away -+ * @ob: open_bucket to predicate on -+ * @c: filesystem handle -+ * @ca: if set, we're killing buckets for a particular device -+ * @ec: if true, we're shutting down erasure coding and killing all ec -+ * open_buckets; if neither @ca nor @ec is given, every open_bucket matches -+ * Returns: true if we should kill this open_bucket -+ * -+ * We're killing open_buckets because we're shutting down a device, erasure -+ * coding, or the entire filesystem - check if this open_bucket matches: -+ */ -+static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, -+ struct bch_dev *ca, bool ec) -+{ -+ if (ec) { -+ return ob->ec != NULL; -+ } else if (ca) { -+ bool drop = ob->dev == ca->dev_idx; -+ struct open_bucket *ob2; -+ unsigned i; -+ -+ if (!drop && ob->ec) { -+ unsigned nr_blocks; -+ -+ mutex_lock(&ob->ec->lock); -+ nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; -+ -+ for (i = 0; i < nr_blocks; i++) { -+ if (!ob->ec->blocks[i]) -+ continue; -+ -+ ob2 = c->open_buckets + ob->ec->blocks[i]; -+ drop |= ob2->dev == ca->dev_idx; -+ } -+ mutex_unlock(&ob->ec->lock); -+ } -+ -+ return drop; -+ } else { -+ return true; -+ } -+} -+ -+static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, -+ bool ec, struct write_point *wp) -+{ -+ struct open_buckets ptrs = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ mutex_lock(&wp->lock); -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (should_drop_bucket(ob, c, ca, ec)) -+ bch2_open_bucket_put(c, ob); -+ else -+ ob_push(c, &ptrs, ob); -+ wp->ptrs = ptrs; -+ mutex_unlock(&wp->lock); -+} -+ -+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, -+ bool ec) -+{ -+ unsigned i; -+ -+ /* Close write points that point to this device...
*/ -+ for (i = 0; i < ARRAY_SIZE(c->write_points); i++) -+ bch2_writepoint_stop(c, ca, ec, &c->write_points[i]); -+ -+ bch2_writepoint_stop(c, ca, ec, &c->copygc_write_point); -+ bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); -+ bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ while (c->btree_reserve_cache_nr) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ bch2_open_buckets_put(c, &a->ob); -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+ spin_lock(&c->freelist_lock); -+ i = 0; -+ while (i < c->open_buckets_partial_nr) { -+ struct open_bucket *ob = -+ c->open_buckets + c->open_buckets_partial[i]; -+ -+ if (should_drop_bucket(ob, c, ca, ec)) { -+ --c->open_buckets_partial_nr; -+ swap(c->open_buckets_partial[i], -+ c->open_buckets_partial[c->open_buckets_partial_nr]); -+ ob->on_partial_list = false; -+ spin_unlock(&c->freelist_lock); -+ bch2_open_bucket_put(c, ob); -+ spin_lock(&c->freelist_lock); -+ } else { -+ i++; -+ } -+ } -+ spin_unlock(&c->freelist_lock); -+ -+ bch2_ec_stop_dev(c, ca); -+} -+ -+static inline struct hlist_head *writepoint_hash(struct bch_fs *c, -+ unsigned long write_point) -+{ -+ unsigned hash = -+ hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash))); -+ -+ return &c->write_points_hash[hash]; -+} -+ -+static struct write_point *__writepoint_find(struct hlist_head *head, -+ unsigned long write_point) -+{ -+ struct write_point *wp; -+ -+ rcu_read_lock(); -+ hlist_for_each_entry_rcu(wp, head, node) -+ if (wp->write_point == write_point) -+ goto out; -+ wp = NULL; -+out: -+ rcu_read_unlock(); -+ return wp; -+} -+ -+static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) -+{ -+ u64 stranded = c->write_points_nr * c->bucket_size_max; -+ u64 free = bch2_fs_usage_read_short(c).free; -+ -+ return stranded * factor > free; -+} -+ -+static bool try_increase_writepoints(struct bch_fs *c) -+{ -+ struct write_point *wp; -+ -+ if (c->write_points_nr == ARRAY_SIZE(c->write_points) || -+ too_many_writepoints(c, 32)) -+ return false; -+ -+ wp = c->write_points + c->write_points_nr++; -+ hlist_add_head_rcu(&wp->node, writepoint_hash(c, wp->write_point)); -+ return true; -+} -+ -+static bool try_decrease_writepoints(struct btree_trans *trans, unsigned old_nr) -+{ -+ struct bch_fs *c = trans->c; -+ struct write_point *wp; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ mutex_lock(&c->write_points_hash_lock); -+ if (c->write_points_nr < old_nr) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return true; -+ } -+ -+ if (c->write_points_nr == 1 || -+ !too_many_writepoints(c, 8)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ return false; -+ } -+ -+ wp = c->write_points + --c->write_points_nr; -+ -+ hlist_del_rcu(&wp->node); -+ mutex_unlock(&c->write_points_hash_lock); -+ -+ bch2_trans_mutex_lock_norelock(trans, &wp->lock); -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ open_bucket_free_unused(c, ob); -+ wp->ptrs.nr = 0; -+ mutex_unlock(&wp->lock); -+ return true; -+} -+ -+static struct write_point *writepoint_find(struct btree_trans *trans, -+ unsigned long write_point) -+{ -+ struct bch_fs *c = trans->c; -+ struct write_point *wp, *oldest; -+ struct hlist_head *head; -+ -+ if (!(write_point & 1UL)) { -+ wp = (struct write_point *) write_point; -+ bch2_trans_mutex_lock_norelock(trans, &wp->lock); -+ return wp; -+ } -+ -+ head = writepoint_hash(c, write_point); -+restart_find: -+ wp = __writepoint_find(head, write_point); -+ if (wp) { 
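-+ /* -+ * Unlocked lookup, then recheck under the lock: the write point -+ * can be stolen for a different write_point between -+ * __writepoint_find() and taking wp->lock, so verify it still -+ * matches and redo the lookup if not. -+ */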
-+lock_wp: -+ bch2_trans_mutex_lock_norelock(trans, &wp->lock); -+ if (wp->write_point == write_point) -+ goto out; -+ mutex_unlock(&wp->lock); -+ goto restart_find; -+ } -+restart_find_oldest: -+ oldest = NULL; -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) -+ if (!oldest || time_before64(wp->last_used, oldest->last_used)) -+ oldest = wp; -+ -+ bch2_trans_mutex_lock_norelock(trans, &oldest->lock); -+ bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock); -+ if (oldest >= c->write_points + c->write_points_nr || -+ try_increase_writepoints(c)) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto restart_find_oldest; -+ } -+ -+ wp = __writepoint_find(head, write_point); -+ if (wp && wp != oldest) { -+ mutex_unlock(&c->write_points_hash_lock); -+ mutex_unlock(&oldest->lock); -+ goto lock_wp; -+ } -+ -+ wp = oldest; -+ hlist_del_rcu(&wp->node); -+ wp->write_point = write_point; -+ hlist_add_head_rcu(&wp->node, head); -+ mutex_unlock(&c->write_points_hash_lock); -+out: -+ wp->last_used = local_clock(); -+ return wp; -+} -+ -+/* -+ * Get us an open_bucket we can allocate from, return with it locked: -+ */ -+int bch2_alloc_sectors_start_trans(struct btree_trans *trans, -+ unsigned target, -+ unsigned erasure_code, -+ struct write_point_specifier write_point, -+ struct bch_devs_list *devs_have, -+ unsigned nr_replicas, -+ unsigned nr_replicas_required, -+ enum bch_watermark watermark, -+ unsigned flags, -+ struct closure *cl, -+ struct write_point **wp_ret) -+{ -+ struct bch_fs *c = trans->c; -+ struct write_point *wp; -+ struct open_bucket *ob; -+ struct open_buckets ptrs; -+ unsigned nr_effective, write_points_nr; -+ bool have_cache; -+ int ret; -+ int i; -+ -+ BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); -+ -+ BUG_ON(!nr_replicas || !nr_replicas_required); -+retry: -+ ptrs.nr = 0; -+ nr_effective = 0; -+ write_points_nr = c->write_points_nr; -+ have_cache = false; -+ -+ *wp_ret = wp = writepoint_find(trans, write_point.v); -+ -+ /* metadata may not allocate on cache devices: */ -+ if (wp->data_type != BCH_DATA_user) -+ have_cache = true; -+ -+ if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { -+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, watermark, -+ flags, NULL); -+ if (!ret || -+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto alloc_done; -+ -+ /* Don't retry from all devices if we're out of open buckets: */ -+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) -+ goto allocate_blocking; -+ -+ /* -+ * Only try to allocate cache (durability = 0 devices) from the -+ * specified target: -+ */ -+ have_cache = true; -+ -+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, -+ 0, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, watermark, -+ flags, cl); -+ } else { -+allocate_blocking: -+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, -+ target, erasure_code, -+ nr_replicas, &nr_effective, -+ &have_cache, watermark, -+ flags, cl); -+ } -+alloc_done: -+ BUG_ON(!ret && nr_effective < nr_replicas); -+ -+ if (erasure_code && !ec_open_bucket(c, &ptrs)) -+ pr_debug("failed to get ec bucket: ret %u", ret); -+ -+ if (ret == -BCH_ERR_insufficient_devices && -+ nr_effective >= nr_replicas_required) -+ ret = 0; -+ -+ if (ret) -+ goto err; -+ -+ /* Free buckets we didn't use: */ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ open_bucket_free_unused(c, ob); -+ -+ wp->ptrs = ptrs; -+ -+ 
wp->sectors_free = UINT_MAX; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ wp->sectors_free = min(wp->sectors_free, ob->sectors_free); -+ -+ BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); -+ -+ return 0; -+err: -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ptrs.nr < ARRAY_SIZE(ptrs.v)) -+ ob_push(c, &ptrs, ob); -+ else -+ open_bucket_free_unused(c, ob); -+ wp->ptrs = ptrs; -+ -+ mutex_unlock(&wp->lock); -+ -+ if (bch2_err_matches(ret, BCH_ERR_freelist_empty) && -+ try_decrease_writepoints(trans, write_points_nr)) -+ goto retry; -+ -+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) || -+ bch2_err_matches(ret, BCH_ERR_freelist_empty)) -+ return cl -+ ? -BCH_ERR_bucket_alloc_blocked -+ : -BCH_ERR_ENOSPC_bucket_alloc; -+ -+ return ret; -+} -+ -+struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ -+ return (struct bch_extent_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_ptr, -+ .gen = ob->gen, -+ .dev = ob->dev, -+ .offset = bucket_to_sector(ca, ob->bucket) + -+ ca->mi.bucket_size - -+ ob->sectors_free, -+ }; -+} -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, -+ struct bkey_i *k, unsigned sectors, -+ bool cached) -+{ -+ bch2_alloc_sectors_append_ptrs_inlined(c, wp, k, sectors, cached); -+} -+ -+/* -+ * Done with the write point: release open buckets that are now full, keeping -+ * any that still have space for the next allocation -+ */ -+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp) -+{ -+ bch2_alloc_sectors_done_inlined(c, wp); -+} -+ -+static inline void writepoint_init(struct write_point *wp, -+ enum bch_data_type type) -+{ -+ mutex_init(&wp->lock); -+ wp->data_type = type; -+ -+ INIT_WORK(&wp->index_update_work, bch2_write_point_do_index_updates); -+ INIT_LIST_HEAD(&wp->writes); -+ spin_lock_init(&wp->writes_lock); -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ struct write_point *wp; -+ -+ mutex_init(&c->write_points_hash_lock); -+ c->write_points_nr = ARRAY_SIZE(c->write_points); -+ -+ /* open bucket 0 is a sentinel NULL: */ -+ spin_lock_init(&c->open_buckets[0].lock); -+ -+ for (ob = c->open_buckets + 1; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { -+ spin_lock_init(&ob->lock); -+ c->open_buckets_nr_free++; -+ -+ ob->freelist = c->open_buckets_freelist; -+ c->open_buckets_freelist = ob - c->open_buckets; -+ } -+ -+ writepoint_init(&c->btree_write_point, BCH_DATA_btree); -+ writepoint_init(&c->rebalance_write_point, BCH_DATA_user); -+ writepoint_init(&c->copygc_write_point, BCH_DATA_user); -+ -+ for (wp = c->write_points; -+ wp < c->write_points + c->write_points_nr; wp++) { -+ writepoint_init(wp, BCH_DATA_user); -+ -+ wp->last_used = local_clock(); -+ wp->write_point = (unsigned long) wp; -+ hlist_add_head_rcu(&wp->node, -+ writepoint_hash(c, wp->write_point)); -+ } -+} -+ -+static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ unsigned data_type = ob->data_type; -+ barrier(); /* READ_ONCE() doesn't work on bitfields */ -+ -+ prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u", -+ ob - c->open_buckets, -+ atomic_read(&ob->pin), -+ data_type < BCH_DATA_NR ?
bch2_data_types[data_type] : "invalid data type", -+ ob->dev, ob->bucket, ob->gen, -+ ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size); -+ if (ob->ec) -+ prt_printf(out, " ec idx %llu", ob->ec->idx); -+ if (ob->on_partial_list) -+ prt_str(out, " partial"); -+ prt_newline(out); -+} -+ -+void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct open_bucket *ob; -+ -+ out->atomic++; -+ -+ for (ob = c->open_buckets; -+ ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); -+ ob++) { -+ spin_lock(&ob->lock); -+ if (ob->valid && !ob->on_partial_list) -+ bch2_open_bucket_to_text(out, c, ob); -+ spin_unlock(&ob->lock); -+ } -+ -+ --out->atomic; -+} -+ -+void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ unsigned i; -+ -+ out->atomic++; -+ spin_lock(&c->freelist_lock); -+ -+ for (i = 0; i < c->open_buckets_partial_nr; i++) -+ bch2_open_bucket_to_text(out, c, -+ c->open_buckets + c->open_buckets_partial[i]); -+ -+ spin_unlock(&c->freelist_lock); -+ --out->atomic; -+} -+ -+static const char * const bch2_write_point_states[] = { -+#define x(n) #n, -+ WRITE_POINT_STATES() -+#undef x -+ NULL -+}; -+ -+static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, -+ struct write_point *wp) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ prt_printf(out, "%lu: ", wp->write_point); -+ prt_human_readable_u64(out, wp->sectors_allocated); -+ -+ prt_printf(out, " last wrote: "); -+ bch2_pr_time_units(out, sched_clock() - wp->last_used); -+ -+ for (i = 0; i < WRITE_POINT_STATE_NR; i++) { -+ prt_printf(out, " %s: ", bch2_write_point_states[i]); -+ bch2_pr_time_units(out, wp->time[i]); -+ } -+ -+ prt_newline(out); -+ -+ printbuf_indent_add(out, 2); -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ bch2_open_bucket_to_text(out, c, ob); -+ printbuf_indent_sub(out, 2); -+} -+ -+void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct write_point *wp; -+ -+ prt_str(out, "Foreground write points\n"); -+ for (wp = c->write_points; -+ wp < c->write_points + ARRAY_SIZE(c->write_points); -+ wp++) -+ bch2_write_point_to_text(out, c, wp); -+ -+ prt_str(out, "Copygc write point\n"); -+ bch2_write_point_to_text(out, c, &c->copygc_write_point); -+ -+ prt_str(out, "Rebalance write point\n"); -+ bch2_write_point_to_text(out, c, &c->rebalance_write_point); -+ -+ prt_str(out, "Btree write point\n"); -+ bch2_write_point_to_text(out, c, &c->btree_write_point); -+} -diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h -new file mode 100644 -index 000000000000..7aaeec44c746 ---- /dev/null -+++ b/fs/bcachefs/alloc_foreground.h -@@ -0,0 +1,224 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H -+#define _BCACHEFS_ALLOC_FOREGROUND_H -+ -+#include "bcachefs.h" -+#include "alloc_types.h" -+#include "extents.h" -+#include "sb-members.h" -+ -+#include <linux/hash.h> -+ -+struct bkey; -+struct bch_dev; -+struct bch_fs; -+struct bch_devs_list; -+ -+extern const char * const bch2_watermarks[]; -+ -+void bch2_reset_alloc_cursors(struct bch_fs *); -+ -+struct dev_alloc_list { -+ unsigned nr; -+ u8 devs[BCH_SB_MEMBERS_MAX]; -+}; -+ -+struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, -+ struct dev_stripe_state *, -+ struct bch_devs_mask *); -+void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *); -+ -+long bch2_bucket_alloc_new_fs(struct bch_dev *); -+ -+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, -+ enum bch_watermark, struct closure
*); -+ -+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs, -+ struct open_bucket *ob) -+{ -+ BUG_ON(obs->nr >= ARRAY_SIZE(obs->v)); -+ -+ obs->v[obs->nr++] = ob - c->open_buckets; -+} -+ -+#define open_bucket_for_each(_c, _obs, _ob, _i) \ -+ for ((_i) = 0; \ -+ (_i) < (_obs)->nr && \ -+ ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true); \ -+ (_i)++) -+ -+static inline struct open_bucket *ec_open_bucket(struct bch_fs *c, -+ struct open_buckets *obs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, obs, ob, i) -+ if (ob->ec) -+ return ob; -+ -+ return NULL; -+} -+ -+void bch2_open_bucket_write_error(struct bch_fs *, -+ struct open_buckets *, unsigned); -+ -+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *); -+ -+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) -+{ -+ if (atomic_dec_and_test(&ob->pin)) -+ __bch2_open_bucket_put(c, ob); -+} -+ -+static inline void bch2_open_buckets_put(struct bch_fs *c, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, ptrs, ob, i) -+ bch2_open_bucket_put(c, ob); -+ ptrs->nr = 0; -+} -+ -+static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 }; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); -+ wp->ptrs = keep; -+ -+ mutex_unlock(&wp->lock); -+ -+ bch2_open_buckets_put(c, &ptrs); -+} -+ -+static inline void bch2_open_bucket_get(struct bch_fs *c, -+ struct write_point *wp, -+ struct open_buckets *ptrs) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ ob->data_type = wp->data_type; -+ atomic_inc(&ob->pin); -+ ob_push(c, ptrs, ob); -+ } -+} -+ -+static inline open_bucket_idx_t *open_bucket_hashslot(struct bch_fs *c, -+ unsigned dev, u64 bucket) -+{ -+ return c->open_buckets_hash + -+ (jhash_3words(dev, bucket, bucket >> 32, 0) & -+ (OPEN_BUCKETS_COUNT - 1)); -+} -+ -+static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucket) -+{ -+ open_bucket_idx_t slot = *open_bucket_hashslot(c, dev, bucket); -+ -+ while (slot) { -+ struct open_bucket *ob = &c->open_buckets[slot]; -+ -+ if (ob->dev == dev && ob->bucket == bucket) -+ return true; -+ -+ slot = ob->hash; -+ } -+ -+ return false; -+} -+ -+static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket) -+{ -+ bool ret; -+ -+ if (bch2_bucket_is_open(c, dev, bucket)) -+ return true; -+ -+ spin_lock(&c->freelist_lock); -+ ret = bch2_bucket_is_open(c, dev, bucket); -+ spin_unlock(&c->freelist_lock); -+ -+ return ret; -+} -+ -+int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *, -+ struct dev_stripe_state *, struct bch_devs_mask *, -+ unsigned, unsigned *, bool *, unsigned, -+ enum bch_data_type, enum bch_watermark, -+ struct closure *); -+ -+int bch2_alloc_sectors_start_trans(struct btree_trans *, -+ unsigned, unsigned, -+ struct write_point_specifier, -+ struct bch_devs_list *, -+ unsigned, unsigned, -+ enum bch_watermark, -+ unsigned, -+ struct closure *, -+ struct write_point **); -+ -+struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *); -+ -+/* -+ * Append pointers to the space we just allocated to @k, and mark @sectors space -+ * as allocated out of @ob -+ */ -+static inline void -+bch2_alloc_sectors_append_ptrs_inlined(struct 
bch_fs *c, struct write_point *wp, -+ struct bkey_i *k, unsigned sectors, -+ bool cached) -+{ -+ struct open_bucket *ob; -+ unsigned i; -+ -+ BUG_ON(sectors > wp->sectors_free); -+ wp->sectors_free -= sectors; -+ wp->sectors_allocated += sectors; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ struct bch_extent_ptr ptr = bch2_ob_ptr(c, ob); -+ -+ ptr.cached = cached || -+ (!ca->mi.durability && -+ wp->data_type == BCH_DATA_user); -+ -+ bch2_bkey_append_ptr(k, ptr); -+ -+ BUG_ON(sectors > ob->sectors_free); -+ ob->sectors_free -= sectors; -+ } -+} -+ -+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, -+ struct bkey_i *, unsigned, bool); -+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); -+ -+void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *, bool); -+ -+static inline struct write_point_specifier writepoint_hashed(unsigned long v) -+{ -+ return (struct write_point_specifier) { .v = v | 1 }; -+} -+ -+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp) -+{ -+ return (struct write_point_specifier) { .v = (unsigned long) wp }; -+} -+ -+void bch2_fs_allocator_foreground_init(struct bch_fs *); -+ -+void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); -+void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); -+ -+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ -diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h -new file mode 100644 -index 000000000000..b91b7a461056 ---- /dev/null -+++ b/fs/bcachefs/alloc_types.h -@@ -0,0 +1,126 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ALLOC_TYPES_H -+#define _BCACHEFS_ALLOC_TYPES_H -+ -+#include <linux/mutex.h> -+#include <linux/spinlock.h> -+ -+#include "clock_types.h" -+#include "fifo.h" -+ -+struct bucket_alloc_state { -+ u64 buckets_seen; -+ u64 skipped_open; -+ u64 skipped_need_journal_commit; -+ u64 skipped_nocow; -+ u64 skipped_nouse; -+}; -+ -+#define BCH_WATERMARKS() \ -+ x(stripe) \ -+ x(normal) \ -+ x(copygc) \ -+ x(btree) \ -+ x(btree_copygc) \ -+ x(reclaim) -+ -+enum bch_watermark { -+#define x(name) BCH_WATERMARK_##name, -+ BCH_WATERMARKS() -+#undef x -+ BCH_WATERMARK_NR, -+}; -+ -+#define BCH_WATERMARK_BITS 3 -+#define BCH_WATERMARK_MASK ~(~0U << BCH_WATERMARK_BITS) -+ -+#define OPEN_BUCKETS_COUNT 1024 -+ -+#define WRITE_POINT_HASH_NR 32 -+#define WRITE_POINT_MAX 32 -+ -+/* -+ * 0 is never a valid open_bucket_idx_t: -+ */ -+typedef u16 open_bucket_idx_t; -+ -+struct open_bucket { -+ spinlock_t lock; -+ atomic_t pin; -+ open_bucket_idx_t freelist; -+ open_bucket_idx_t hash; -+ -+ /* -+ * When an open bucket has an ec_stripe attached, this is the index of -+ * the block in the stripe this open_bucket corresponds to: -+ */ -+ u8 ec_idx; -+ enum bch_data_type data_type:6; -+ unsigned valid:1; -+ unsigned on_partial_list:1; -+ -+ u8 dev; -+ u8 gen; -+ u32 sectors_free; -+ u64 bucket; -+ struct ec_stripe_new *ec; -+}; -+ -+#define OPEN_BUCKET_LIST_MAX 15 -+ -+struct open_buckets { -+ open_bucket_idx_t nr; -+ open_bucket_idx_t v[OPEN_BUCKET_LIST_MAX]; -+}; -+ -+struct dev_stripe_state { -+ u64 next_alloc[BCH_SB_MEMBERS_MAX]; -+}; -+ -+#define WRITE_POINT_STATES() \ -+ x(stopped) \ -+ x(waiting_io) \ -+ x(waiting_work) \ -+ x(running) -+ -+enum write_point_state { -+#define x(n) WRITE_POINT_##n, -+ WRITE_POINT_STATES() -+#undef x -+ WRITE_POINT_STATE_NR -+}; -+ -+struct write_point { -+ struct { -+ struct
hlist_node node; -+ struct mutex lock; -+ u64 last_used; -+ unsigned long write_point; -+ enum bch_data_type data_type; -+ -+ /* calculated based on how many pointers we're actually going to use: */ -+ unsigned sectors_free; -+ -+ struct open_buckets ptrs; -+ struct dev_stripe_state stripe; -+ -+ u64 sectors_allocated; -+ } __aligned(SMP_CACHE_BYTES); -+ -+ struct { -+ struct work_struct index_update_work; -+ -+ struct list_head writes; -+ spinlock_t writes_lock; -+ -+ enum write_point_state state; -+ u64 last_state_change; -+ u64 time[WRITE_POINT_STATE_NR]; -+ } __aligned(SMP_CACHE_BYTES); -+}; -+ -+struct write_point_specifier { -+ unsigned long v; -+}; -+ -+#endif /* _BCACHEFS_ALLOC_TYPES_H */ -diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c -new file mode 100644 -index 000000000000..ef02c9bb0354 ---- /dev/null -+++ b/fs/bcachefs/backpointers.c -@@ -0,0 +1,860 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "bbpos.h" -+#include "alloc_background.h" -+#include "backpointers.h" -+#include "btree_cache.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_write_buffer.h" -+#include "error.h" -+ -+#include -+ -+static bool extent_matches_bp(struct bch_fs *c, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, -+ struct bpos bucket, -+ struct bch_backpointer bp) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ struct bpos bucket2; -+ struct bch_backpointer bp2; -+ -+ if (p.ptr.cached) -+ continue; -+ -+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, -+ &bucket2, &bp2); -+ if (bpos_eq(bucket, bucket2) && -+ !memcmp(&bp, &bp2, sizeof(bp))) -+ return true; -+ } -+ -+ return false; -+} -+ -+int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); -+ struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); -+ int ret = 0; -+ -+ bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), -+ c, err, -+ backpointer_pos_wrong, -+ "backpointer at wrong pos"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) -+{ -+ prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", -+ bch2_btree_id_str(bp->btree_id), -+ bp->level, -+ (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), -+ bp->bucket_len); -+ bch2_bpos_to_text(out, bp->pos); -+} -+ -+void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) -+{ -+ prt_str(out, "bucket="); -+ bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); -+ prt_str(out, " "); -+ -+ bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); -+} -+ -+void bch2_backpointer_swab(struct bkey_s k) -+{ -+ struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); -+ -+ bp.v->bucket_offset = swab40(bp.v->bucket_offset); -+ bp.v->bucket_len = swab32(bp.v->bucket_len); -+ bch2_bpos_swab(&bp.v->pos); -+} -+ -+static noinline int backpointer_mod_err(struct btree_trans *trans, -+ struct bch_backpointer bp, -+ struct bkey_s_c bp_k, -+ struct bkey_s_c orig_k, -+ bool insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ -+ if (insert) { -+ prt_printf(&buf, "existing backpointer found when inserting 
"); -+ bch2_backpointer_to_text(&buf, &bp); -+ prt_newline(&buf); -+ printbuf_indent_add(&buf, 2); -+ -+ prt_printf(&buf, "found "); -+ bch2_bkey_val_to_text(&buf, c, bp_k); -+ prt_newline(&buf); -+ -+ prt_printf(&buf, "for "); -+ bch2_bkey_val_to_text(&buf, c, orig_k); -+ -+ bch_err(c, "%s", buf.buf); -+ } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { -+ prt_printf(&buf, "backpointer not found when deleting"); -+ prt_newline(&buf); -+ printbuf_indent_add(&buf, 2); -+ -+ prt_printf(&buf, "searching for "); -+ bch2_backpointer_to_text(&buf, &bp); -+ prt_newline(&buf); -+ -+ prt_printf(&buf, "got "); -+ bch2_bkey_val_to_text(&buf, c, bp_k); -+ prt_newline(&buf); -+ -+ prt_printf(&buf, "for "); -+ bch2_bkey_val_to_text(&buf, c, orig_k); -+ -+ bch_err(c, "%s", buf.buf); -+ } -+ -+ printbuf_exit(&buf); -+ -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { -+ bch2_inconsistent_error(c); -+ return -EIO; -+ } else { -+ return 0; -+ } -+} -+ -+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, -+ struct bkey_i_backpointer *bp_k, -+ struct bch_backpointer bp, -+ struct bkey_s_c orig_k, -+ bool insert) -+{ -+ struct btree_iter bp_iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -+ bp_k->k.p, -+ BTREE_ITER_INTENT| -+ BTREE_ITER_SLOTS| -+ BTREE_ITER_WITH_UPDATES); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (insert -+ ? k.k->type -+ : (k.k->type != KEY_TYPE_backpointer || -+ memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { -+ ret = backpointer_mod_err(trans, bp, k, orig_k, insert); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); -+err: -+ bch2_trans_iter_exit(trans, &bp_iter); -+ return ret; -+} -+ -+/* -+ * Find the next backpointer >= *bp_offset: -+ */ -+int bch2_get_next_backpointer(struct btree_trans *trans, -+ struct bpos bucket, int gen, -+ struct bpos *bp_pos, -+ struct bch_backpointer *bp, -+ unsigned iter_flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0); -+ struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ if (bpos_ge(*bp_pos, bp_end_pos)) -+ goto done; -+ -+ if (gen >= 0) { -+ k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, -+ bucket, BTREE_ITER_CACHED|iter_flags); -+ ret = bkey_err(k); -+ if (ret) -+ goto out; -+ -+ if (k.k->type != KEY_TYPE_alloc_v4 || -+ bkey_s_c_to_alloc_v4(k).v->gen != gen) -+ goto done; -+ } -+ -+ *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0)); -+ -+ for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, -+ *bp_pos, iter_flags, k, ret) { -+ if (bpos_ge(k.k->p, bp_end_pos)) -+ break; -+ -+ *bp_pos = k.k->p; -+ *bp = *bkey_s_c_to_backpointer(k).v; -+ goto out; -+ } -+done: -+ *bp_pos = SPOS_MAX; -+out: -+ bch2_trans_iter_exit(trans, &bp_iter); -+ bch2_trans_iter_exit(trans, &alloc_iter); -+ return ret; -+} -+ -+static void backpointer_not_found(struct btree_trans *trans, -+ struct bpos bp_pos, -+ struct bch_backpointer bp, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos); -+ -+ /* -+ * If we're using the btree write buffer, the backpointer we were -+ * looking at may have already been deleted - failure to find what it -+ * pointed to is not an error: -+ */ -+ if 
(likely(!bch2_backpointers_no_use_write_buffer)) -+ return; -+ -+ prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", -+ bp.level ? "btree node" : "extent"); -+ prt_printf(&buf, "bucket: "); -+ bch2_bpos_to_text(&buf, bucket); -+ prt_printf(&buf, "\n "); -+ -+ prt_printf(&buf, "backpointer pos: "); -+ bch2_bpos_to_text(&buf, bp_pos); -+ prt_printf(&buf, "\n "); -+ -+ bch2_backpointer_to_text(&buf, &bp); -+ prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k); -+ if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) -+ bch_err_ratelimited(c, "%s", buf.buf); -+ else -+ bch2_trans_inconsistent(trans, "%s", buf.buf); -+ -+ printbuf_exit(&buf); -+} -+ -+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos bp_pos, -+ struct bch_backpointer bp, -+ unsigned iter_flags) -+{ -+ if (likely(!bp.level)) { -+ struct bch_fs *c = trans->c; -+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos); -+ struct bkey_s_c k; -+ -+ bch2_trans_node_iter_init(trans, iter, -+ bp.btree_id, -+ bp.pos, -+ 0, 0, -+ iter_flags); -+ k = bch2_btree_iter_peek_slot(iter); -+ if (bkey_err(k)) { -+ bch2_trans_iter_exit(trans, iter); -+ return k; -+ } -+ -+ if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) -+ return k; -+ -+ bch2_trans_iter_exit(trans, iter); -+ backpointer_not_found(trans, bp_pos, bp, k); -+ return bkey_s_c_null; -+ } else { -+ struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); -+ -+ if (IS_ERR_OR_NULL(b)) { -+ bch2_trans_iter_exit(trans, iter); -+ return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; -+ } -+ return bkey_i_to_s_c(&b->key); -+ } -+} -+ -+struct btree *bch2_backpointer_get_node(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos bp_pos, -+ struct bch_backpointer bp) -+{ -+ struct bch_fs *c = trans->c; -+ struct bpos bucket = bp_pos_to_bucket(c, bp_pos); -+ struct btree *b; -+ -+ BUG_ON(!bp.level); -+ -+ bch2_trans_node_iter_init(trans, iter, -+ bp.btree_id, -+ bp.pos, -+ 0, -+ bp.level - 1, -+ 0); -+ b = bch2_btree_iter_peek_node(iter); -+ if (IS_ERR(b)) -+ goto err; -+ -+ BUG_ON(b->c.level != bp.level - 1); -+ -+ if (b && extent_matches_bp(c, bp.btree_id, bp.level, -+ bkey_i_to_s_c(&b->key), -+ bucket, bp)) -+ return b; -+ -+ if (b && btree_node_will_make_reachable(b)) { -+ b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); -+ } else { -+ backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); -+ b = NULL; -+ } -+err: -+ bch2_trans_iter_exit(trans, iter); -+ return b; -+} -+ -+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter alloc_iter = { NULL }; -+ struct bkey_s_c alloc_k; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, -+ backpointer_to_missing_device, -+ "backpointer for missing device:\n%s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, bp_iter, 0); -+ goto out; -+ } -+ -+ alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, -+ bp_pos_to_bucket(c, k.k->p), 0); -+ ret = bkey_err(alloc_k); -+ if (ret) -+ goto out; -+ -+ if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, -+ backpointer_to_missing_alloc, -+ "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", -+ alloc_iter.pos.inode, alloc_iter.pos.offset, -+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) 
{ -+ ret = bch2_btree_delete_at(trans, bp_iter, 0); -+ goto out; -+ } -+out: -+fsck_err: -+ bch2_trans_iter_exit(trans, &alloc_iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+/* verify that every backpointer has a corresponding alloc key */ -+int bch2_check_btree_backpointers(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_backpointers, POS_MIN, 0, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ bch2_check_btree_backpointer(trans, &iter, k))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+struct bpos_level { -+ unsigned level; -+ struct bpos pos; -+}; -+ -+static int check_bp_exists(struct btree_trans *trans, -+ struct bpos bucket, -+ struct bch_backpointer bp, -+ struct bkey_s_c orig_k, -+ struct bpos bucket_start, -+ struct bpos bucket_end, -+ struct bpos_level *last_flushed) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter bp_iter = { NULL }; -+ struct printbuf buf = PRINTBUF; -+ struct bkey_s_c bp_k; -+ int ret; -+ -+ if (bpos_lt(bucket, bucket_start) || -+ bpos_gt(bucket, bucket_end)) -+ return 0; -+ -+ if (!bch2_dev_bucket_exists(c, bucket)) -+ goto missing; -+ -+ bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp(c, bucket, bp.bucket_offset), -+ 0); -+ ret = bkey_err(bp_k); -+ if (ret) -+ goto err; -+ -+ if (bp_k.k->type != KEY_TYPE_backpointer || -+ memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { -+ if (last_flushed->level != bp.level || -+ !bpos_eq(last_flushed->pos, orig_k.k->p)) { -+ last_flushed->level = bp.level; -+ last_flushed->pos = orig_k.k->p; -+ -+ ret = bch2_btree_write_buffer_flush_sync(trans) ?: -+ -BCH_ERR_transaction_restart_write_buffer_flush; -+ goto out; -+ } -+ goto missing; -+ } -+out: -+err: -+fsck_err: -+ bch2_trans_iter_exit(trans, &bp_iter); -+ printbuf_exit(&buf); -+ return ret; -+missing: -+ prt_printf(&buf, "missing backpointer for btree=%s l=%u ", -+ bch2_btree_id_str(bp.btree_id), bp.level); -+ bch2_bkey_val_to_text(&buf, c, orig_k); -+ prt_printf(&buf, "\nbp pos "); -+ bch2_bpos_to_text(&buf, bp_iter.pos); -+ -+ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || -+ c->opts.reconstruct_alloc || -+ fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) -+ ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); -+ -+ goto out; -+} -+ -+static int check_extent_to_backpointers(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos bucket_start, -+ struct bpos bucket_end, -+ struct bpos_level *last_flushed) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_btree_iter_peek_all_levels(iter); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ if (!k.k) -+ return 0; -+ -+ ptrs = bch2_bkey_ptrs_c(k); -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ struct bpos bucket_pos; -+ struct bch_backpointer bp; -+ -+ if (p.ptr.cached) -+ continue; -+ -+ bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level, -+ k, p, &bucket_pos, &bp); -+ -+ ret = check_bp_exists(trans, bucket_pos, bp, k, -+ bucket_start, bucket_end, -+ last_flushed); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int check_btree_root_to_backpointers(struct btree_trans *trans, -+ enum btree_id btree_id, -+ struct bpos bucket_start, -+ struct bpos bucket_end, -+ struct bpos_level 
*last_flushed) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_root *r = bch2_btree_id_root(c, btree_id); -+ struct btree_iter iter; -+ struct btree *b; -+ struct bkey_s_c k; -+ struct bkey_ptrs_c ptrs; -+ struct extent_ptr_decoded p; -+ const union bch_extent_entry *entry; -+ int ret; -+ -+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0); -+ b = bch2_btree_iter_peek_node(&iter); -+ ret = PTR_ERR_OR_ZERO(b); -+ if (ret) -+ goto err; -+ -+ BUG_ON(b != btree_node_root(c, b)); -+ -+ k = bkey_i_to_s_c(&b->key); -+ ptrs = bch2_bkey_ptrs_c(k); -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ struct bpos bucket_pos; -+ struct bch_backpointer bp; -+ -+ if (p.ptr.cached) -+ continue; -+ -+ bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1, -+ k, p, &bucket_pos, &bp); -+ -+ ret = check_bp_exists(trans, bucket_pos, bp, k, -+ bucket_start, bucket_end, -+ last_flushed); -+ if (ret) -+ goto err; -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) -+{ -+ return (struct bbpos) { -+ .btree = bp.btree_id, -+ .pos = bp.pos, -+ }; -+} -+ -+static size_t btree_nodes_fit_in_ram(struct bch_fs *c) -+{ -+ struct sysinfo i; -+ u64 mem_bytes; -+ -+ si_meminfo(&i); -+ mem_bytes = i.totalram * i.mem_unit; -+ return div_u64(mem_bytes >> 1, btree_bytes(c)); -+} -+ -+static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, -+ unsigned btree_leaf_mask, -+ unsigned btree_interior_mask, -+ struct bbpos start, struct bbpos *end) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c); -+ enum btree_id btree; -+ int ret = 0; -+ -+ for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) { -+ unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2; -+ -+ if (!((1U << btree) & btree_leaf_mask) && -+ !((1U << btree) & btree_interior_mask)) -+ continue; -+ -+ bch2_trans_node_iter_init(trans, &iter, btree, -+ btree == start.btree ? start.pos : POS_MIN, -+ 0, depth, 0); -+ /* -+ * for_each_btree_key_continue() doesn't check the return value -+ * from bch2_btree_iter_advance(), which is needed when -+ * iterating over interior nodes where we'll see keys at -+ * SPOS_MAX: -+ */ -+ do { -+ k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0); -+ ret = bkey_err(k); -+ if (!k.k || ret) -+ break; -+ -+ --btree_nodes; -+ if (!btree_nodes) { -+ *end = BBPOS(btree, k.k->p); -+ bch2_trans_iter_exit(trans, &iter); -+ return 0; -+ } -+ } while (bch2_btree_iter_advance(&iter)); -+ bch2_trans_iter_exit(trans, &iter); -+ } -+ -+ *end = BBPOS_MAX; -+ return ret; -+} -+ -+static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, -+ struct bpos bucket_start, -+ struct bpos bucket_end) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ enum btree_id btree_id; -+ struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; -+ int ret = 0; -+ -+ for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { -+ unsigned depth = btree_type_has_ptrs(btree_id) ? 
0 : 1; -+ -+ bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, -+ depth, -+ BTREE_ITER_ALL_LEVELS| -+ BTREE_ITER_PREFETCH); -+ -+ do { -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_NOFAIL, -+ check_extent_to_backpointers(trans, &iter, -+ bucket_start, bucket_end, -+ &last_flushed)); -+ if (ret) -+ break; -+ } while (!bch2_btree_iter_advance(&iter)); -+ -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) -+ break; -+ -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_NOFAIL, -+ check_btree_root_to_backpointers(trans, btree_id, -+ bucket_start, bucket_end, -+ &last_flushed)); -+ if (ret) -+ break; -+ } -+ return ret; -+} -+ -+static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c, -+ struct bpos bucket) -+{ -+ return bch2_dev_exists2(c, bucket.inode) -+ ? bucket_pos_to_bp(c, bucket, 0) -+ : bucket; -+} -+ -+static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans, -+ struct bpos start, struct bpos *end) -+{ -+ struct btree_iter alloc_iter; -+ struct btree_iter bp_iter; -+ struct bkey_s_c alloc_k, bp_k; -+ size_t btree_nodes = btree_nodes_fit_in_ram(trans->c); -+ bool alloc_end = false, bp_end = false; -+ int ret = 0; -+ -+ bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc, -+ start, 0, 1, 0); -+ bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers, -+ bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0); -+ while (1) { -+ alloc_k = !alloc_end -+ ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0) -+ : bkey_s_c_null; -+ bp_k = !bp_end -+ ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0) -+ : bkey_s_c_null; -+ -+ ret = bkey_err(alloc_k) ?: bkey_err(bp_k); -+ if ((!alloc_k.k && !bp_k.k) || ret) { -+ *end = SPOS_MAX; -+ break; -+ } -+ -+ --btree_nodes; -+ if (!btree_nodes) { -+ *end = alloc_k.k ? 
alloc_k.k->p : SPOS_MAX; -+ break; -+ } -+ -+ if (bpos_lt(alloc_iter.pos, SPOS_MAX) && -+ bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) { -+ if (!bch2_btree_iter_advance(&alloc_iter)) -+ alloc_end = true; -+ } else { -+ if (!bch2_btree_iter_advance(&bp_iter)) -+ bp_end = true; -+ } -+ } -+ bch2_trans_iter_exit(trans, &bp_iter); -+ bch2_trans_iter_exit(trans, &alloc_iter); -+ return ret; -+} -+ -+int bch2_check_extents_to_backpointers(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct bpos start = POS_MIN, end; -+ int ret; -+ -+ while (1) { -+ ret = bch2_get_alloc_in_memory_pos(trans, start, &end); -+ if (ret) -+ break; -+ -+ if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX)) -+ bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", -+ __func__, btree_nodes_fit_in_ram(c)); -+ -+ if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) { -+ struct printbuf buf = PRINTBUF; -+ -+ prt_str(&buf, "check_extents_to_backpointers(): "); -+ bch2_bpos_to_text(&buf, start); -+ prt_str(&buf, "-"); -+ bch2_bpos_to_text(&buf, end); -+ -+ bch_verbose(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ -+ ret = bch2_check_extents_to_backpointers_pass(trans, start, end); -+ if (ret || bpos_eq(end, SPOS_MAX)) -+ break; -+ -+ start = bpos_successor(end); -+ } -+ bch2_trans_put(trans); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int check_one_backpointer(struct btree_trans *trans, -+ struct bbpos start, -+ struct bbpos end, -+ struct bkey_s_c_backpointer bp, -+ struct bpos *last_flushed_pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bbpos pos = bp_to_bbpos(*bp.v); -+ struct bkey_s_c k; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ if (bbpos_cmp(pos, start) < 0 || -+ bbpos_cmp(pos, end) > 0) -+ return 0; -+ -+ k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); -+ ret = bkey_err(k); -+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) -+ return 0; -+ if (ret) -+ return ret; -+ -+ if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { -+ *last_flushed_pos = bp.k->p; -+ ret = bch2_btree_write_buffer_flush_sync(trans) ?: -+ -BCH_ERR_transaction_restart_write_buffer_flush; -+ goto out; -+ } -+ -+ if (fsck_err_on(!k.k, c, -+ backpointer_to_missing_ptr, -+ "backpointer for missing %s\n %s", -+ bp.v->level ? 
"btree node" : "extent", -+ (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { -+ ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); -+ goto out; -+ } -+out: -+fsck_err: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, -+ struct bbpos start, -+ struct bbpos end) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bpos last_flushed_pos = SPOS_MAX; -+ -+ return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, -+ POS_MIN, BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_one_backpointer(trans, start, end, -+ bkey_s_c_to_backpointer(k), -+ &last_flushed_pos)); -+} -+ -+int bch2_check_backpointers_to_extents(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; -+ int ret; -+ -+ while (1) { -+ ret = bch2_get_btree_in_memory_pos(trans, -+ (1U << BTREE_ID_extents)| -+ (1U << BTREE_ID_reflink), -+ ~0, -+ start, &end); -+ if (ret) -+ break; -+ -+ if (!bbpos_cmp(start, BBPOS_MIN) && -+ bbpos_cmp(end, BBPOS_MAX)) -+ bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", -+ __func__, btree_nodes_fit_in_ram(c)); -+ -+ if (bbpos_cmp(start, BBPOS_MIN) || -+ bbpos_cmp(end, BBPOS_MAX)) { -+ struct printbuf buf = PRINTBUF; -+ -+ prt_str(&buf, "check_backpointers_to_extents(): "); -+ bch2_bbpos_to_text(&buf, start); -+ prt_str(&buf, "-"); -+ bch2_bbpos_to_text(&buf, end); -+ -+ bch_verbose(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ -+ ret = bch2_check_backpointers_to_extents_pass(trans, start, end); -+ if (ret || !bbpos_cmp(end, BBPOS_MAX)) -+ break; -+ -+ start = bbpos_successor(end); -+ } -+ bch2_trans_put(trans); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h -new file mode 100644 -index 000000000000..ab866feeaf66 ---- /dev/null -+++ b/fs/bcachefs/backpointers.h -@@ -0,0 +1,140 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H -+#define _BCACHEFS_BACKPOINTERS_BACKGROUND_H -+ -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "super.h" -+ -+static inline u64 swab40(u64 x) -+{ -+ return (((x & 0x00000000ffULL) << 32)| -+ ((x & 0x000000ff00ULL) << 16)| -+ ((x & 0x0000ff0000ULL) >> 0)| -+ ((x & 0x00ff000000ULL) >> 16)| -+ ((x & 0xff00000000ULL) >> 32)); -+} -+ -+int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); -+void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+void bch2_backpointer_swab(struct bkey_s); -+ -+#define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ -+ .key_invalid = bch2_backpointer_invalid, \ -+ .val_to_text = bch2_backpointer_k_to_text, \ -+ .swab = bch2_backpointer_swab, \ -+ .min_val_size = 32, \ -+}) -+ -+#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10 -+ -+/* -+ * Convert from pos in backpointer btree to pos of corresponding bucket in alloc -+ * btree: -+ */ -+static inline struct bpos bp_pos_to_bucket(const struct bch_fs *c, -+ struct bpos bp_pos) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp_pos.inode); -+ u64 bucket_sector = bp_pos.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT; -+ -+ return 
POS(bp_pos.inode, sector_to_bucket(ca, bucket_sector)); -+} -+ -+/* -+ * Convert from pos in alloc btree + bucket offset to pos in backpointer btree: -+ */ -+static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c, -+ struct bpos bucket, -+ u64 bucket_offset) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode); -+ struct bpos ret; -+ -+ ret = POS(bucket.inode, -+ (bucket_to_sector(ca, bucket.offset) << -+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset); -+ -+ EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret))); -+ -+ return ret; -+} -+ -+int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *, struct bkey_i_backpointer *, -+ struct bch_backpointer, struct bkey_s_c, bool); -+ -+static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans, -+ struct bpos bucket, -+ struct bch_backpointer bp, -+ struct bkey_s_c orig_k, -+ bool insert) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_backpointer *bp_k; -+ int ret; -+ -+ bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); -+ ret = PTR_ERR_OR_ZERO(bp_k); -+ if (ret) -+ return ret; -+ -+ bkey_backpointer_init(&bp_k->k_i); -+ bp_k->k.p = bucket_pos_to_bp(c, bucket, bp.bucket_offset); -+ bp_k->v = bp; -+ -+ if (!insert) { -+ bp_k->k.type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(&bp_k->k, 0); -+ } -+ -+ if (unlikely(bch2_backpointers_no_use_write_buffer)) -+ return bch2_bucket_backpointer_mod_nowritebuffer(trans, bp_k, bp, orig_k, insert); -+ -+ return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i); -+} -+ -+static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, struct extent_ptr_decoded p) -+{ -+ return level ? BCH_DATA_btree : -+ p.has_ec ? BCH_DATA_stripe : -+ BCH_DATA_user; -+} -+ -+static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, struct extent_ptr_decoded p, -+ struct bpos *bucket_pos, struct bch_backpointer *bp) -+{ -+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); -+ s64 sectors = level ? 
btree_sectors(c) : k.k->size; -+ u32 bucket_offset; -+ -+ *bucket_pos = PTR_BUCKET_POS_OFFSET(c, &p.ptr, &bucket_offset); -+ *bp = (struct bch_backpointer) { -+ .btree_id = btree_id, -+ .level = level, -+ .data_type = data_type, -+ .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) + -+ p.crc.offset, -+ .bucket_len = ptr_disk_sectors(sectors, p), -+ .pos = k.k->p, -+ }; -+} -+ -+int bch2_get_next_backpointer(struct btree_trans *, struct bpos, int, -+ struct bpos *, struct bch_backpointer *, unsigned); -+struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct btree_iter *, -+ struct bpos, struct bch_backpointer, -+ unsigned); -+struct btree *bch2_backpointer_get_node(struct btree_trans *, struct btree_iter *, -+ struct bpos, struct bch_backpointer); -+ -+int bch2_check_btree_backpointers(struct bch_fs *); -+int bch2_check_extents_to_backpointers(struct bch_fs *); -+int bch2_check_backpointers_to_extents(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ -diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h -new file mode 100644 -index 000000000000..be2edced5213 ---- /dev/null -+++ b/fs/bcachefs/bbpos.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BBPOS_H -+#define _BCACHEFS_BBPOS_H -+ -+#include "bbpos_types.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+ -+static inline int bbpos_cmp(struct bbpos l, struct bbpos r) -+{ -+ return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos); -+} -+ -+static inline struct bbpos bbpos_successor(struct bbpos pos) -+{ -+ if (bpos_cmp(pos.pos, SPOS_MAX)) { -+ pos.pos = bpos_successor(pos.pos); -+ return pos; -+ } -+ -+ if (pos.btree != BTREE_ID_NR) { -+ pos.btree++; -+ pos.pos = POS_MIN; -+ return pos; -+ } -+ -+ BUG(); -+} -+ -+static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) -+{ -+ prt_str(out, bch2_btree_id_str(pos.btree)); -+ prt_char(out, ':'); -+ bch2_bpos_to_text(out, pos.pos); -+} -+ -+#endif /* _BCACHEFS_BBPOS_H */ -diff --git a/fs/bcachefs/bbpos_types.h b/fs/bcachefs/bbpos_types.h -new file mode 100644 -index 000000000000..5198e94cf3b8 ---- /dev/null -+++ b/fs/bcachefs/bbpos_types.h -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BBPOS_TYPES_H -+#define _BCACHEFS_BBPOS_TYPES_H -+ -+struct bbpos { -+ enum btree_id btree; -+ struct bpos pos; -+}; -+ -+static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) -+{ -+ return (struct bbpos) { btree, pos }; -+} -+ -+#define BBPOS_MIN BBPOS(0, POS_MIN) -+#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) -+ -+#endif /* _BCACHEFS_BBPOS_TYPES_H */ -diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h -new file mode 100644 -index 000000000000..9cb8684959ee ---- /dev/null -+++ b/fs/bcachefs/bcachefs.h -@@ -0,0 +1,1161 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_H -+#define _BCACHEFS_H -+ -+/* -+ * SOME HIGH LEVEL CODE DOCUMENTATION: -+ * -+ * Bcache mostly works with cache sets, cache devices, and backing devices. -+ * -+ * Support for multiple cache devices hasn't quite been finished off yet, but -+ * it's about 95% plumbed through. A cache set and its cache devices are sort of -+ * like an md raid array and its component devices. Most of the code doesn't care -+ * about individual cache devices, the main abstraction is the cache set. -+ * -+ * Multiple cache devices are intended to give us the ability to mirror dirty -+ * cached data and metadata, without mirroring clean cached data. 
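Stepping back to backpointers.h for a moment: the bucket <-> backpointer position arithmetic above (bp_pos_to_bucket(), bucket_pos_to_bp(), and the MAX_EXTENT_COMPRESS_RATIO_SHIFT scaling in bch2_extent_ptr_to_bp()) is easier to follow with concrete numbers. A minimal standalone sketch of the same encoding; the 1024-sectors-per-bucket figure and all names here are illustrative assumptions, not the driver's API:

#include <stdint.h>
#include <stdio.h>

#define SHIFT 10	/* mirrors MAX_EXTENT_COMPRESS_RATIO_SHIFT */

int main(void)
{
	uint64_t bucket_size = 1024;	/* sectors per bucket: assumed value */
	uint64_t bucket = 7;
	uint64_t bucket_offset = 3;	/* sub-sector remainder, < 1 << SHIFT */

	/* bucket_pos_to_bp(): bucket start in sectors, scaled by 1 << SHIFT */
	uint64_t bp_offset = ((bucket * bucket_size) << SHIFT) + bucket_offset;

	/* bp_pos_to_bucket(): drop the scaled part, convert sectors to a bucket */
	uint64_t bucket2 = (bp_offset >> SHIFT) / bucket_size;

	/* Prints 7 - the two conversions are exact inverses, which is what
	 * the EBUG_ON() in bucket_pos_to_bp() asserts. */
	printf("%llu\n", (unsigned long long) bucket2);
	return 0;
}

Because the scaling leaves SHIFT low bits free per sector, many backpointers (one per compressed extent fragment) can share a bucket while the backpointer btree stays sorted in bucket order.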
-+ * -+ * Backing devices are different, in that they have a lifetime independent of a -+ * cache set. When you register a newly formatted backing device it'll come up -+ * in passthrough mode, and then you can attach and detach a backing device from -+ * a cache set at runtime - while it's mounted and in use. Detaching implicitly -+ * invalidates any cached data for that backing device. -+ * -+ * A cache set can have multiple (many) backing devices attached to it. -+ * -+ * There's also flash only volumes - this is the reason for the distinction -+ * between struct cached_dev and struct bcache_device. A flash only volume -+ * works much like a bcache device that has a backing device, except the -+ * "cached" data is always dirty. The end result is that we get thin -+ * provisioning with very little additional code. -+ * -+ * Flash only volumes work but they're not production ready because the moving -+ * garbage collector needs more work. More on that later. -+ * -+ * BUCKETS/ALLOCATION: -+ * -+ * Bcache is primarily designed for caching, which means that in normal -+ * operation all of our available space will be allocated. Thus, we need an -+ * efficient way of deleting things from the cache so we can write new things to -+ * it. -+ * -+ * To do this, we first divide the cache device up into buckets. A bucket is the -+ * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+ -+ * works efficiently. -+ * -+ * Each bucket has a 16 bit priority, and an 8 bit generation associated with -+ * it. The gens and priorities for all the buckets are stored contiguously and -+ * packed on disk (in a linked list of buckets - aside from the superblock, all -+ * of bcache's metadata is stored in buckets). -+ * -+ * The priority is used to implement an LRU. We reset a bucket's priority when -+ * we allocate it or on a cache hit, and every so often we decrement the priority -+ * of each bucket. It could be used to implement something more sophisticated, -+ * if anyone ever gets around to it. -+ * -+ * The generation is used for invalidating buckets. Each pointer also has an 8 -+ * bit generation embedded in it; for a pointer to be considered valid, its gen -+ * must match the gen of the bucket it points into. Thus, to reuse a bucket all -+ * we have to do is increment its gen (and write its new gen to disk; we batch -+ * this up). -+ * -+ * Bcache is entirely COW - we never write twice to a bucket, even buckets that -+ * contain metadata (including btree nodes). -+ * -+ * THE BTREE: -+ * -+ * Bcache is in large part designed around the btree. -+ * -+ * At a high level, the btree is just an index of key -> ptr tuples. -+ * -+ * Keys represent extents, and thus have a size field. Keys also have a variable -+ * number of pointers attached to them (potentially zero, which is handy for -+ * invalidating the cache). -+ * -+ * The key itself is an inode:offset pair. The inode number corresponds to a -+ * backing device or a flash only volume. The offset is the ending offset of the -+ * extent within the inode - not the starting offset; this makes lookups -+ * slightly more convenient. -+ * -+ * Pointers contain the cache device id, the offset on that device, and an 8 bit -+ * generation number. More on the gen later. -+ * -+ * Index lookups are not fully abstracted - cache lookups in particular are -+ * still somewhat mixed in with the btree code, but things are headed in that -+ * direction. -+ * -+ * Updates are fairly well abstracted, though. 
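Before the update paths: the generation scheme described under BUCKETS/ALLOCATION above is compact enough to sketch. A toy model with hypothetical types, not bcache's own structures:

#include <stdbool.h>
#include <stdint.h>

struct toy_bucket { uint8_t gen; };
struct toy_ptr    { uint64_t bucket; uint8_t gen; };

/* A pointer is valid only while its embedded gen matches its bucket's gen. */
static inline bool toy_ptr_stale(const struct toy_bucket *b,
				 const struct toy_ptr *p)
{
	return p->gen != b->gen;
}

/* Invalidating every pointer into a bucket is a single increment; 8 bit
 * gens wrap, which is why nodes holding very stale pointers eventually
 * have to be rewritten (see GARBAGE COLLECTION below). */
static inline void toy_bucket_reuse(struct toy_bucket *b)
{
	b->gen++;
}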
There are two different ways of -+ * updating the btree; insert and replace. -+ * -+ * BTREE_INSERT will just take a list of keys and insert them into the btree - -+ * overwriting (possibly only partially) any extents they overlap with. This is -+ * used to update the index after a write. -+ * -+ * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is -+ * overwriting a key that matches another given key. This is used for inserting -+ * data into the cache after a cache miss, and for background writeback, and for -+ * the moving garbage collector. -+ * -+ * There is no "delete" operation; deleting things from the index is -+ * accomplished either by invalidating pointers (by incrementing a bucket's -+ * gen) or by inserting a key with 0 pointers - which will overwrite anything -+ * previously present at that location in the index. -+ * -+ * This means that there are always stale/invalid keys in the btree. They're -+ * filtered out by the code that iterates through a btree node, and removed when -+ * a btree node is rewritten. -+ * -+ * BTREE NODES: -+ * -+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and -+ * free smaller than a bucket - so, that's how big our btree nodes are. -+ * -+ * (If buckets are really big we'll only use part of the bucket for a btree node -+ * - no less than 1/4th - but a bucket still contains no more than a single -+ * btree node. I'd actually like to change this, but for now we rely on the -+ * bucket's gen for deleting btree nodes when we rewrite/split a node.) -+ * -+ * Anyways, btree nodes are big - big enough to be inefficient with a textbook -+ * btree implementation. -+ * -+ * The way this is solved is that btree nodes are internally log structured; we -+ * can append new keys to an existing btree node without rewriting it. This -+ * means each set of keys we write is sorted, but the node is not. -+ * -+ * We maintain this log structure in memory - keeping 1Mb of keys sorted would -+ * be expensive, and we have to distinguish between the keys we have written and -+ * the keys we haven't. So to do a lookup in a btree node, we have to search -+ * each sorted set. But we do merge written sets together lazily, so the cost of -+ * these extra searches is quite low (normally most of the keys in a btree node -+ * will be in one big set, and then there'll be one or two sets that are much -+ * smaller). -+ * -+ * This log structure makes bcache's btree more of a hybrid between a -+ * conventional btree and a compacting data structure, with some of the -+ * advantages of both. -+ * -+ * GARBAGE COLLECTION: -+ * -+ * We can't just invalidate any bucket - it might contain dirty data or -+ * metadata. If it once contained dirty data, other writes might overwrite it -+ * later, leaving no valid pointers into that bucket in the index. -+ * -+ * Thus, the primary purpose of garbage collection is to find buckets to reuse. -+ * It also counts how much valid data each bucket currently contains, so that -+ * allocation can reuse buckets sooner when they've been mostly overwritten. -+ * -+ * It also does some things that are really internal to the btree -+ * implementation. If a btree node contains pointers that are stale by more than -+ * some threshold, it rewrites the btree node to avoid the bucket's generation -+ * wrapping around. It also merges adjacent btree nodes if they're empty enough. 
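The "search each sorted set" lookup described under BTREE NODES, reduced to its shape: toy types, and a linear scan standing in for the real binary search:

#include <stddef.h>
#include <stdint.h>

struct toy_set  { const uint64_t *keys; size_t nr; };	/* one sorted run */
struct toy_node { struct toy_set sets[4]; size_t nr_sets; };

/* Lookup probes every set; later sets are newer, so their match wins.
 * Appending a new sorted set never rewrites the sets already on disk. */
static inline const uint64_t *toy_node_lookup(const struct toy_node *n,
					      uint64_t search)
{
	const uint64_t *ret = NULL;

	for (size_t s = 0; s < n->nr_sets; s++)
		for (size_t i = 0; i < n->sets[s].nr; i++)
			if (n->sets[s].keys[i] == search)
				ret = &n->sets[s].keys[i];
	return ret;
}

Lazily merging the older sets keeps nr_sets small, which is why the extra probes stay cheap in practice.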
-+ * -+ * THE JOURNAL: -+ * -+ * Bcache's journal is not necessary for consistency; we always strictly -+ * order metadata writes so that the btree and everything else is consistent on -+ * disk in the event of an unclean shutdown, and in fact bcache had writeback -+ * caching (with recovery from unclean shutdown) before journalling was -+ * implemented. -+ * -+ * Rather, the journal is purely a performance optimization; we can't complete a -+ * write until we've updated the index on disk, otherwise the cache would be -+ * inconsistent in the event of an unclean shutdown. This means that without the -+ * journal, on random write workloads we constantly have to update all the leaf -+ * nodes in the btree, and those writes will be mostly empty (appending at most -+ * a few keys each) - highly inefficient in terms of amount of metadata writes, -+ * and it puts more strain on the various btree resorting/compacting code. -+ * -+ * The journal is just a log of keys we've inserted; on startup we just reinsert -+ * all the keys in the open journal entries. That means that when we're updating -+ * a node in the btree, we can wait until a 4k block of keys fills up before -+ * writing them out. -+ * -+ * For simplicity, we only journal updates to leaf nodes; updates to parent -+ * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth -+ * the complexity to deal with journalling them (in particular, journal replay) -+ * - updates to non leaf nodes just happen synchronously (see btree_split()). -+ */ -+ -+#undef pr_fmt -+#ifdef __KERNEL__ -+#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__ -+#else -+#define pr_fmt(fmt) "%s() " fmt "\n", __func__ -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "bcachefs_format.h" -+#include "errcode.h" -+#include "fifo.h" -+#include "nocow_locking_types.h" -+#include "opts.h" -+#include "recovery_types.h" -+#include "sb-errors_types.h" -+#include "seqmutex.h" -+#include "util.h" -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define BCH_WRITE_REF_DEBUG -+#endif -+ -+#ifndef dynamic_fault -+#define dynamic_fault(...) 0 -+#endif -+ -+#define race_fault(...) dynamic_fault("bcachefs:race") -+ -+#define trace_and_count(_c, _name, ...) 
\ -+do { \ -+ this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]); \ -+ trace_##_name(__VA_ARGS__); \ -+} while (0) -+ -+#define bch2_fs_init_fault(name) \ -+ dynamic_fault("bcachefs:bch_fs_init:" name) -+#define bch2_meta_read_fault(name) \ -+ dynamic_fault("bcachefs:meta:read:" name) -+#define bch2_meta_write_fault(name) \ -+ dynamic_fault("bcachefs:meta:write:" name) -+ -+#ifdef __KERNEL__ -+#define BCACHEFS_LOG_PREFIX -+#endif -+ -+#ifdef BCACHEFS_LOG_PREFIX -+ -+#define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name) -+#define bch2_fmt_dev(_ca, fmt) "bcachefs (%s): " fmt "\n", ((_ca)->name) -+#define bch2_fmt_dev_offset(_ca, _offset, fmt) "bcachefs (%s sector %llu): " fmt "\n", ((_ca)->name), (_offset) -+#define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum) -+#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \ -+ "bcachefs (%s inum %llu offset %llu): " fmt "\n", ((_c)->name), (_inum), (_offset) -+ -+#else -+ -+#define bch2_log_msg(_c, fmt) fmt -+#define bch2_fmt_dev(_ca, fmt) "%s: " fmt "\n", ((_ca)->name) -+#define bch2_fmt_dev_offset(_ca, _offset, fmt) "%s sector %llu: " fmt "\n", ((_ca)->name), (_offset) -+#define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum) -+#define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \ -+ "inum %llu offset %llu: " fmt "\n", (_inum), (_offset) -+ -+#endif -+ -+#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") -+ -+#define bch_info(c, fmt, ...) \ -+ printk(KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_notice(c, fmt, ...) \ -+ printk(KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn(c, fmt, ...) \ -+ printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_warn_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -+ -+#define bch_err(c, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err_dev(ca, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) -+#define bch_err_dev_offset(ca, _offset, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) -+#define bch_err_inum(c, _inum, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) -+#define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \ -+ printk(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) -+ -+#define bch_err_ratelimited(c, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) -+#define bch_err_dev_ratelimited(ca, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) -+#define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) -+#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) -+#define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \ -+ printk_ratelimited(KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) -+ -+#define bch_err_fn(_c, _ret) \ -+do { \ -+ if (_ret && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ -+ bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\ -+} while (0) -+ -+#define bch_err_msg(_c, _ret, _msg, ...) \ -+do { \ -+ if (_ret && !bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ -+ bch_err(_c, "%s(): error " _msg " %s", __func__, \ -+ ##__VA_ARGS__, bch2_err_str(_ret)); \ -+} while (0) -+ -+#define bch_verbose(c, fmt, ...) 
\ -+do { \ -+ if ((c)->opts.verbose) \ -+ bch_info(c, fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define pr_verbose_init(opts, fmt, ...) \ -+do { \ -+ if (opt_get(opts, verbose)) \ -+ pr_info(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+/* Parameters that are useful for debugging, but should always be compiled in: */ -+#define BCH_DEBUG_PARAMS_ALWAYS() \ -+ BCH_DEBUG_PARAM(key_merging_disabled, \ -+ "Disables merging of extents") \ -+ BCH_DEBUG_PARAM(btree_gc_always_rewrite, \ -+ "Causes mark and sweep to compact and rewrite every " \ -+ "btree node it traverses") \ -+ BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \ -+ "Disables rewriting of btree nodes during mark and sweep")\ -+ BCH_DEBUG_PARAM(btree_shrinker_disabled, \ -+ "Disables the shrinker callback for the btree node cache")\ -+ BCH_DEBUG_PARAM(verify_btree_ondisk, \ -+ "Reread btree nodes at various points to verify the " \ -+ "mergesort in the read path against modifications " \ -+ "done in memory") \ -+ BCH_DEBUG_PARAM(verify_all_btree_replicas, \ -+ "When reading btree nodes, read all replicas and " \ -+ "compare them") \ -+ BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \ -+ "Don't use the write buffer for backpointers, enabling "\ -+ "extra runtime checks") -+ -+/* Parameters that should only be compiled in debug mode: */ -+#define BCH_DEBUG_PARAMS_DEBUG() \ -+ BCH_DEBUG_PARAM(expensive_debug_checks, \ -+ "Enables various runtime debugging checks that " \ -+ "significantly affect performance") \ -+ BCH_DEBUG_PARAM(debug_check_iterators, \ -+ "Enables extra verification for btree iterators") \ -+ BCH_DEBUG_PARAM(debug_check_btree_accounting, \ -+ "Verify btree accounting for keys within a node") \ -+ BCH_DEBUG_PARAM(journal_seq_verify, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(inject_invalid_keys, \ -+ "Store the journal sequence number in the version " \ -+ "number of every btree key, and verify that btree " \ -+ "update ordering is preserved during recovery") \ -+ BCH_DEBUG_PARAM(test_alloc_startup, \ -+ "Force allocator startup to use the slowpath where it " \ -+ "can't find enough free buckets without invalidating " \ -+ "cached data") \ -+ BCH_DEBUG_PARAM(force_reconstruct_read, \ -+ "Force reads to use the reconstruct path, when reading " \ -+ "from erasure coded extents") \ -+ BCH_DEBUG_PARAM(test_restart_gc, \ -+ "Test restarting mark and sweep gc when bucket gens change") -+ -+#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG() -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL() -+#else -+#define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS() -+#endif -+ -+#define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name; -+BCH_DEBUG_PARAMS() -+#undef BCH_DEBUG_PARAM -+ -+#ifndef CONFIG_BCACHEFS_DEBUG -+#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name; -+BCH_DEBUG_PARAMS_DEBUG() -+#undef BCH_DEBUG_PARAM -+#endif -+ -+#define BCH_TIME_STATS() \ -+ x(btree_node_mem_alloc) \ -+ x(btree_node_split) \ -+ x(btree_node_compact) \ -+ x(btree_node_merge) \ -+ x(btree_node_sort) \ -+ x(btree_node_read) \ -+ x(btree_interior_update_foreground) \ -+ x(btree_interior_update_total) \ -+ x(btree_gc) \ -+ x(data_write) \ -+ x(data_read) \ -+ x(data_promote) \ -+ x(journal_flush_write) \ -+ x(journal_noflush_write) \ -+ x(journal_flush_seq) \ -+ x(blocked_journal) \ -+ x(blocked_allocate) \ -+ 
x(blocked_allocate_open_bucket) \ -+ x(nocow_lock_contended) -+ -+enum bch_time_stats { -+#define x(name) BCH_TIME_##name, -+ BCH_TIME_STATS() -+#undef x -+ BCH_TIME_STAT_NR -+}; -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "btree_write_buffer_types.h" -+#include "buckets_types.h" -+#include "buckets_waiting_for_journal_types.h" -+#include "clock_types.h" -+#include "disk_groups_types.h" -+#include "ec_types.h" -+#include "journal_types.h" -+#include "keylist_types.h" -+#include "quota_types.h" -+#include "rebalance_types.h" -+#include "replicas_types.h" -+#include "subvolume_types.h" -+#include "super_types.h" -+ -+/* Number of nodes btree coalesce will try to coalesce at once */ -+#define GC_MERGE_NODES 4U -+ -+/* Maximum number of nodes we might need to allocate atomically: */ -+#define BTREE_RESERVE_MAX (BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1)) -+ -+/* Size of the freelist we allocate btree nodes from: */ -+#define BTREE_NODE_RESERVE (BTREE_RESERVE_MAX * 4) -+ -+#define BTREE_NODE_OPEN_BUCKET_RESERVE (BTREE_RESERVE_MAX * BCH_REPLICAS_MAX) -+ -+struct btree; -+ -+enum gc_phase { -+ GC_PHASE_NOT_RUNNING, -+ GC_PHASE_START, -+ GC_PHASE_SB, -+ -+ GC_PHASE_BTREE_stripes, -+ GC_PHASE_BTREE_extents, -+ GC_PHASE_BTREE_inodes, -+ GC_PHASE_BTREE_dirents, -+ GC_PHASE_BTREE_xattrs, -+ GC_PHASE_BTREE_alloc, -+ GC_PHASE_BTREE_quotas, -+ GC_PHASE_BTREE_reflink, -+ GC_PHASE_BTREE_subvolumes, -+ GC_PHASE_BTREE_snapshots, -+ GC_PHASE_BTREE_lru, -+ GC_PHASE_BTREE_freespace, -+ GC_PHASE_BTREE_need_discard, -+ GC_PHASE_BTREE_backpointers, -+ GC_PHASE_BTREE_bucket_gens, -+ GC_PHASE_BTREE_snapshot_trees, -+ GC_PHASE_BTREE_deleted_inodes, -+ GC_PHASE_BTREE_logged_ops, -+ GC_PHASE_BTREE_rebalance_work, -+ -+ GC_PHASE_PENDING_DELETE, -+}; -+ -+struct gc_pos { -+ enum gc_phase phase; -+ struct bpos pos; -+ unsigned level; -+}; -+ -+struct reflink_gc { -+ u64 offset; -+ u32 size; -+ u32 refcount; -+}; -+ -+typedef GENRADIX(struct reflink_gc) reflink_gc_table; -+ -+struct io_count { -+ u64 sectors[2][BCH_DATA_NR]; -+}; -+ -+struct bch_dev { -+ struct kobject kobj; -+ struct percpu_ref ref; -+ struct completion ref_completion; -+ struct percpu_ref io_ref; -+ struct completion io_ref_completion; -+ -+ struct bch_fs *fs; -+ -+ u8 dev_idx; -+ /* -+ * Cached version of this device's member info from superblock -+ * Committed by bch2_write_super() -> bch_fs_mi_update() -+ */ -+ struct bch_member_cpu mi; -+ atomic64_t errors[BCH_MEMBER_ERROR_NR]; -+ -+ __uuid_t uuid; -+ char name[BDEVNAME_SIZE]; -+ -+ struct bch_sb_handle disk_sb; -+ struct bch_sb *sb_read_scratch; -+ int sb_write_error; -+ dev_t dev; -+ atomic_t flush_seq; -+ -+ struct bch_devs_mask self; -+ -+ /* biosets used in cloned bios for writing multiple replicas */ -+ struct bio_set replica_set; -+ -+ /* -+ * Buckets: -+ * Per-bucket arrays are protected by c->mark_lock, bucket_lock and -+ * gc_lock, for device resize - holding any is sufficient for access: -+ * Or rcu_read_lock(), but only for ptr_stale(): -+ */ -+ struct bucket_array __rcu *buckets_gc; -+ struct bucket_gens __rcu *bucket_gens; -+ u8 *oldest_gen; -+ unsigned long *buckets_nouse; -+ struct rw_semaphore bucket_lock; -+ -+ struct bch_dev_usage *usage_base; -+ struct bch_dev_usage __percpu *usage[JOURNAL_BUF_NR]; -+ struct bch_dev_usage __percpu *usage_gc; -+ -+ /* Allocator: */ -+ u64 new_fs_bucket_idx; -+ u64 alloc_cursor; -+ -+ unsigned nr_open_buckets; -+ unsigned nr_btree_reserve; -+ -+ size_t inc_gen_needs_gc; -+ size_t inc_gen_really_needs_gc; -+ size_t 
buckets_waiting_on_journal; -+ -+ atomic64_t rebalance_work; -+ -+ struct journal_device journal; -+ u64 prev_journal_sector; -+ -+ struct work_struct io_error_work; -+ -+ /* The rest of this all shows up in sysfs */ -+ atomic64_t cur_latency[2]; -+ struct bch2_time_stats io_latency[2]; -+ -+#define CONGESTED_MAX 1024 -+ atomic_t congested; -+ u64 congested_last; -+ -+ struct io_count __percpu *io_done; -+}; -+ -+enum { -+ /* startup: */ -+ BCH_FS_STARTED, -+ BCH_FS_MAY_GO_RW, -+ BCH_FS_RW, -+ BCH_FS_WAS_RW, -+ -+ /* shutdown: */ -+ BCH_FS_STOPPING, -+ BCH_FS_EMERGENCY_RO, -+ BCH_FS_GOING_RO, -+ BCH_FS_WRITE_DISABLE_COMPLETE, -+ BCH_FS_CLEAN_SHUTDOWN, -+ -+ /* fsck passes: */ -+ BCH_FS_FSCK_DONE, -+ BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ -+ BCH_FS_NEED_ANOTHER_GC, -+ -+ BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, -+ -+ /* errors: */ -+ BCH_FS_ERROR, -+ BCH_FS_TOPOLOGY_ERROR, -+ BCH_FS_ERRORS_FIXED, -+ BCH_FS_ERRORS_NOT_FIXED, -+}; -+ -+struct btree_debug { -+ unsigned id; -+}; -+ -+#define BCH_TRANSACTIONS_NR 128 -+ -+struct btree_transaction_stats { -+ struct bch2_time_stats lock_hold_times; -+ struct mutex lock; -+ unsigned nr_max_paths; -+ unsigned wb_updates_size; -+ unsigned max_mem; -+ char *max_paths_text; -+}; -+ -+struct bch_fs_pcpu { -+ u64 sectors_available; -+}; -+ -+struct journal_seq_blacklist_table { -+ size_t nr; -+ struct journal_seq_blacklist_table_entry { -+ u64 start; -+ u64 end; -+ bool dirty; -+ } entries[0]; -+}; -+ -+struct journal_keys { -+ struct journal_key { -+ u64 journal_seq; -+ u32 journal_offset; -+ enum btree_id btree_id:8; -+ unsigned level:8; -+ bool allocated; -+ bool overwritten; -+ struct bkey_i *k; -+ } *d; -+ /* -+ * Gap buffer: instead of all the empty space in the array being at the -+ * end of the buffer - from @nr to @size - the empty space is at @gap. -+ * This means that sequential insertions are O(n) instead of O(n^2). 
-+ */ -+ size_t gap; -+ size_t nr; -+ size_t size; -+}; -+ -+struct btree_trans_buf { -+ struct btree_trans *trans; -+}; -+ -+#define REPLICAS_DELTA_LIST_MAX (1U << 16) -+ -+#define BCACHEFS_ROOT_SUBVOL_INUM \ -+ ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) -+ -+#define BCH_WRITE_REFS() \ -+ x(trans) \ -+ x(write) \ -+ x(promote) \ -+ x(node_rewrite) \ -+ x(stripe_create) \ -+ x(stripe_delete) \ -+ x(reflink) \ -+ x(fallocate) \ -+ x(discard) \ -+ x(invalidate) \ -+ x(delete_dead_snapshots) \ -+ x(snapshot_delete_pagecache) \ -+ x(sysfs) -+ -+enum bch_write_ref { -+#define x(n) BCH_WRITE_REF_##n, -+ BCH_WRITE_REFS() -+#undef x -+ BCH_WRITE_REF_NR, -+}; -+ -+struct bch_fs { -+ struct closure cl; -+ -+ struct list_head list; -+ struct kobject kobj; -+ struct kobject counters_kobj; -+ struct kobject internal; -+ struct kobject opts_dir; -+ struct kobject time_stats; -+ unsigned long flags; -+ -+ int minor; -+ struct device *chardev; -+ struct super_block *vfs_sb; -+ dev_t dev; -+ char name[40]; -+ -+ /* ro/rw, add/remove/resize devices: */ -+ struct rw_semaphore state_lock; -+ -+ /* Counts outstanding writes, for clean transition to read-only */ -+#ifdef BCH_WRITE_REF_DEBUG -+ atomic_long_t writes[BCH_WRITE_REF_NR]; -+#else -+ struct percpu_ref writes; -+#endif -+ struct work_struct read_only_work; -+ -+ struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; -+ -+ struct bch_replicas_cpu replicas; -+ struct bch_replicas_cpu replicas_gc; -+ struct mutex replicas_gc_lock; -+ mempool_t replicas_delta_pool; -+ -+ struct journal_entry_res btree_root_journal_res; -+ struct journal_entry_res replicas_journal_res; -+ struct journal_entry_res clock_journal_res; -+ struct journal_entry_res dev_usage_journal_res; -+ -+ struct bch_disk_groups_cpu __rcu *disk_groups; -+ -+ struct bch_opts opts; -+ -+ /* Updated by bch2_sb_update():*/ -+ struct { -+ __uuid_t uuid; -+ __uuid_t user_uuid; -+ -+ u16 version; -+ u16 version_min; -+ u16 version_upgrade_complete; -+ -+ u8 nr_devices; -+ u8 clean; -+ -+ u8 encryption_type; -+ -+ u64 time_base_lo; -+ u32 time_base_hi; -+ unsigned time_units_per_sec; -+ unsigned nsec_per_time_unit; -+ u64 features; -+ u64 compat; -+ } sb; -+ -+ -+ struct bch_sb_handle disk_sb; -+ -+ unsigned short block_bits; /* ilog2(block_size) */ -+ -+ u16 btree_foreground_merge_threshold; -+ -+ struct closure sb_write; -+ struct mutex sb_lock; -+ -+ /* snapshot.c: */ -+ struct snapshot_table __rcu *snapshots; -+ size_t snapshot_table_size; -+ struct mutex snapshot_table_lock; -+ struct rw_semaphore snapshot_create_lock; -+ -+ struct work_struct snapshot_delete_work; -+ struct work_struct snapshot_wait_for_pagecache_and_delete_work; -+ snapshot_id_list snapshots_unlinked; -+ struct mutex snapshots_unlinked_lock; -+ -+ /* BTREE CACHE */ -+ struct bio_set btree_bio; -+ struct workqueue_struct *io_complete_wq; -+ -+ struct btree_root btree_roots_known[BTREE_ID_NR]; -+ DARRAY(struct btree_root) btree_roots_extra; -+ struct mutex btree_root_lock; -+ -+ struct btree_cache btree_cache; -+ -+ /* -+ * Cache of allocated btree nodes - if we allocate a btree node and -+ * don't use it, if we free it that space can't be reused until going -+ * _all_ the way through the allocator (which exposes us to a livelock -+ * when allocating btree reserves fail halfway through) - instead, we -+ * can stick them here: -+ */ -+ struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2]; -+ unsigned btree_reserve_cache_nr; -+ struct mutex btree_reserve_cache_lock; -+ -+ mempool_t btree_interior_update_pool; 
-+ struct list_head btree_interior_update_list; -+ struct list_head btree_interior_updates_unwritten; -+ struct mutex btree_interior_update_lock; -+ struct closure_waitlist btree_interior_update_wait; -+ -+ struct workqueue_struct *btree_interior_update_worker; -+ struct work_struct btree_interior_update_work; -+ -+ struct list_head pending_node_rewrites; -+ struct mutex pending_node_rewrites_lock; -+ -+ /* btree_io.c: */ -+ spinlock_t btree_write_error_lock; -+ struct btree_write_stats { -+ atomic64_t nr; -+ atomic64_t bytes; -+ } btree_write_stats[BTREE_WRITE_TYPE_NR]; -+ -+ /* btree_iter.c: */ -+ struct seqmutex btree_trans_lock; -+ struct list_head btree_trans_list; -+ mempool_t btree_trans_pool; -+ mempool_t btree_trans_mem_pool; -+ struct btree_trans_buf __percpu *btree_trans_bufs; -+ -+ struct srcu_struct btree_trans_barrier; -+ bool btree_trans_barrier_initialized; -+ -+ struct btree_key_cache btree_key_cache; -+ unsigned btree_key_cache_btrees; -+ -+ struct btree_write_buffer btree_write_buffer; -+ -+ struct workqueue_struct *btree_update_wq; -+ struct workqueue_struct *btree_io_complete_wq; -+ /* copygc needs its own workqueue for index updates.. */ -+ struct workqueue_struct *copygc_wq; -+ /* -+ * Use a dedicated wq for write ref holder tasks. Required to avoid -+ * dependency problems with other wq tasks that can block on ref -+ * draining, such as read-only transition. -+ */ -+ struct workqueue_struct *write_ref_wq; -+ -+ /* ALLOCATION */ -+ struct bch_devs_mask rw_devs[BCH_DATA_NR]; -+ -+ u64 capacity; /* sectors */ -+ -+ /* -+ * When capacity _decreases_ (due to a disk being removed), we -+ * increment capacity_gen - this invalidates outstanding reservations -+ * and forces them to be revalidated -+ */ -+ u32 capacity_gen; -+ unsigned bucket_size_max; -+ -+ atomic64_t sectors_available; -+ struct mutex sectors_available_lock; -+ -+ struct bch_fs_pcpu __percpu *pcpu; -+ -+ struct percpu_rw_semaphore mark_lock; -+ -+ seqcount_t usage_lock; -+ struct bch_fs_usage *usage_base; -+ struct bch_fs_usage __percpu *usage[JOURNAL_BUF_NR]; -+ struct bch_fs_usage __percpu *usage_gc; -+ u64 __percpu *online_reserved; -+ -+ /* single element mempool: */ -+ struct mutex usage_scratch_lock; -+ struct bch_fs_usage_online *usage_scratch; -+ -+ struct io_clock io_clock[2]; -+ -+ /* JOURNAL SEQ BLACKLIST */ -+ struct journal_seq_blacklist_table * -+ journal_seq_blacklist_table; -+ struct work_struct journal_seq_blacklist_gc_work; -+ -+ /* ALLOCATOR */ -+ spinlock_t freelist_lock; -+ struct closure_waitlist freelist_wait; -+ u64 blocked_allocate; -+ u64 blocked_allocate_open_bucket; -+ -+ open_bucket_idx_t open_buckets_freelist; -+ open_bucket_idx_t open_buckets_nr_free; -+ struct closure_waitlist open_buckets_wait; -+ struct open_bucket open_buckets[OPEN_BUCKETS_COUNT]; -+ open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT]; -+ -+ open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; -+ open_bucket_idx_t open_buckets_partial_nr; -+ -+ struct write_point btree_write_point; -+ struct write_point rebalance_write_point; -+ -+ struct write_point write_points[WRITE_POINT_MAX]; -+ struct hlist_head write_points_hash[WRITE_POINT_HASH_NR]; -+ struct mutex write_points_hash_lock; -+ unsigned write_points_nr; -+ -+ struct buckets_waiting_for_journal buckets_waiting_for_journal; -+ struct work_struct discard_work; -+ struct work_struct invalidate_work; -+ -+ /* GARBAGE COLLECTION */ -+ struct task_struct *gc_thread; -+ atomic_t kick_gc; -+ unsigned long gc_count; -+ -+ enum btree_id gc_gens_btree; 
-+ struct bpos gc_gens_pos; -+ -+ /* -+ * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] -+ * has been marked by GC. -+ * -+ * gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.) -+ * -+ * Protected by gc_pos_lock. Only written to by GC thread, so GC thread -+ * can read without a lock. -+ */ -+ seqcount_t gc_pos_lock; -+ struct gc_pos gc_pos; -+ -+ /* -+ * The allocation code needs gc_mark in struct bucket to be correct, but -+ * it's not while a gc is in progress. -+ */ -+ struct rw_semaphore gc_lock; -+ struct mutex gc_gens_lock; -+ -+ /* IO PATH */ -+ struct semaphore io_in_flight; -+ struct bio_set bio_read; -+ struct bio_set bio_read_split; -+ struct bio_set bio_write; -+ struct mutex bio_bounce_pages_lock; -+ mempool_t bio_bounce_pages; -+ struct bucket_nocow_lock_table -+ nocow_locks; -+ struct rhashtable promote_table; -+ -+ mempool_t compression_bounce[2]; -+ mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; -+ mempool_t decompress_workspace; -+ ZSTD_parameters zstd_params; -+ -+ struct crypto_shash *sha256; -+ struct crypto_sync_skcipher *chacha20; -+ struct crypto_shash *poly1305; -+ -+ atomic64_t key_version; -+ -+ mempool_t large_bkey_pool; -+ -+ /* MOVE.C */ -+ struct list_head moving_context_list; -+ struct mutex moving_context_lock; -+ -+ /* REBALANCE */ -+ struct bch_fs_rebalance rebalance; -+ -+ /* COPYGC */ -+ struct task_struct *copygc_thread; -+ struct write_point copygc_write_point; -+ s64 copygc_wait_at; -+ s64 copygc_wait; -+ bool copygc_running; -+ wait_queue_head_t copygc_running_wq; -+ -+ /* STRIPES: */ -+ GENRADIX(struct stripe) stripes; -+ GENRADIX(struct gc_stripe) gc_stripes; -+ -+ struct hlist_head ec_stripes_new[32]; -+ spinlock_t ec_stripes_new_lock; -+ -+ ec_stripes_heap ec_stripes_heap; -+ struct mutex ec_stripes_heap_lock; -+ -+ /* ERASURE CODING */ -+ struct list_head ec_stripe_head_list; -+ struct mutex ec_stripe_head_lock; -+ -+ struct list_head ec_stripe_new_list; -+ struct mutex ec_stripe_new_lock; -+ wait_queue_head_t ec_stripe_new_wait; -+ -+ struct work_struct ec_stripe_create_work; -+ u64 ec_stripe_hint; -+ -+ struct work_struct ec_stripe_delete_work; -+ -+ struct bio_set ec_bioset; -+ -+ /* REFLINK */ -+ reflink_gc_table reflink_gc_table; -+ size_t reflink_gc_nr; -+ -+ /* fs.c */ -+ struct list_head vfs_inodes_list; -+ struct mutex vfs_inodes_lock; -+ -+ /* VFS IO PATH - fs-io.c */ -+ struct bio_set writepage_bioset; -+ struct bio_set dio_write_bioset; -+ struct bio_set dio_read_bioset; -+ struct bio_set nocow_flush_bioset; -+ -+ /* QUOTAS */ -+ struct bch_memquota_type quotas[QTYP_NR]; -+ -+ /* RECOVERY */ -+ u64 journal_replay_seq_start; -+ u64 journal_replay_seq_end; -+ enum bch_recovery_pass curr_recovery_pass; -+ /* bitmap of explicitly enabled recovery passes: */ -+ u64 recovery_passes_explicit; -+ u64 recovery_passes_complete; -+ -+ /* DEBUG JUNK */ -+ struct dentry *fs_debug_dir; -+ struct dentry *btree_debug_dir; -+ struct btree_debug btree_debug[BTREE_ID_NR]; -+ struct btree *verify_data; -+ struct btree_node *verify_ondisk; -+ struct mutex verify_lock; -+ -+ u64 *unused_inode_hints; -+ unsigned inode_shard_bits; -+ -+ /* -+ * A btree node on disk could have too many bsets for an iterator to fit -+ * on the stack - have to dynamically allocate them -+ */ -+ mempool_t fill_iter; -+ -+ mempool_t btree_bounce_pool; -+ -+ struct journal journal; -+ GENRADIX(struct journal_replay *) journal_entries; -+ u64 journal_entries_base_seq; -+ struct journal_keys journal_keys; -+ struct list_head 
journal_iters; -+ -+ u64 last_bucket_seq_cleanup; -+ -+ u64 counters_on_mount[BCH_COUNTER_NR]; -+ u64 __percpu *counters; -+ -+ unsigned btree_gc_periodic:1; -+ unsigned copy_gc_enabled:1; -+ bool promote_whole_extents; -+ -+ struct bch2_time_stats times[BCH_TIME_STAT_NR]; -+ -+ struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; -+ -+ /* ERRORS */ -+ struct list_head fsck_error_msgs; -+ struct mutex fsck_error_msgs_lock; -+ bool fsck_alloc_msgs_err; -+ -+ bch_sb_errors_cpu fsck_error_counts; -+ struct mutex fsck_error_counts_lock; -+}; -+ -+extern struct wait_queue_head bch2_read_only_wait; -+ -+static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref) -+{ -+#ifdef BCH_WRITE_REF_DEBUG -+ atomic_long_inc(&c->writes[ref]); -+#else -+ percpu_ref_get(&c->writes); -+#endif -+} -+ -+static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref) -+{ -+#ifdef BCH_WRITE_REF_DEBUG -+ return !test_bit(BCH_FS_GOING_RO, &c->flags) && -+ atomic_long_inc_not_zero(&c->writes[ref]); -+#else -+ return percpu_ref_tryget_live(&c->writes); -+#endif -+} -+ -+static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref) -+{ -+#ifdef BCH_WRITE_REF_DEBUG -+ long v = atomic_long_dec_return(&c->writes[ref]); -+ -+ BUG_ON(v < 0); -+ if (v) -+ return; -+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) -+ if (atomic_long_read(&c->writes[i])) -+ return; -+ -+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ wake_up(&bch2_read_only_wait); -+#else -+ percpu_ref_put(&c->writes); -+#endif -+} -+ -+static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) -+{ -+#ifndef NO_BCACHEFS_FS -+ if (c->vfs_sb) -+ c->vfs_sb->s_bdi->ra_pages = ra_pages; -+#endif -+} -+ -+static inline unsigned bucket_bytes(const struct bch_dev *ca) -+{ -+ return ca->mi.bucket_size << 9; -+} -+ -+static inline unsigned block_bytes(const struct bch_fs *c) -+{ -+ return c->opts.block_size; -+} -+ -+static inline unsigned block_sectors(const struct bch_fs *c) -+{ -+ return c->opts.block_size >> 9; -+} -+ -+static inline size_t btree_sectors(const struct bch_fs *c) -+{ -+ return c->opts.btree_node_size >> 9; -+} -+ -+static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree) -+{ -+ return c->btree_key_cache_btrees & (1U << btree); -+} -+ -+static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time) -+{ -+ struct timespec64 t; -+ s32 rem; -+ -+ time += c->sb.time_base_lo; -+ -+ t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem); -+ t.tv_nsec = rem * c->sb.nsec_per_time_unit; -+ return t; -+} -+ -+static inline s64 timespec_to_bch2_time(const struct bch_fs *c, struct timespec64 ts) -+{ -+ return (ts.tv_sec * c->sb.time_units_per_sec + -+ (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo; -+} -+ -+static inline s64 bch2_current_time(const struct bch_fs *c) -+{ -+ struct timespec64 now; -+ -+ ktime_get_coarse_real_ts64(&now); -+ return timespec_to_bch2_time(c, now); -+} -+ -+static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) -+{ -+ return dev < c->sb.nr_devices && c->devs[dev]; -+} -+ -+#define BKEY_PADDED_ONSTACK(key, pad) \ -+ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -+ -+#endif /* _BCACHEFS_H */ -diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h -new file mode 100644 -index 000000000000..0a750953ff92 ---- /dev/null -+++ b/fs/bcachefs/bcachefs_format.h -@@ -0,0 +1,2425 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ 
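
The bch2_write_ref_* helpers that close out bcachefs.h above put two implementations behind one interface: a single percpu_ref in normal builds, and, under BCH_WRITE_REF_DEBUG, one atomic counter per ref type so a leaked reference can be attributed to its holder. A rough userspace model of the debug variant's put path, using C11 atomics in place of the kernel primitives:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum write_ref { REF_journal, REF_trans, REF_NR };	/* toy ref types */

static atomic_long writes[REF_NR];
static atomic_bool write_disable_complete;

/* Debug-style put: drop our ref; once every per-type counter has drained,
 * signal that the read-only transition can finish (wake_up() in-kernel). */
static void write_ref_put(enum write_ref ref)
{
	long v = atomic_fetch_sub(&writes[ref], 1) - 1;

	assert(v >= 0);
	if (v)
		return;

	for (int i = 0; i < REF_NR; i++)
		if (atomic_load(&writes[i]))
			return;

	atomic_store(&write_disable_complete, true);
}

int main(void)
{
	atomic_fetch_add(&writes[REF_journal], 1);	/* get */
	write_ref_put(REF_journal);			/* put */
	printf("write disable complete: %d\n",
	       (int)atomic_load(&write_disable_complete));
	return 0;
}
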
-+#ifndef _BCACHEFS_FORMAT_H -+#define _BCACHEFS_FORMAT_H -+ -+/* -+ * bcachefs on disk data structures -+ * -+ * OVERVIEW: -+ * -+ * There are three main types of on disk data structures in bcachefs (this is -+ * reduced from 5 in bcache) -+ * -+ * - superblock -+ * - journal -+ * - btree -+ * -+ * The btree is the primary structure; most metadata exists as keys in the -+ * various btrees. There are only a small number of btrees, they're not -+ * sharded - we have one btree for extents, another for inodes, et cetera. -+ * -+ * SUPERBLOCK: -+ * -+ * The superblock contains the location of the journal, the list of devices in -+ * the filesystem, and in general any metadata we need in order to decide -+ * whether we can start a filesystem or prior to reading the journal/btree -+ * roots. -+ * -+ * The superblock is extensible, and most of the contents of the superblock are -+ * in variable length, type tagged fields; see struct bch_sb_field. -+ * -+ * Backup superblocks do not reside in a fixed location; also, superblocks do -+ * not have a fixed size. To locate backup superblocks we have struct -+ * bch_sb_layout; we store a copy of this inside every superblock, and also -+ * before the first superblock. -+ * -+ * JOURNAL: -+ * -+ * The journal primarily records btree updates in the order they occurred; -+ * journal replay consists of just iterating over all the keys in the open -+ * journal entries and re-inserting them into the btrees. -+ * -+ * The journal also contains entry types for the btree roots, and blacklisted -+ * journal sequence numbers (see journal_seq_blacklist.c). -+ * -+ * BTREE: -+ * -+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically -+ * 128k-256k) and log structured. We use struct btree_node for writing the first -+ * entry in a given node (offset 0), and struct btree_node_entry for all -+ * subsequent writes. -+ * -+ * After the header, btree node entries contain a list of keys in sorted order. -+ * Values are stored inline with the keys; since values are variable length (and -+ * keys effectively are variable length too, due to packing) we can't do random -+ * access without building up additional in memory tables in the btree node read -+ * path. -+ * -+ * BTREE KEYS (struct bkey): -+ * -+ * The various btrees share a common format for the key - so as to avoid -+ * switching in fastpath lookup/comparison code - but define their own -+ * structures for the key values. -+ * -+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max -+ * size is just under 2k. The common part also contains a type tag for the -+ * value, and a format field indicating whether the key is packed or not (and -+ * also meant to allow adding new key fields in the future, if desired). -+ * -+ * bkeys, when stored within a btree node, may also be packed. In that case, the -+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can -+ * be generous with field sizes in the common part of the key format (64 bit -+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost. 
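
The packed-key scheme this overview ends on trades a little decode work for density: the common key fields keep generous widths, and each btree node's bkey_format records how many bits every field actually needs plus a per-field bias. A toy pack/unpack of a single field under that scheme (illustrative names; assumes the value lies in the range the format covers):

#include <stdint.h>
#include <stdio.h>

/* Toy single-field bkey_format: a packed key stores (value - field_offset)
 * in bits_per_field bits instead of the full-width field. */
struct toy_format {
	unsigned	bits;		/* bits this field gets when packed */
	uint64_t	field_offset;	/* bias subtracted before packing */
};

static uint64_t field_mask(unsigned bits)
{
	return bits >= 64 ? ~0ULL : (1ULL << bits) - 1;
}

static uint64_t toy_pack(const struct toy_format *f, uint64_t v)
{
	return (v - f->field_offset) & field_mask(f->bits);	/* v must be in range */
}

static uint64_t toy_unpack(const struct toy_format *f, uint64_t packed)
{
	return (packed & field_mask(f->bits)) + f->field_offset;
}

int main(void)
{
	/* e.g. every inode number in this node lies in [4096, 4096 + 2^20) */
	struct toy_format f = { .bits = 20, .field_offset = 4096 };
	uint64_t packed = toy_pack(&f, 123456);

	printf("packed=0x%llx unpacked=%llu\n",
	       (unsigned long long)packed,
	       (unsigned long long)toy_unpack(&f, packed));
	return 0;
}
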
-+ */ -+ -+#include -+#include -+#include -+#include -+#include "vstructs.h" -+ -+#ifdef __KERNEL__ -+typedef uuid_t __uuid_t; -+#endif -+ -+#define BITMASK(name, type, field, offset, end) \ -+static const __maybe_unused unsigned name##_OFFSET = offset; \ -+static const __maybe_unused unsigned name##_BITS = (end - offset); \ -+ \ -+static inline __u64 name(const type *k) \ -+{ \ -+ return (k->field >> offset) & ~(~0ULL << (end - offset)); \ -+} \ -+ \ -+static inline void SET_##name(type *k, __u64 v) \ -+{ \ -+ k->field &= ~(~(~0ULL << (end - offset)) << offset); \ -+ k->field |= (v & ~(~0ULL << (end - offset))) << offset; \ -+} -+ -+#define LE_BITMASK(_bits, name, type, field, offset, end) \ -+static const __maybe_unused unsigned name##_OFFSET = offset; \ -+static const __maybe_unused unsigned name##_BITS = (end - offset); \ -+static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\ -+ \ -+static inline __u64 name(const type *k) \ -+{ \ -+ return (__le##_bits##_to_cpu(k->field) >> offset) & \ -+ ~(~0ULL << (end - offset)); \ -+} \ -+ \ -+static inline void SET_##name(type *k, __u64 v) \ -+{ \ -+ __u##_bits new = __le##_bits##_to_cpu(k->field); \ -+ \ -+ new &= ~(~(~0ULL << (end - offset)) << offset); \ -+ new |= (v & ~(~0ULL << (end - offset))) << offset; \ -+ k->field = __cpu_to_le##_bits(new); \ -+} -+ -+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e) -+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e) -+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e) -+ -+struct bkey_format { -+ __u8 key_u64s; -+ __u8 nr_fields; -+ /* One unused slot for now: */ -+ __u8 bits_per_field[6]; -+ __le64 field_offset[6]; -+}; -+ -+/* Btree keys - all units are in sectors */ -+ -+struct bpos { -+ /* -+ * Word order matches machine byte order - btree code treats a bpos as a -+ * single large integer, for search/comparison purposes -+ * -+ * Note that wherever a bpos is embedded in another on disk data -+ * structure, it has to be byte swabbed when reading in metadata that -+ * wasn't written in native endian order: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u32 snapshot; -+ __u64 offset; -+ __u64 inode; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u64 inode; -+ __u64 offset; /* Points to end of extent - sectors */ -+ __u32 snapshot; -+#else -+#error edit for your odd byteorder. 
-+#endif -+} __packed __aligned(4); -+ -+#define KEY_INODE_MAX ((__u64)~0ULL) -+#define KEY_OFFSET_MAX ((__u64)~0ULL) -+#define KEY_SNAPSHOT_MAX ((__u32)~0U) -+#define KEY_SIZE_MAX ((__u32)~0U) -+ -+static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot) -+{ -+ return (struct bpos) { -+ .inode = inode, -+ .offset = offset, -+ .snapshot = snapshot, -+ }; -+} -+ -+#define POS_MIN SPOS(0, 0, 0) -+#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, 0) -+#define SPOS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX) -+#define POS(_inode, _offset) SPOS(_inode, _offset, 0) -+ -+/* Empty placeholder struct, for container_of() */ -+struct bch_val { -+ __u64 __nothing[0]; -+}; -+ -+struct bversion { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u64 lo; -+ __u32 hi; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ __u32 hi; -+ __u64 lo; -+#endif -+} __packed __aligned(4); -+ -+struct bkey { -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#else -+#error edit for your odd byteorder. -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ __u8 pad[1]; -+ -+ struct bversion version; -+ __u32 size; /* extent size, in sectors */ -+ struct bpos p; -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ struct bpos p; -+ __u32 size; /* extent size, in sectors */ -+ struct bversion version; -+ -+ __u8 pad[1]; -+#endif -+} __packed __aligned(8); -+ -+struct bkey_packed { -+ __u64 _data[0]; -+ -+ /* Size of combined key and value, in u64s */ -+ __u8 u64s; -+ -+ /* Format of key (0 for format local to btree node) */ -+ -+ /* -+ * XXX: next incompat on disk format change, switch format and -+ * needs_whiteout - bkey_packed() will be cheaper if format is the high -+ * bits of the bitfield -+ */ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 format:7, -+ needs_whiteout:1; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u8 needs_whiteout:1, -+ format:7; -+#endif -+ -+ /* Type of the value */ -+ __u8 type; -+ __u8 key_start[0]; -+ -+ /* -+ * We copy bkeys with struct assignment in various places, and while -+ * that shouldn't be done with packed bkeys we can't disallow it in C, -+ * and it's legal to cast a bkey to a bkey_packed - so padding it out -+ * to the same size as struct bkey should hopefully be safest. 
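
The pad[] sizing discussed just above is an invariant worth checking at compile time: bkey_packed is padded out so that struct assignment between the two key types never touches memory out of bounds. A standalone sketch of that check, using simplified stand-in layouts and the GCC/Clang packed/aligned attributes rather than the real structs:

#include <stdint.h>

/* Simplified stand-ins for the two key types, showing the "pad the packed
 * variant out to the same size" invariant that makes struct assignment safe. */
struct toy_bkey {
	uint8_t		u64s, format_ws, type, pad[1];
	uint64_t	version_lo;
	uint32_t	version_hi, size;
	uint64_t	inode, offset;
	uint32_t	snapshot;
} __attribute__((packed, aligned(8)));

struct toy_bkey_packed {
	uint8_t		u64s, format_ws, type;
	/* 3 header bytes; pad the rest out to sizeof(struct toy_bkey) */
	uint8_t		pad[sizeof(struct toy_bkey) - 3];
} __attribute__((packed, aligned(8)));

_Static_assert(sizeof(struct toy_bkey) == sizeof(struct toy_bkey_packed),
	       "struct assignment between key types must stay in bounds");

int main(void)
{
	return 0;
}
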
-+ */ -+ __u8 pad[sizeof(struct bkey) - 3]; -+} __packed __aligned(8); -+ -+typedef struct { -+ __le64 lo; -+ __le64 hi; -+} bch_le128; -+ -+#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) -+#define BKEY_U64s_MAX U8_MAX -+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s) -+ -+#define KEY_PACKED_BITS_START 24 -+ -+#define KEY_FORMAT_LOCAL_BTREE 0 -+#define KEY_FORMAT_CURRENT 1 -+ -+enum bch_bkey_fields { -+ BKEY_FIELD_INODE, -+ BKEY_FIELD_OFFSET, -+ BKEY_FIELD_SNAPSHOT, -+ BKEY_FIELD_SIZE, -+ BKEY_FIELD_VERSION_HI, -+ BKEY_FIELD_VERSION_LO, -+ BKEY_NR_FIELDS, -+}; -+ -+#define bkey_format_field(name, field) \ -+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) -+ -+#define BKEY_FORMAT_CURRENT \ -+((struct bkey_format) { \ -+ .key_u64s = BKEY_U64s, \ -+ .nr_fields = BKEY_NR_FIELDS, \ -+ .bits_per_field = { \ -+ bkey_format_field(INODE, p.inode), \ -+ bkey_format_field(OFFSET, p.offset), \ -+ bkey_format_field(SNAPSHOT, p.snapshot), \ -+ bkey_format_field(SIZE, size), \ -+ bkey_format_field(VERSION_HI, version.hi), \ -+ bkey_format_field(VERSION_LO, version.lo), \ -+ }, \ -+}) -+ -+/* bkey with inline value */ -+struct bkey_i { -+ __u64 _data[0]; -+ -+ struct bkey k; -+ struct bch_val v; -+}; -+ -+#define KEY(_inode, _offset, _size) \ -+((struct bkey) { \ -+ .u64s = BKEY_U64s, \ -+ .format = KEY_FORMAT_CURRENT, \ -+ .p = POS(_inode, _offset), \ -+ .size = _size, \ -+}) -+ -+static inline void bkey_init(struct bkey *k) -+{ -+ *k = KEY(0, 0, 0); -+} -+ -+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) -+ -+#define __BKEY_PADDED(key, pad) \ -+ struct bkey_i key; __u64 key ## _pad[pad] -+ -+/* -+ * - DELETED keys are used internally to mark keys that should be ignored but -+ * override keys in composition order. Their version number is ignored. -+ * -+ * - DISCARDED keys indicate that the data is all 0s because it has been -+ * discarded. DISCARDs may have a version; if the version is nonzero the key -+ * will be persistent, otherwise the key will be dropped whenever the btree -+ * node is rewritten (like DELETED keys). -+ * -+ * - ERROR: any read of the data returns a read error, as the data was lost due -+ * to a failing device. Like DISCARDED keys, they can be removed (overridden) -+ * by new writes or cluster-wide GC. Node repair can also overwrite them with -+ * the same or a more recent version number, but not with an older version -+ * number. 
-+ * -+ * - WHITEOUT: for hash table btrees -+ */ -+#define BCH_BKEY_TYPES() \ -+ x(deleted, 0) \ -+ x(whiteout, 1) \ -+ x(error, 2) \ -+ x(cookie, 3) \ -+ x(hash_whiteout, 4) \ -+ x(btree_ptr, 5) \ -+ x(extent, 6) \ -+ x(reservation, 7) \ -+ x(inode, 8) \ -+ x(inode_generation, 9) \ -+ x(dirent, 10) \ -+ x(xattr, 11) \ -+ x(alloc, 12) \ -+ x(quota, 13) \ -+ x(stripe, 14) \ -+ x(reflink_p, 15) \ -+ x(reflink_v, 16) \ -+ x(inline_data, 17) \ -+ x(btree_ptr_v2, 18) \ -+ x(indirect_inline_data, 19) \ -+ x(alloc_v2, 20) \ -+ x(subvolume, 21) \ -+ x(snapshot, 22) \ -+ x(inode_v2, 23) \ -+ x(alloc_v3, 24) \ -+ x(set, 25) \ -+ x(lru, 26) \ -+ x(alloc_v4, 27) \ -+ x(backpointer, 28) \ -+ x(inode_v3, 29) \ -+ x(bucket_gens, 30) \ -+ x(snapshot_tree, 31) \ -+ x(logged_op_truncate, 32) \ -+ x(logged_op_finsert, 33) -+ -+enum bch_bkey_type { -+#define x(name, nr) KEY_TYPE_##name = nr, -+ BCH_BKEY_TYPES() -+#undef x -+ KEY_TYPE_MAX, -+}; -+ -+struct bch_deleted { -+ struct bch_val v; -+}; -+ -+struct bch_whiteout { -+ struct bch_val v; -+}; -+ -+struct bch_error { -+ struct bch_val v; -+}; -+ -+struct bch_cookie { -+ struct bch_val v; -+ __le64 cookie; -+}; -+ -+struct bch_hash_whiteout { -+ struct bch_val v; -+}; -+ -+struct bch_set { -+ struct bch_val v; -+}; -+ -+/* Extents */ -+ -+/* -+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally -+ * preceded by checksum/compression information (bch_extent_crc32 or -+ * bch_extent_crc64). -+ * -+ * One major determining factor in the format of extents is how we handle and -+ * represent extents that have been partially overwritten and thus trimmed: -+ * -+ * If an extent is not checksummed or compressed, when the extent is trimmed we -+ * don't have to remember the extent we originally allocated and wrote: we can -+ * merely adjust ptr->offset to point to the start of the data that is currently -+ * live. The size field in struct bkey records the current (live) size of the -+ * extent, and is also used to mean "size of region on disk that we point to" in -+ * this case. -+ * -+ * Thus an extent that is not checksummed or compressed will consist only of a -+ * list of bch_extent_ptrs, with none of the fields in -+ * bch_extent_crc32/bch_extent_crc64. -+ * -+ * When an extent is checksummed or compressed, it's not possible to read only -+ * the data that is currently live: we have to read the entire extent that was -+ * originally written, and then return only the part of the extent that is -+ * currently live. -+ * -+ * Thus, in addition to the current size of the extent in struct bkey, we need -+ * to store the size of the originally allocated space - this is the -+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, -+ * when the extent is trimmed, instead of modifying the offset field of the -+ * pointer, we keep a second smaller offset field - "offset into the original -+ * extent of the currently live region". -+ * -+ * The other major determining factor is replication and data migration: -+ * -+ * Each pointer may have its own bch_extent_crc32/64. 
When doing a replicated -+ * write, we will initially write all the replicas in the same format, with the -+ * same checksum type and compression format - however, when copygc runs later (or -+ * tiering/cache promotion, anything that moves data), it is not in general -+ * going to rewrite all the pointers at once - one of the replicas may be in a -+ * bucket on one device that has very little fragmentation while another lives -+ * in a bucket that has become heavily fragmented, and thus is being rewritten -+ * sooner than the rest. -+ * -+ * Thus it will only move a subset of the pointers (or in the case of -+ * tiering/cache promotion perhaps add a single pointer without dropping any -+ * current pointers), and if the extent has been partially overwritten it must -+ * write only the currently live portion (or copygc would not be able to reduce -+ * fragmentation!) - which necessitates a different bch_extent_crc format for -+ * the new pointer. -+ * -+ * But in the interests of space efficiency, we don't want to store one -+ * bch_extent_crc for each pointer if we don't have to. -+ * -+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and -+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the -+ * type of a given entry with a scheme similar to utf8 (except we're encoding a -+ * type, not a size), encoding the type in the position of the first set bit: -+ * -+ * bch_extent_crc32 - 0b1 -+ * bch_extent_ptr - 0b10 -+ * bch_extent_crc64 - 0b100 -+ * -+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and -+ * bch_extent_crc64 is the least constrained). -+ * -+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, -+ * until the next bch_extent_crc32/64. -+ * -+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer -+ * is neither checksummed nor compressed. 
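
Decoding the "type in the position of the first set bit" encoding described above is a single count-trailing-zeros: the entry type is the index of the lowest set bit of the entry's first word, and everything above the marker bit is payload. A sketch with toy type numbers (the real mapping of types to codes is defined by the structs below, not by this example):

#include <stdio.h>

/* Toy decode of "type encoded in the position of the lowest set bit":
 * smaller type codes leave more payload bits for the most constrained
 * entries. Type numbers here are arbitrary, not the bcachefs mapping. */
enum toy_entry_type { TOY_TYPE_0, TOY_TYPE_1, TOY_TYPE_2 };

static enum toy_entry_type entry_type(unsigned long first_word)
{
	/* __builtin_ctzl (GCC/Clang) == index of lowest set bit, like __ffs() */
	return (enum toy_entry_type)__builtin_ctzl(first_word);
}

int main(void)
{
	/* An entry whose first word ends in 0b10 decodes to type 1; the
	 * payload occupies the bits above the marker bit. */
	unsigned long w = (0x1234UL << 2) | 0x2;
	enum toy_entry_type t = entry_type(w);

	printf("type=%d payload=0x%lx\n", (int)t, w >> (t + 1));
	return 0;
}
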
-+ */ -+ -+/* 128 bits, sufficient for cryptographic MACs: */ -+struct bch_csum { -+ __le64 lo; -+ __le64 hi; -+} __packed __aligned(8); -+ -+#define BCH_EXTENT_ENTRY_TYPES() \ -+ x(ptr, 0) \ -+ x(crc32, 1) \ -+ x(crc64, 2) \ -+ x(crc128, 3) \ -+ x(stripe_ptr, 4) \ -+ x(rebalance, 5) -+#define BCH_EXTENT_ENTRY_MAX 6 -+ -+enum bch_extent_entry_type { -+#define x(f, n) BCH_EXTENT_ENTRY_##f = n, -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+/* Compressed/uncompressed size are stored biased by 1: */ -+struct bch_extent_crc32 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u32 type:2, -+ _compressed_size:7, -+ _uncompressed_size:7, -+ offset:7, -+ _unused:1, -+ csum_type:4, -+ compression_type:4; -+ __u32 csum; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u32 csum; -+ __u32 compression_type:4, -+ csum_type:4, -+ _unused:1, -+ offset:7, -+ _uncompressed_size:7, -+ _compressed_size:7, -+ type:2; -+#endif -+} __packed __aligned(8); -+ -+#define CRC32_SIZE_MAX (1U << 7) -+#define CRC32_NONCE_MAX 0 -+ -+struct bch_extent_crc64 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:3, -+ _compressed_size:9, -+ _uncompressed_size:9, -+ offset:9, -+ nonce:10, -+ csum_type:4, -+ compression_type:4, -+ csum_hi:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 csum_hi:16, -+ compression_type:4, -+ csum_type:4, -+ nonce:10, -+ offset:9, -+ _uncompressed_size:9, -+ _compressed_size:9, -+ type:3; -+#endif -+ __u64 csum_lo; -+} __packed __aligned(8); -+ -+#define CRC64_SIZE_MAX (1U << 9) -+#define CRC64_NONCE_MAX ((1U << 10) - 1) -+ -+struct bch_extent_crc128 { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:4, -+ _compressed_size:13, -+ _uncompressed_size:13, -+ offset:13, -+ nonce:13, -+ csum_type:4, -+ compression_type:4; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 compression_type:4, -+ csum_type:4, -+ nonce:13, -+ offset:13, -+ _uncompressed_size:13, -+ _compressed_size:13, -+ type:4; -+#endif -+ struct bch_csum csum; -+} __packed __aligned(8); -+ -+#define CRC128_SIZE_MAX (1U << 13) -+#define CRC128_NONCE_MAX ((1U << 13) - 1) -+ -+/* -+ * @reservation - pointer hasn't been written to, just reserved -+ */ -+struct bch_extent_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:1, -+ cached:1, -+ unused:1, -+ unwritten:1, -+ offset:44, /* 8 petabytes */ -+ dev:8, -+ gen:8; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 gen:8, -+ dev:8, -+ offset:44, -+ unwritten:1, -+ unused:1, -+ cached:1, -+ type:1; -+#endif -+} __packed __aligned(8); -+ -+struct bch_extent_stripe_ptr { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:5, -+ block:8, -+ redundancy:4, -+ idx:47; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 idx:47, -+ redundancy:4, -+ block:8, -+ type:5; -+#endif -+}; -+ -+struct bch_extent_rebalance { -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u64 type:6, -+ unused:34, -+ compression:8, /* enum bch_compression_opt */ -+ target:16; -+#elif defined (__BIG_ENDIAN_BITFIELD) -+ __u64 target:16, -+ compression:8, -+ unused:34, -+ type:6; -+#endif -+}; -+ -+union bch_extent_entry { -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 -+ unsigned long type; -+#elif __BITS_PER_LONG == 32 -+ struct { -+ unsigned long pad; -+ unsigned long type; -+ }; -+#else -+#error edit for your odd byteorder. 
-+#endif -+ -+#define x(f, n) struct bch_extent_##f f; -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+}; -+ -+struct bch_btree_ptr { -+ struct bch_val v; -+ -+ __u64 _data[0]; -+ struct bch_extent_ptr start[]; -+} __packed __aligned(8); -+ -+struct bch_btree_ptr_v2 { -+ struct bch_val v; -+ -+ __u64 mem_ptr; -+ __le64 seq; -+ __le16 sectors_written; -+ __le16 flags; -+ struct bpos min_key; -+ __u64 _data[0]; -+ struct bch_extent_ptr start[]; -+} __packed __aligned(8); -+ -+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); -+ -+struct bch_extent { -+ struct bch_val v; -+ -+ __u64 _data[0]; -+ union bch_extent_entry start[]; -+} __packed __aligned(8); -+ -+struct bch_reservation { -+ struct bch_val v; -+ -+ __le32 generation; -+ __u8 nr_replicas; -+ __u8 pad[3]; -+} __packed __aligned(8); -+ -+/* Maximum size (in u64s) a single pointer could be: */ -+#define BKEY_EXTENT_PTR_U64s_MAX\ -+ ((sizeof(struct bch_extent_crc128) + \ -+ sizeof(struct bch_extent_ptr)) / sizeof(__u64)) -+ -+/* Maximum possible size of an entire extent value: */ -+#define BKEY_EXTENT_VAL_U64s_MAX \ -+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) -+ -+/* * Maximum possible size of an entire extent, key + value: */ -+#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) -+ -+/* Btree pointers don't carry around checksums: */ -+#define BKEY_BTREE_PTR_VAL_U64s_MAX \ -+ ((sizeof(struct bch_btree_ptr_v2) + \ -+ sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) -+#define BKEY_BTREE_PTR_U64s_MAX \ -+ (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) -+ -+/* Inodes */ -+ -+#define BLOCKDEV_INODE_MAX 4096 -+ -+#define BCACHEFS_ROOT_INO 4096 -+ -+struct bch_inode { -+ struct bch_val v; -+ -+ __le64 bi_hash_seed; -+ __le32 bi_flags; -+ __le16 bi_mode; -+ __u8 fields[]; -+} __packed __aligned(8); -+ -+struct bch_inode_v2 { -+ struct bch_val v; -+ -+ __le64 bi_journal_seq; -+ __le64 bi_hash_seed; -+ __le64 bi_flags; -+ __le16 bi_mode; -+ __u8 fields[]; -+} __packed __aligned(8); -+ -+struct bch_inode_v3 { -+ struct bch_val v; -+ -+ __le64 bi_journal_seq; -+ __le64 bi_hash_seed; -+ __le64 bi_flags; -+ __le64 bi_sectors; -+ __le64 bi_size; -+ __le64 bi_version; -+ __u8 fields[]; -+} __packed __aligned(8); -+ -+#define INODEv3_FIELDS_START_INITIAL 6 -+#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64)) -+ -+struct bch_inode_generation { -+ struct bch_val v; -+ -+ __le32 bi_generation; -+ __le32 pad; -+} __packed __aligned(8); -+ -+/* -+ * bi_subvol and bi_parent_subvol are only set for subvolume roots: -+ */ -+ -+#define BCH_INODE_FIELDS_v2() \ -+ x(bi_atime, 96) \ -+ x(bi_ctime, 96) \ -+ x(bi_mtime, 96) \ -+ x(bi_otime, 96) \ -+ x(bi_size, 64) \ -+ x(bi_sectors, 64) \ -+ x(bi_uid, 32) \ -+ x(bi_gid, 32) \ -+ x(bi_nlink, 32) \ -+ x(bi_generation, 32) \ -+ x(bi_dev, 32) \ -+ x(bi_data_checksum, 8) \ -+ x(bi_compression, 8) \ -+ x(bi_project, 32) \ -+ x(bi_background_compression, 8) \ -+ x(bi_data_replicas, 8) \ -+ x(bi_promote_target, 16) \ -+ x(bi_foreground_target, 16) \ -+ x(bi_background_target, 16) \ -+ x(bi_erasure_code, 16) \ -+ x(bi_fields_set, 16) \ -+ x(bi_dir, 64) \ -+ x(bi_dir_offset, 64) \ -+ x(bi_subvol, 32) \ -+ x(bi_parent_subvol, 32) -+ -+#define BCH_INODE_FIELDS_v3() \ -+ x(bi_atime, 96) \ -+ x(bi_ctime, 96) \ -+ x(bi_mtime, 96) \ -+ x(bi_otime, 96) \ -+ x(bi_uid, 32) \ -+ x(bi_gid, 32) \ -+ x(bi_nlink, 32) \ -+ x(bi_generation, 32) \ -+ x(bi_dev, 32) \ -+ x(bi_data_checksum, 8) \ -+ x(bi_compression, 8) \ -+ x(bi_project, 32) \ 
-+ x(bi_background_compression, 8) \ -+ x(bi_data_replicas, 8) \ -+ x(bi_promote_target, 16) \ -+ x(bi_foreground_target, 16) \ -+ x(bi_background_target, 16) \ -+ x(bi_erasure_code, 16) \ -+ x(bi_fields_set, 16) \ -+ x(bi_dir, 64) \ -+ x(bi_dir_offset, 64) \ -+ x(bi_subvol, 32) \ -+ x(bi_parent_subvol, 32) \ -+ x(bi_nocow, 8) -+ -+/* subset of BCH_INODE_FIELDS */ -+#define BCH_INODE_OPTS() \ -+ x(data_checksum, 8) \ -+ x(compression, 8) \ -+ x(project, 32) \ -+ x(background_compression, 8) \ -+ x(data_replicas, 8) \ -+ x(promote_target, 16) \ -+ x(foreground_target, 16) \ -+ x(background_target, 16) \ -+ x(erasure_code, 16) \ -+ x(nocow, 8) -+ -+enum inode_opt_id { -+#define x(name, ...) \ -+ Inode_opt_##name, -+ BCH_INODE_OPTS() -+#undef x -+ Inode_opt_nr, -+}; -+ -+#define BCH_INODE_FLAGS() \ -+ x(sync, 0) \ -+ x(immutable, 1) \ -+ x(append, 2) \ -+ x(nodump, 3) \ -+ x(noatime, 4) \ -+ x(i_size_dirty, 5) \ -+ x(i_sectors_dirty, 6) \ -+ x(unlinked, 7) \ -+ x(backptr_untrusted, 8) -+ -+/* bits 20+ reserved for packed fields below: */ -+ -+enum bch_inode_flags { -+#define x(t, n) BCH_INODE_##t = 1U << n, -+ BCH_INODE_FLAGS() -+#undef x -+}; -+ -+enum __bch_inode_flags { -+#define x(t, n) __BCH_INODE_##t = n, -+ BCH_INODE_FLAGS() -+#undef x -+}; -+ -+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); -+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); -+LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); -+ -+LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); -+LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); -+ -+LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24); -+LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31); -+ -+LE64_BITMASK(INODEv3_FIELDS_START, -+ struct bch_inode_v3, bi_flags, 31, 36); -+LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); -+ -+/* Dirents */ -+ -+/* -+ * Dirents (and xattrs) have to implement string lookups; since our b-tree -+ * doesn't support arbitrary length strings for the key, we instead index by a -+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset -+ * field of the key - using linear probing to resolve hash collisions. This also -+ * provides us with the readdir cookie posix requires. 
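
Since dirents are indexed by a 64-bit hash of the name stored in the key's offset field, a lookup is hash-then-probe: start at the hashed slot and walk forward past collisions and whiteouts until the name matches or an empty slot proves it absent. A compact in-memory model of that probe loop (illustrative, not the bch2_hash_* interface):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOTS 8

/* EMPTY never held anything (a probe may stop there); WHITEOUT marks a
 * deleted entry (a probe must continue past it). */
enum slot_state { S_EMPTY, S_WHITEOUT, S_USED };

struct slot {
	enum slot_state	state;
	uint64_t	hash;
	const char	*name;
};

static const struct slot *lookup(const struct slot *tbl, uint64_t hash,
				 const char *name)
{
	for (unsigned i = 0; i < SLOTS; i++) {
		const struct slot *s = &tbl[(hash + i) % SLOTS];

		if (s->state == S_EMPTY)
			return NULL;	/* hole: the name can't be further on */
		if (s->state == S_USED && s->hash == hash && !strcmp(s->name, name))
			return s;
		/* collision or whiteout: keep probing */
	}
	return NULL;
}

int main(void)
{
	struct slot tbl[SLOTS] = {
		[3] = { S_USED,     3, "a"  },
		[4] = { S_WHITEOUT, 3, NULL },	/* keeps the probe chain alive */
		[5] = { S_USED,     3, "b"  },	/* collided with hash 3 */
	};

	printf("found: %s\n", lookup(tbl, 3, "b")->name);
	return 0;
}
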
-+ * -+ * Linear probing requires us to use whiteouts for deletions, in the event of a -+ * collision: -+ */ -+ -+struct bch_dirent { -+ struct bch_val v; -+ -+ /* Target inode number: */ -+ union { -+ __le64 d_inum; -+ struct { /* DT_SUBVOL */ -+ __le32 d_child_subvol; -+ __le32 d_parent_subvol; -+ }; -+ }; -+ -+ /* -+ * Copy of mode bits 12-15 from the target inode - so userspace can get -+ * the filetype without having to do a stat() -+ */ -+ __u8 d_type; -+ -+ __u8 d_name[]; -+} __packed __aligned(8); -+ -+#define DT_SUBVOL 16 -+#define BCH_DT_MAX 17 -+ -+#define BCH_NAME_MAX 512 -+ -+/* Xattrs */ -+ -+#define KEY_TYPE_XATTR_INDEX_USER 0 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 -+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 -+#define KEY_TYPE_XATTR_INDEX_SECURITY 4 -+ -+struct bch_xattr { -+ struct bch_val v; -+ __u8 x_type; -+ __u8 x_name_len; -+ __le16 x_val_len; -+ __u8 x_name[]; -+} __packed __aligned(8); -+ -+/* Bucket/allocation information: */ -+ -+struct bch_alloc { -+ struct bch_val v; -+ __u8 fields; -+ __u8 gen; -+ __u8 data[]; -+} __packed __aligned(8); -+ -+#define BCH_ALLOC_FIELDS_V1() \ -+ x(read_time, 16) \ -+ x(write_time, 16) \ -+ x(data_type, 8) \ -+ x(dirty_sectors, 16) \ -+ x(cached_sectors, 16) \ -+ x(oldest_gen, 8) \ -+ x(stripe, 32) \ -+ x(stripe_redundancy, 8) -+ -+enum { -+#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, -+ BCH_ALLOC_FIELDS_V1() -+#undef x -+}; -+ -+struct bch_alloc_v2 { -+ struct bch_val v; -+ __u8 nr_fields; -+ __u8 gen; -+ __u8 oldest_gen; -+ __u8 data_type; -+ __u8 data[]; -+} __packed __aligned(8); -+ -+#define BCH_ALLOC_FIELDS_V2() \ -+ x(read_time, 64) \ -+ x(write_time, 64) \ -+ x(dirty_sectors, 32) \ -+ x(cached_sectors, 32) \ -+ x(stripe, 32) \ -+ x(stripe_redundancy, 8) -+ -+struct bch_alloc_v3 { -+ struct bch_val v; -+ __le64 journal_seq; -+ __le32 flags; -+ __u8 nr_fields; -+ __u8 gen; -+ __u8 oldest_gen; -+ __u8 data_type; -+ __u8 data[]; -+} __packed __aligned(8); -+ -+LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) -+LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) -+ -+struct bch_alloc_v4 { -+ struct bch_val v; -+ __u64 journal_seq; -+ __u32 flags; -+ __u8 gen; -+ __u8 oldest_gen; -+ __u8 data_type; -+ __u8 stripe_redundancy; -+ __u32 dirty_sectors; -+ __u32 cached_sectors; -+ __u64 io_time[2]; -+ __u32 stripe; -+ __u32 nr_external_backpointers; -+ __u64 fragmentation_lru; -+} __packed __aligned(8); -+ -+#define BCH_ALLOC_V4_U64s_V0 6 -+#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64)) -+ -+BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) -+BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) -+BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) -+BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) -+ -+#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40 -+ -+struct bch_backpointer { -+ struct bch_val v; -+ __u8 btree_id; -+ __u8 level; -+ __u8 data_type; -+ __u64 bucket_offset:40; -+ __u32 bucket_len; -+ struct bpos pos; -+} __packed __aligned(8); -+ -+#define KEY_TYPE_BUCKET_GENS_BITS 8 -+#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS) -+#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1) -+ -+struct bch_bucket_gens { -+ struct bch_val v; -+ u8 gens[KEY_TYPE_BUCKET_GENS_NR]; -+} __packed __aligned(8); -+ -+/* Quotas: */ -+ -+enum quota_types { -+ QTYP_USR = 0, -+ QTYP_GRP = 1, -+ QTYP_PRJ = 2, -+ 
QTYP_NR = 3, -+}; -+ -+enum quota_counters { -+ Q_SPC = 0, -+ Q_INO = 1, -+ Q_COUNTERS = 2, -+}; -+ -+struct bch_quota_counter { -+ __le64 hardlimit; -+ __le64 softlimit; -+}; -+ -+struct bch_quota { -+ struct bch_val v; -+ struct bch_quota_counter c[Q_COUNTERS]; -+} __packed __aligned(8); -+ -+/* Erasure coding */ -+ -+struct bch_stripe { -+ struct bch_val v; -+ __le16 sectors; -+ __u8 algorithm; -+ __u8 nr_blocks; -+ __u8 nr_redundant; -+ -+ __u8 csum_granularity_bits; -+ __u8 csum_type; -+ __u8 pad; -+ -+ struct bch_extent_ptr ptrs[]; -+} __packed __aligned(8); -+ -+/* Reflink: */ -+ -+struct bch_reflink_p { -+ struct bch_val v; -+ __le64 idx; -+ /* -+ * A reflink pointer might point to an indirect extent which is then -+ * later split (by copygc or rebalance). If we only pointed to part of -+ * the original indirect extent, and then one of the fragments is -+ * outside the range we point to, we'd leak a refcount: so when creating -+ * reflink pointers, we need to store pad values to remember the full -+ * range we were taking a reference on. -+ */ -+ __le32 front_pad; -+ __le32 back_pad; -+} __packed __aligned(8); -+ -+struct bch_reflink_v { -+ struct bch_val v; -+ __le64 refcount; -+ union bch_extent_entry start[0]; -+ __u64 _data[]; -+} __packed __aligned(8); -+ -+struct bch_indirect_inline_data { -+ struct bch_val v; -+ __le64 refcount; -+ u8 data[]; -+}; -+ -+/* Inline data */ -+ -+struct bch_inline_data { -+ struct bch_val v; -+ u8 data[]; -+}; -+ -+/* Subvolumes: */ -+ -+#define SUBVOL_POS_MIN POS(0, 1) -+#define SUBVOL_POS_MAX POS(0, S32_MAX) -+#define BCACHEFS_ROOT_SUBVOL 1 -+ -+struct bch_subvolume { -+ struct bch_val v; -+ __le32 flags; -+ __le32 snapshot; -+ __le64 inode; -+ /* -+ * Snapshot subvolumes form a tree, separate from the snapshot nodes -+ * tree - if this subvolume is a snapshot, this is the ID of the -+ * subvolume it was created from: -+ */ -+ __le32 parent; -+ __le32 pad; -+ bch_le128 otime; -+}; -+ -+LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) -+/* -+ * We need to know whether a subvolume is a snapshot so we can know whether we -+ * can delete it (or whether it should just be rm -rf'd) -+ */ -+LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) -+LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) -+ -+/* Snapshots */ -+ -+struct bch_snapshot { -+ struct bch_val v; -+ __le32 flags; -+ __le32 parent; -+ __le32 children[2]; -+ __le32 subvol; -+ /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */ -+ __le32 tree; -+ __le32 depth; -+ __le32 skip[3]; -+}; -+ -+LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) -+ -+/* True if a subvolume points to this snapshot node: */ -+LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) -+ -+/* -+ * Snapshot trees: -+ * -+ * The snapshot_trees btree gives us persistent indentifier for each tree of -+ * bch_snapshot nodes, and allow us to record and easily find the root/master -+ * subvolume that other snapshots were created from: -+ */ -+struct bch_snapshot_tree { -+ struct bch_val v; -+ __le32 master_subvol; -+ __le32 root_snapshot; -+}; -+ -+/* LRU btree: */ -+ -+struct bch_lru { -+ struct bch_val v; -+ __le64 idx; -+} __packed __aligned(8); -+ -+#define LRU_ID_STRIPES (1U << 16) -+ -+/* Logged operations btree: */ -+ -+struct bch_logged_op_truncate { -+ struct bch_val v; -+ __le32 subvol; -+ __le32 pad; -+ __le64 inum; -+ __le64 new_i_size; -+}; -+ -+enum logged_op_finsert_state { -+ LOGGED_OP_FINSERT_start, -+ 
LOGGED_OP_FINSERT_shift_extents, -+ LOGGED_OP_FINSERT_finish, -+}; -+ -+struct bch_logged_op_finsert { -+ struct bch_val v; -+ __u8 state; -+ __u8 pad[3]; -+ __le32 subvol; -+ __le64 inum; -+ __le64 dst_offset; -+ __le64 src_offset; -+ __le64 pos; -+}; -+ -+/* Optional/variable size superblock sections: */ -+ -+struct bch_sb_field { -+ __u64 _data[0]; -+ __le32 u64s; -+ __le32 type; -+}; -+ -+#define BCH_SB_FIELDS() \ -+ x(journal, 0) \ -+ x(members_v1, 1) \ -+ x(crypt, 2) \ -+ x(replicas_v0, 3) \ -+ x(quota, 4) \ -+ x(disk_groups, 5) \ -+ x(clean, 6) \ -+ x(replicas, 7) \ -+ x(journal_seq_blacklist, 8) \ -+ x(journal_v2, 9) \ -+ x(counters, 10) \ -+ x(members_v2, 11) \ -+ x(errors, 12) -+ -+enum bch_sb_field_type { -+#define x(f, nr) BCH_SB_FIELD_##f = nr, -+ BCH_SB_FIELDS() -+#undef x -+ BCH_SB_FIELD_NR -+}; -+ -+/* -+ * Most superblock fields are replicated in all device's superblocks - a few are -+ * not: -+ */ -+#define BCH_SINGLE_DEVICE_SB_FIELDS \ -+ ((1U << BCH_SB_FIELD_journal)| \ -+ (1U << BCH_SB_FIELD_journal_v2)) -+ -+/* BCH_SB_FIELD_journal: */ -+ -+struct bch_sb_field_journal { -+ struct bch_sb_field field; -+ __le64 buckets[]; -+}; -+ -+struct bch_sb_field_journal_v2 { -+ struct bch_sb_field field; -+ -+ struct bch_sb_field_journal_v2_entry { -+ __le64 start; -+ __le64 nr; -+ } d[]; -+}; -+ -+/* BCH_SB_FIELD_members_v1: */ -+ -+#define BCH_MIN_NR_NBUCKETS (1 << 6) -+ -+#define BCH_IOPS_MEASUREMENTS() \ -+ x(seqread, 0) \ -+ x(seqwrite, 1) \ -+ x(randread, 2) \ -+ x(randwrite, 3) -+ -+enum bch_iops_measurement { -+#define x(t, n) BCH_IOPS_##t = n, -+ BCH_IOPS_MEASUREMENTS() -+#undef x -+ BCH_IOPS_NR -+}; -+ -+#define BCH_MEMBER_ERROR_TYPES() \ -+ x(read, 0) \ -+ x(write, 1) \ -+ x(checksum, 2) -+ -+enum bch_member_error_type { -+#define x(t, n) BCH_MEMBER_ERROR_##t = n, -+ BCH_MEMBER_ERROR_TYPES() -+#undef x -+ BCH_MEMBER_ERROR_NR -+}; -+ -+struct bch_member { -+ __uuid_t uuid; -+ __le64 nbuckets; /* device size */ -+ __le16 first_bucket; /* index of first bucket used */ -+ __le16 bucket_size; /* sectors */ -+ __le32 pad; -+ __le64 last_mount; /* time_t */ -+ -+ __le64 flags; -+ __le32 iops[4]; -+ __le64 errors[BCH_MEMBER_ERROR_NR]; -+ __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; -+ __le64 errors_reset_time; -+}; -+ -+#define BCH_MEMBER_V1_BYTES 56 -+ -+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4) -+/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */ -+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15) -+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20) -+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28) -+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30) -+LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED, -+ struct bch_member, flags, 30, 31) -+ -+#if 0 -+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); -+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40); -+#endif -+ -+#define BCH_MEMBER_STATES() \ -+ x(rw, 0) \ -+ x(ro, 1) \ -+ x(failed, 2) \ -+ x(spare, 3) -+ -+enum bch_member_state { -+#define x(t, n) BCH_MEMBER_STATE_##t = n, -+ BCH_MEMBER_STATES() -+#undef x -+ BCH_MEMBER_STATE_NR -+}; -+ -+struct bch_sb_field_members_v1 { -+ struct bch_sb_field field; -+ struct bch_member _members[]; //Members are now variable size -+}; -+ -+struct bch_sb_field_members_v2 { -+ struct bch_sb_field field; -+ __le16 member_bytes; //size of single member entry -+ u8 pad[6]; -+ struct bch_member _members[]; -+}; -+ -+/* BCH_SB_FIELD_crypt: */ -+ 
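
bch_sb_field_members_v2 above makes each member entry self-describing: member_bytes records the on-disk stride, so readers step through _members[] by that stride rather than by sizeof(struct bch_member), and the struct can grow without breaking old readers. A sketch of stride-based access under that convention (toy types, hypothetical field sizes):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stride-based access to a variable-size on-disk array: the writer records
 * the size of one entry as written; readers must index by that stride, not
 * by sizeof(), so the struct can grow without breaking old readers. */
struct toy_member {
	uint64_t nbuckets;
	uint16_t bucket_size;
	uint16_t new_field;	/* added later; old writers never wrote it */
};

struct toy_members_v2 {
	uint16_t member_bytes;	/* on-disk stride of one entry */
	uint8_t  data[];
};

static void member_get(const struct toy_members_v2 *m, unsigned i,
		       struct toy_member *out)
{
	size_t n = m->member_bytes < sizeof(*out) ? m->member_bytes : sizeof(*out);

	memset(out, 0, sizeof(*out));	/* fields the writer lacked read as 0 */
	memcpy(out, m->data + (size_t)i * m->member_bytes, n);
}

int main(void)
{
	_Alignas(8) uint8_t buf[2 + 10 * 2] = { 0 };
	struct toy_members_v2 *m = (struct toy_members_v2 *)buf;
	struct toy_member out;

	m->member_bytes = 10;	/* an older writer knew only 10 bytes/entry */
	member_get(m, 1, &out);
	printf("nbuckets=%llu new_field=%u\n",
	       (unsigned long long)out.nbuckets, out.new_field);
	return 0;
}
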
-+struct nonce { -+ __le32 d[4]; -+}; -+ -+struct bch_key { -+ __le64 key[4]; -+}; -+ -+#define BCH_KEY_MAGIC \ -+ (((__u64) 'b' << 0)|((__u64) 'c' << 8)| \ -+ ((__u64) 'h' << 16)|((__u64) '*' << 24)| \ -+ ((__u64) '*' << 32)|((__u64) 'k' << 40)| \ -+ ((__u64) 'e' << 48)|((__u64) 'y' << 56)) -+ -+struct bch_encrypted_key { -+ __le64 magic; -+ struct bch_key key; -+}; -+ -+/* -+ * If this field is present in the superblock, it stores an encryption key which -+ * is used encrypt all other data/metadata. The key will normally be encrypted -+ * with the key userspace provides, but if encryption has been turned off we'll -+ * just store the master key unencrypted in the superblock so we can access the -+ * previously encrypted data. -+ */ -+struct bch_sb_field_crypt { -+ struct bch_sb_field field; -+ -+ __le64 flags; -+ __le64 kdf_flags; -+ struct bch_encrypted_key key; -+}; -+ -+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4); -+ -+enum bch_kdf_types { -+ BCH_KDF_SCRYPT = 0, -+ BCH_KDF_NR = 1, -+}; -+ -+/* stored as base 2 log of scrypt params: */ -+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); -+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); -+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); -+ -+/* BCH_SB_FIELD_replicas: */ -+ -+#define BCH_DATA_TYPES() \ -+ x(free, 0) \ -+ x(sb, 1) \ -+ x(journal, 2) \ -+ x(btree, 3) \ -+ x(user, 4) \ -+ x(cached, 5) \ -+ x(parity, 6) \ -+ x(stripe, 7) \ -+ x(need_gc_gens, 8) \ -+ x(need_discard, 9) -+ -+enum bch_data_type { -+#define x(t, n) BCH_DATA_##t, -+ BCH_DATA_TYPES() -+#undef x -+ BCH_DATA_NR -+}; -+ -+static inline bool data_type_is_empty(enum bch_data_type type) -+{ -+ switch (type) { -+ case BCH_DATA_free: -+ case BCH_DATA_need_gc_gens: -+ case BCH_DATA_need_discard: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool data_type_is_hidden(enum bch_data_type type) -+{ -+ switch (type) { -+ case BCH_DATA_sb: -+ case BCH_DATA_journal: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+struct bch_replicas_entry_v0 { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 devs[]; -+} __packed; -+ -+struct bch_sb_field_replicas_v0 { -+ struct bch_sb_field field; -+ struct bch_replicas_entry_v0 entries[]; -+} __packed __aligned(8); -+ -+struct bch_replicas_entry { -+ __u8 data_type; -+ __u8 nr_devs; -+ __u8 nr_required; -+ __u8 devs[]; -+} __packed; -+ -+#define replicas_entry_bytes(_i) \ -+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs) -+ -+struct bch_sb_field_replicas { -+ struct bch_sb_field field; -+ struct bch_replicas_entry entries[]; -+} __packed __aligned(8); -+ -+/* BCH_SB_FIELD_quota: */ -+ -+struct bch_sb_quota_counter { -+ __le32 timelimit; -+ __le32 warnlimit; -+}; -+ -+struct bch_sb_quota_type { -+ __le64 flags; -+ struct bch_sb_quota_counter c[Q_COUNTERS]; -+}; -+ -+struct bch_sb_field_quota { -+ struct bch_sb_field field; -+ struct bch_sb_quota_type q[QTYP_NR]; -+} __packed __aligned(8); -+ -+/* BCH_SB_FIELD_disk_groups: */ -+ -+#define BCH_SB_LABEL_SIZE 32 -+ -+struct bch_disk_group { -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 flags[2]; -+} __packed __aligned(8); -+ -+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1) -+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6) -+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24) -+ -+struct bch_sb_field_disk_groups { -+ struct bch_sb_field field; -+ struct bch_disk_group entries[]; -+} 
__packed __aligned(8); -+ -+/* BCH_SB_FIELD_counters */ -+ -+#define BCH_PERSISTENT_COUNTERS() \ -+ x(io_read, 0) \ -+ x(io_write, 1) \ -+ x(io_move, 2) \ -+ x(bucket_invalidate, 3) \ -+ x(bucket_discard, 4) \ -+ x(bucket_alloc, 5) \ -+ x(bucket_alloc_fail, 6) \ -+ x(btree_cache_scan, 7) \ -+ x(btree_cache_reap, 8) \ -+ x(btree_cache_cannibalize, 9) \ -+ x(btree_cache_cannibalize_lock, 10) \ -+ x(btree_cache_cannibalize_lock_fail, 11) \ -+ x(btree_cache_cannibalize_unlock, 12) \ -+ x(btree_node_write, 13) \ -+ x(btree_node_read, 14) \ -+ x(btree_node_compact, 15) \ -+ x(btree_node_merge, 16) \ -+ x(btree_node_split, 17) \ -+ x(btree_node_rewrite, 18) \ -+ x(btree_node_alloc, 19) \ -+ x(btree_node_free, 20) \ -+ x(btree_node_set_root, 21) \ -+ x(btree_path_relock_fail, 22) \ -+ x(btree_path_upgrade_fail, 23) \ -+ x(btree_reserve_get_fail, 24) \ -+ x(journal_entry_full, 25) \ -+ x(journal_full, 26) \ -+ x(journal_reclaim_finish, 27) \ -+ x(journal_reclaim_start, 28) \ -+ x(journal_write, 29) \ -+ x(read_promote, 30) \ -+ x(read_bounce, 31) \ -+ x(read_split, 33) \ -+ x(read_retry, 32) \ -+ x(read_reuse_race, 34) \ -+ x(move_extent_read, 35) \ -+ x(move_extent_write, 36) \ -+ x(move_extent_finish, 37) \ -+ x(move_extent_fail, 38) \ -+ x(move_extent_alloc_mem_fail, 39) \ -+ x(copygc, 40) \ -+ x(copygc_wait, 41) \ -+ x(gc_gens_end, 42) \ -+ x(gc_gens_start, 43) \ -+ x(trans_blocked_journal_reclaim, 44) \ -+ x(trans_restart_btree_node_reused, 45) \ -+ x(trans_restart_btree_node_split, 46) \ -+ x(trans_restart_fault_inject, 47) \ -+ x(trans_restart_iter_upgrade, 48) \ -+ x(trans_restart_journal_preres_get, 49) \ -+ x(trans_restart_journal_reclaim, 50) \ -+ x(trans_restart_journal_res_get, 51) \ -+ x(trans_restart_key_cache_key_realloced, 52) \ -+ x(trans_restart_key_cache_raced, 53) \ -+ x(trans_restart_mark_replicas, 54) \ -+ x(trans_restart_mem_realloced, 55) \ -+ x(trans_restart_memory_allocation_failure, 56) \ -+ x(trans_restart_relock, 57) \ -+ x(trans_restart_relock_after_fill, 58) \ -+ x(trans_restart_relock_key_cache_fill, 59) \ -+ x(trans_restart_relock_next_node, 60) \ -+ x(trans_restart_relock_parent_for_fill, 61) \ -+ x(trans_restart_relock_path, 62) \ -+ x(trans_restart_relock_path_intent, 63) \ -+ x(trans_restart_too_many_iters, 64) \ -+ x(trans_restart_traverse, 65) \ -+ x(trans_restart_upgrade, 66) \ -+ x(trans_restart_would_deadlock, 67) \ -+ x(trans_restart_would_deadlock_write, 68) \ -+ x(trans_restart_injected, 69) \ -+ x(trans_restart_key_cache_upgrade, 70) \ -+ x(trans_traverse_all, 71) \ -+ x(transaction_commit, 72) \ -+ x(write_super, 73) \ -+ x(trans_restart_would_deadlock_recursion_limit, 74) \ -+ x(trans_restart_write_buffer_flush, 75) \ -+ x(trans_restart_split_race, 76) -+ -+enum bch_persistent_counters { -+#define x(t, n, ...) 
BCH_COUNTER_##t, -+ BCH_PERSISTENT_COUNTERS() -+#undef x -+ BCH_COUNTER_NR -+}; -+ -+struct bch_sb_field_counters { -+ struct bch_sb_field field; -+ __le64 d[]; -+}; -+ -+/* -+ * On clean shutdown, store btree roots and current journal sequence number in -+ * the superblock: -+ */ -+struct jset_entry { -+ __le16 u64s; -+ __u8 btree_id; -+ __u8 level; -+ __u8 type; /* designates what this jset holds */ -+ __u8 pad[3]; -+ -+ struct bkey_i start[0]; -+ __u64 _data[]; -+}; -+ -+struct bch_sb_field_clean { -+ struct bch_sb_field field; -+ -+ __le32 flags; -+ __le16 _read_clock; /* no longer used */ -+ __le16 _write_clock; -+ __le64 journal_seq; -+ -+ struct jset_entry start[0]; -+ __u64 _data[]; -+}; -+ -+struct journal_seq_blacklist_entry { -+ __le64 start; -+ __le64 end; -+}; -+ -+struct bch_sb_field_journal_seq_blacklist { -+ struct bch_sb_field field; -+ struct journal_seq_blacklist_entry start[]; -+}; -+ -+struct bch_sb_field_errors { -+ struct bch_sb_field field; -+ struct bch_sb_field_error_entry { -+ __le64 v; -+ __le64 last_error_time; -+ } entries[]; -+}; -+ -+LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); -+LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); -+ -+/* Superblock: */ -+ -+/* -+ * New versioning scheme: -+ * One common version number for all on disk data structures - superblock, btree -+ * nodes, journal entries -+ */ -+#define BCH_VERSION_MAJOR(_v) ((__u16) ((_v) >> 10)) -+#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) -+#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) -+ -+#define RECOVERY_PASS_ALL_FSCK (1ULL << 63) -+ -+#define BCH_METADATA_VERSIONS() \ -+ x(bkey_renumber, BCH_VERSION(0, 10), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(inode_btree_change, BCH_VERSION(0, 11), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(snapshot, BCH_VERSION(0, 12), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(inode_backpointers, BCH_VERSION(0, 13), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(snapshot_2, BCH_VERSION(0, 15), \ -+ BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \ -+ BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(reflink_p_fix, BCH_VERSION(0, 16), \ -+ BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \ -+ x(subvol_dirent, BCH_VERSION(0, 17), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(inode_v2, BCH_VERSION(0, 18), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(freespace, BCH_VERSION(0, 19), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(alloc_v4, BCH_VERSION(0, 20), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(new_data_types, BCH_VERSION(0, 21), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(backpointers, BCH_VERSION(0, 22), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(inode_v3, BCH_VERSION(0, 23), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(unwritten_extents, BCH_VERSION(0, 24), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(bucket_gens, BCH_VERSION(0, 25), \ -+ BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(lru_v2, BCH_VERSION(0, 26), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(fragmentation_lru, BCH_VERSION(0, 27), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(snapshot_trees, BCH_VERSION(0, 29), \ -+ RECOVERY_PASS_ALL_FSCK) \ -+ x(major_minor, BCH_VERSION(1, 0), \ -+ 0) \ -+ x(snapshot_skiplists, BCH_VERSION(1, 1), \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ -+ x(deleted_inodes, BCH_VERSION(1, 2), \ -+ BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ -+ x(rebalance_work, 
BCH_VERSION(1, 3), \ -+ BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) -+ -+enum bcachefs_metadata_version { -+ bcachefs_metadata_version_min = 9, -+#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n, -+ BCH_METADATA_VERSIONS() -+#undef x -+ bcachefs_metadata_version_max -+}; -+ -+static const __maybe_unused -+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; -+ -+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) -+ -+#define BCH_SB_SECTOR 8 -+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ -+ -+struct bch_sb_layout { -+ __uuid_t magic; /* bcachefs superblock UUID */ -+ __u8 layout_type; -+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */ -+ __u8 nr_superblocks; -+ __u8 pad[5]; -+ __le64 sb_offset[61]; -+} __packed __aligned(8); -+ -+#define BCH_SB_LAYOUT_SECTOR 7 -+ -+/* -+ * @offset - sector where this sb was written -+ * @version - on disk format version -+ * @version_min - Oldest metadata version this filesystem contains; so we can -+ * safely drop compatibility code and refuse to mount filesystems -+ * we'd need it for -+ * @magic - identifies as a bcachefs superblock (BCHFS_MAGIC) -+ * @seq - incremented each time superblock is written -+ * @uuid - used for generating various magic numbers and identifying -+ * member devices, never changes -+ * @user_uuid - user visible UUID, may be changed -+ * @label - filesystem label -+ * @seq - identifies most recent superblock, incremented each time -+ * superblock is written -+ * @features - enabled incompatible features -+ */ -+struct bch_sb { -+ struct bch_csum csum; -+ __le16 version; -+ __le16 version_min; -+ __le16 pad[2]; -+ __uuid_t magic; -+ __uuid_t uuid; -+ __uuid_t user_uuid; -+ __u8 label[BCH_SB_LABEL_SIZE]; -+ __le64 offset; -+ __le64 seq; -+ -+ __le16 block_size; -+ __u8 dev_idx; -+ __u8 nr_devices; -+ __le32 u64s; -+ -+ __le64 time_base_lo; -+ __le32 time_base_hi; -+ __le32 time_precision; -+ -+ __le64 flags[8]; -+ __le64 features[2]; -+ __le64 compat[2]; -+ -+ struct bch_sb_layout layout; -+ -+ struct bch_sb_field start[0]; -+ __le64 _data[]; -+} __packed __aligned(8); -+ -+/* -+ * Flags: -+ * BCH_SB_INITALIZED - set on first mount -+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect -+ * behaviour of mount/recovery path: -+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits -+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80 -+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides -+ * DATA/META_CSUM_TYPE. 
Also indicates encryption -+ * algorithm in use, if/when we get more than one -+ */ -+ -+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16); -+ -+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1); -+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2); -+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8); -+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12); -+ -+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28); -+ -+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33); -+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40); -+ -+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44); -+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56); -+ -+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57); -+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58); -+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59); -+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); -+ -+LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); -+LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62); -+ -+LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); -+ -+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4); -+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8); -+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9); -+ -+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10); -+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14); -+ -+/* -+ * Max size of an extent that may require bouncing to read or write -+ * (checksummed, compressed): 64k -+ */ -+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS, -+ struct bch_sb, flags[1], 14, 20); -+ -+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24); -+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28); -+ -+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40); -+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52); -+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64); -+ -+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO, -+ struct bch_sb, flags[2], 0, 4); -+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64); -+ -+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); -+LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); -+LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29); -+LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30); -+LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62); -+LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63); -+LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32); -+LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33); -+LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34); -+LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54); -+LE64_BITMASK(BCH_SB_VERSION_UPGRADE, struct bch_sb, flags[4], 54, 56); -+ -+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60); -+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, -+ struct bch_sb, flags[4], 60, 64); -+ 
-+LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, -+ struct bch_sb, flags[5], 0, 16); -+ -+static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) -+{ -+ return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4); -+} -+ -+static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) -+{ -+ SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v); -+ SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4); -+} -+ -+static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb) -+{ -+ return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) | -+ (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4); -+} -+ -+static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v) -+{ -+ SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v); -+ SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4); -+} -+ -+/* -+ * Features: -+ * -+ * journal_seq_blacklist_v3: gates BCH_SB_FIELD_journal_seq_blacklist -+ * reflink: gates KEY_TYPE_reflink -+ * inline_data: gates KEY_TYPE_inline_data -+ * new_siphash: gates BCH_STR_HASH_siphash -+ * new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE -+ */ -+#define BCH_SB_FEATURES() \ -+ x(lz4, 0) \ -+ x(gzip, 1) \ -+ x(zstd, 2) \ -+ x(atomic_nlink, 3) \ -+ x(ec, 4) \ -+ x(journal_seq_blacklist_v3, 5) \ -+ x(reflink, 6) \ -+ x(new_siphash, 7) \ -+ x(inline_data, 8) \ -+ x(new_extent_overwrite, 9) \ -+ x(incompressible, 10) \ -+ x(btree_ptr_v2, 11) \ -+ x(extents_above_btree_updates, 12) \ -+ x(btree_updates_journalled, 13) \ -+ x(reflink_inline_data, 14) \ -+ x(new_varint, 15) \ -+ x(journal_no_flush, 16) \ -+ x(alloc_v2, 17) \ -+ x(extents_across_btree_nodes, 18) -+ -+#define BCH_SB_FEATURES_ALWAYS \ -+ ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ -+ (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ -+ (1ULL << BCH_FEATURE_btree_updates_journalled)|\ -+ (1ULL << BCH_FEATURE_alloc_v2)|\ -+ (1ULL << BCH_FEATURE_extents_across_btree_nodes)) -+ -+#define BCH_SB_FEATURES_ALL \ -+ (BCH_SB_FEATURES_ALWAYS| \ -+ (1ULL << BCH_FEATURE_new_siphash)| \ -+ (1ULL << BCH_FEATURE_btree_ptr_v2)| \ -+ (1ULL << BCH_FEATURE_new_varint)| \ -+ (1ULL << BCH_FEATURE_journal_no_flush)) -+ -+enum bch_sb_feature { -+#define x(f, n) BCH_FEATURE_##f, -+ BCH_SB_FEATURES() -+#undef x -+ BCH_FEATURE_NR, -+}; -+ -+#define BCH_SB_COMPAT() \ -+ x(alloc_info, 0) \ -+ x(alloc_metadata, 1) \ -+ x(extents_above_btree_updates_done, 2) \ -+ x(bformat_overflow_done, 3) -+ -+enum bch_sb_compat { -+#define x(f, n) BCH_COMPAT_##f, -+ BCH_SB_COMPAT() -+#undef x -+ BCH_COMPAT_NR, -+}; -+ -+/* options: */ -+ -+#define BCH_VERSION_UPGRADE_OPTS() \ -+ x(compatible, 0) \ -+ x(incompatible, 1) \ -+ x(none, 2) -+ -+enum bch_version_upgrade_opts { -+#define x(t, n) BCH_VERSION_UPGRADE_##t = n, -+ BCH_VERSION_UPGRADE_OPTS() -+#undef x -+}; -+ -+#define BCH_REPLICAS_MAX 4U -+ -+#define BCH_BKEY_PTRS_MAX 16U -+ -+#define BCH_ERROR_ACTIONS() \ -+ x(continue, 0) \ -+ x(ro, 1) \ -+ x(panic, 2) -+ -+enum bch_error_actions { -+#define x(t, n) BCH_ON_ERROR_##t = n, -+ BCH_ERROR_ACTIONS() -+#undef x -+ BCH_ON_ERROR_NR -+}; -+ -+#define BCH_STR_HASH_TYPES() \ -+ x(crc32c, 0) \ -+ x(crc64, 1) \ -+ x(siphash_old, 2) \ -+ x(siphash, 3) -+ -+enum bch_str_hash_type { -+#define x(t, n) BCH_STR_HASH_##t = n, -+ BCH_STR_HASH_TYPES() -+#undef x -+ BCH_STR_HASH_NR -+}; -+ -+#define BCH_STR_HASH_OPTS() \ -+ x(crc32c, 0) \ -+ x(crc64, 1) \ -+ x(siphash, 2) -+ -+enum bch_str_hash_opts { -+#define x(t, n) BCH_STR_HASH_OPT_##t = n, -+ BCH_STR_HASH_OPTS() -+#undef x -+ BCH_STR_HASH_OPT_NR -+}; -+ 
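-+/*
-+ * Illustrative sketch, not from the bcachefs sources: the x-macro pattern
-+ * used by BCH_ERROR_ACTIONS(), BCH_STR_HASH_TYPES() and friends above.
-+ * One list macro is expanded twice, once into enum constants and once into
-+ * a name table, so the constants and their names cannot drift apart.
-+ * Assumes a standalone userspace build; the demo_* names are made up.
-+ */
-+#include <stdio.h>
-+
-+#define DEMO_ERROR_ACTIONS()	\
-+	x(continue_op,	0)	\
-+	x(read_only,	1)	\
-+	x(panic_fs,	2)
-+
-+enum demo_error_action {
-+#define x(t, n) DEMO_ON_ERROR_##t = n,
-+	DEMO_ERROR_ACTIONS()
-+#undef x
-+	DEMO_ON_ERROR_NR
-+};
-+
-+static const char * const demo_error_action_names[] = {
-+#define x(t, n) [n] = #t,
-+	DEMO_ERROR_ACTIONS()
-+#undef x
-+};
-+
-+int main(void)
-+{
-+	for (int i = 0; i < DEMO_ON_ERROR_NR; i++)
-+		printf("%d: %s\n", i, demo_error_action_names[i]);
-+	return 0;
-+}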
-+#define BCH_CSUM_TYPES() \
-+	x(none, 0) \
-+	x(crc32c_nonzero, 1) \
-+	x(crc64_nonzero, 2) \
-+	x(chacha20_poly1305_80, 3) \
-+	x(chacha20_poly1305_128, 4) \
-+	x(crc32c, 5) \
-+	x(crc64, 6) \
-+	x(xxhash, 7)
-+
-+enum bch_csum_type {
-+#define x(t, n) BCH_CSUM_##t = n,
-+	BCH_CSUM_TYPES()
-+#undef x
-+	BCH_CSUM_NR
-+};
-+
-+static const __maybe_unused unsigned bch_crc_bytes[] = {
-+	[BCH_CSUM_none] = 0,
-+	[BCH_CSUM_crc32c_nonzero] = 4,
-+	[BCH_CSUM_crc32c] = 4,
-+	[BCH_CSUM_crc64_nonzero] = 8,
-+	[BCH_CSUM_crc64] = 8,
-+	[BCH_CSUM_xxhash] = 8,
-+	[BCH_CSUM_chacha20_poly1305_80] = 10,
-+	[BCH_CSUM_chacha20_poly1305_128] = 16,
-+};
-+
-+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
-+{
-+	switch (type) {
-+	case BCH_CSUM_chacha20_poly1305_80:
-+	case BCH_CSUM_chacha20_poly1305_128:
-+		return true;
-+	default:
-+		return false;
-+	}
-+}
-+
-+#define BCH_CSUM_OPTS() \
-+	x(none, 0) \
-+	x(crc32c, 1) \
-+	x(crc64, 2) \
-+	x(xxhash, 3)
-+
-+enum bch_csum_opts {
-+#define x(t, n) BCH_CSUM_OPT_##t = n,
-+	BCH_CSUM_OPTS()
-+#undef x
-+	BCH_CSUM_OPT_NR
-+};
-+
-+#define BCH_COMPRESSION_TYPES() \
-+	x(none, 0) \
-+	x(lz4_old, 1) \
-+	x(gzip, 2) \
-+	x(lz4, 3) \
-+	x(zstd, 4) \
-+	x(incompressible, 5)
-+
-+enum bch_compression_type {
-+#define x(t, n) BCH_COMPRESSION_TYPE_##t = n,
-+	BCH_COMPRESSION_TYPES()
-+#undef x
-+	BCH_COMPRESSION_TYPE_NR
-+};
-+
-+#define BCH_COMPRESSION_OPTS() \
-+	x(none, 0) \
-+	x(lz4, 1) \
-+	x(gzip, 2) \
-+	x(zstd, 3)
-+
-+enum bch_compression_opts {
-+#define x(t, n) BCH_COMPRESSION_OPT_##t = n,
-+	BCH_COMPRESSION_OPTS()
-+#undef x
-+	BCH_COMPRESSION_OPT_NR
-+};
-+
-+/*
-+ * Magic numbers
-+ *
-+ * The various other data structures have their own magic numbers, which are
-+ * xored with the first part of the cache set's UUID
-+ */
-+
-+#define BCACHE_MAGIC \
-+	UUID_INIT(0xc68573f6, 0x4e1a, 0x45ca, \
-+		  0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
-+#define BCHFS_MAGIC \
-+	UUID_INIT(0xc68573f6, 0x66ce, 0x90a9, \
-+		  0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
-+
-+#define BCACHEFS_STATFS_MAGIC 0xca451a4e
-+
-+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL)
-+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL)
-+
-+static inline __le64 __bch2_sb_magic(struct bch_sb *sb)
-+{
-+	__le64 ret;
-+
-+	memcpy(&ret, &sb->uuid, sizeof(ret));
-+	return ret;
-+}
-+
-+static inline __u64 __jset_magic(struct bch_sb *sb)
-+{
-+	return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC);
-+}
-+
-+static inline __u64 __bset_magic(struct bch_sb *sb)
-+{
-+	return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC);
-+}
-+
-+/* Journal */
-+
-+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
-+
-+#define BCH_JSET_ENTRY_TYPES() \
-+	x(btree_keys, 0) \
-+	x(btree_root, 1) \
-+	x(prio_ptrs, 2) \
-+	x(blacklist, 3) \
-+	x(blacklist_v2, 4) \
-+	x(usage, 5) \
-+	x(data_usage, 6) \
-+	x(clock, 7) \
-+	x(dev_usage, 8) \
-+	x(log, 9) \
-+	x(overwrite, 10)
-+
-+enum {
-+#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
-+	BCH_JSET_ENTRY_TYPES()
-+#undef x
-+	BCH_JSET_ENTRY_NR
-+};
-+
-+/*
-+ * Journal sequence numbers can be blacklisted: bsets record the max sequence
-+ * number of all the journal entries they contain updates for, so that on
-+ * recovery we can ignore those bsets that contain index updates newer than what
-+ * made it into the journal.
-+ * -+ * This means that we can't reuse that journal_seq - we have to skip it, and -+ * then record that we skipped it so that the next time we crash and recover we -+ * don't think there was a missing journal entry. -+ */ -+struct jset_entry_blacklist { -+ struct jset_entry entry; -+ __le64 seq; -+}; -+ -+struct jset_entry_blacklist_v2 { -+ struct jset_entry entry; -+ __le64 start; -+ __le64 end; -+}; -+ -+#define BCH_FS_USAGE_TYPES() \ -+ x(reserved, 0) \ -+ x(inodes, 1) \ -+ x(key_version, 2) -+ -+enum { -+#define x(f, nr) BCH_FS_USAGE_##f = nr, -+ BCH_FS_USAGE_TYPES() -+#undef x -+ BCH_FS_USAGE_NR -+}; -+ -+struct jset_entry_usage { -+ struct jset_entry entry; -+ __le64 v; -+} __packed; -+ -+struct jset_entry_data_usage { -+ struct jset_entry entry; -+ __le64 v; -+ struct bch_replicas_entry r; -+} __packed; -+ -+struct jset_entry_clock { -+ struct jset_entry entry; -+ __u8 rw; -+ __u8 pad[7]; -+ __le64 time; -+} __packed; -+ -+struct jset_entry_dev_usage_type { -+ __le64 buckets; -+ __le64 sectors; -+ __le64 fragmented; -+} __packed; -+ -+struct jset_entry_dev_usage { -+ struct jset_entry entry; -+ __le32 dev; -+ __u32 pad; -+ -+ __le64 buckets_ec; -+ __le64 _buckets_unavailable; /* No longer used */ -+ -+ struct jset_entry_dev_usage_type d[]; -+}; -+ -+static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage *u) -+{ -+ return (vstruct_bytes(&u->entry) - sizeof(struct jset_entry_dev_usage)) / -+ sizeof(struct jset_entry_dev_usage_type); -+} -+ -+struct jset_entry_log { -+ struct jset_entry entry; -+ u8 d[]; -+} __packed; -+ -+/* -+ * On disk format for a journal entry: -+ * seq is monotonically increasing; every journal entry has its own unique -+ * sequence number. -+ * -+ * last_seq is the oldest journal entry that still has keys the btree hasn't -+ * flushed to disk yet. -+ * -+ * version is for on disk format changes. 
-+ */ -+struct jset { -+ struct bch_csum csum; -+ -+ __le64 magic; -+ __le64 seq; -+ __le32 version; -+ __le32 flags; -+ -+ __le32 u64s; /* size of d[] in u64s */ -+ -+ __u8 encrypted_start[0]; -+ -+ __le16 _read_clock; /* no longer used */ -+ __le16 _write_clock; -+ -+ /* Sequence number of oldest dirty journal entry */ -+ __le64 last_seq; -+ -+ -+ struct jset_entry start[0]; -+ __u64 _data[]; -+} __packed __aligned(8); -+ -+LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); -+LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); -+LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); -+ -+#define BCH_JOURNAL_BUCKETS_MIN 8 -+ -+/* Btree: */ -+ -+enum btree_id_flags { -+ BTREE_ID_EXTENTS = BIT(0), -+ BTREE_ID_SNAPSHOTS = BIT(1), -+ BTREE_ID_SNAPSHOT_FIELD = BIT(2), -+ BTREE_ID_DATA = BIT(3), -+}; -+ -+#define BCH_BTREE_IDS() \ -+ x(extents, 0, BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\ -+ BIT_ULL(KEY_TYPE_whiteout)| \ -+ BIT_ULL(KEY_TYPE_error)| \ -+ BIT_ULL(KEY_TYPE_cookie)| \ -+ BIT_ULL(KEY_TYPE_extent)| \ -+ BIT_ULL(KEY_TYPE_reservation)| \ -+ BIT_ULL(KEY_TYPE_reflink_p)| \ -+ BIT_ULL(KEY_TYPE_inline_data)) \ -+ x(inodes, 1, BTREE_ID_SNAPSHOTS, \ -+ BIT_ULL(KEY_TYPE_whiteout)| \ -+ BIT_ULL(KEY_TYPE_inode)| \ -+ BIT_ULL(KEY_TYPE_inode_v2)| \ -+ BIT_ULL(KEY_TYPE_inode_v3)| \ -+ BIT_ULL(KEY_TYPE_inode_generation)) \ -+ x(dirents, 2, BTREE_ID_SNAPSHOTS, \ -+ BIT_ULL(KEY_TYPE_whiteout)| \ -+ BIT_ULL(KEY_TYPE_hash_whiteout)| \ -+ BIT_ULL(KEY_TYPE_dirent)) \ -+ x(xattrs, 3, BTREE_ID_SNAPSHOTS, \ -+ BIT_ULL(KEY_TYPE_whiteout)| \ -+ BIT_ULL(KEY_TYPE_cookie)| \ -+ BIT_ULL(KEY_TYPE_hash_whiteout)| \ -+ BIT_ULL(KEY_TYPE_xattr)) \ -+ x(alloc, 4, 0, \ -+ BIT_ULL(KEY_TYPE_alloc)| \ -+ BIT_ULL(KEY_TYPE_alloc_v2)| \ -+ BIT_ULL(KEY_TYPE_alloc_v3)| \ -+ BIT_ULL(KEY_TYPE_alloc_v4)) \ -+ x(quotas, 5, 0, \ -+ BIT_ULL(KEY_TYPE_quota)) \ -+ x(stripes, 6, 0, \ -+ BIT_ULL(KEY_TYPE_stripe)) \ -+ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ -+ BIT_ULL(KEY_TYPE_reflink_v)| \ -+ BIT_ULL(KEY_TYPE_indirect_inline_data)) \ -+ x(subvolumes, 8, 0, \ -+ BIT_ULL(KEY_TYPE_subvolume)) \ -+ x(snapshots, 9, 0, \ -+ BIT_ULL(KEY_TYPE_snapshot)) \ -+ x(lru, 10, 0, \ -+ BIT_ULL(KEY_TYPE_set)) \ -+ x(freespace, 11, BTREE_ID_EXTENTS, \ -+ BIT_ULL(KEY_TYPE_set)) \ -+ x(need_discard, 12, 0, \ -+ BIT_ULL(KEY_TYPE_set)) \ -+ x(backpointers, 13, 0, \ -+ BIT_ULL(KEY_TYPE_backpointer)) \ -+ x(bucket_gens, 14, 0, \ -+ BIT_ULL(KEY_TYPE_bucket_gens)) \ -+ x(snapshot_trees, 15, 0, \ -+ BIT_ULL(KEY_TYPE_snapshot_tree)) \ -+ x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \ -+ BIT_ULL(KEY_TYPE_set)) \ -+ x(logged_ops, 17, 0, \ -+ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ -+ BIT_ULL(KEY_TYPE_logged_op_finsert)) \ -+ x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ -+ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) -+ -+enum btree_id { -+#define x(name, nr, ...) BTREE_ID_##name = nr, -+ BCH_BTREE_IDS() -+#undef x -+ BTREE_ID_NR -+}; -+ -+#define BTREE_MAX_DEPTH 4U -+ -+/* Btree nodes */ -+ -+/* -+ * Btree nodes -+ * -+ * On disk a btree node is a list/log of these; within each set the keys are -+ * sorted -+ */ -+struct bset { -+ __le64 seq; -+ -+ /* -+ * Highest journal entry this bset contains keys for. -+ * If on recovery we don't see that journal entry, this bset is ignored: -+ * this allows us to preserve the order of all index updates after a -+ * crash, since the journal records a total order of all index updates -+ * and anything that didn't make it to the journal doesn't get used. 
-+ */
-+	__le64 journal_seq;
-+
-+	__le32 flags;
-+	__le16 version;
-+	__le16 u64s; /* count of d[] in u64s */
-+
-+	struct bkey_packed start[0];
-+	__u64 _data[];
-+} __packed __aligned(8);
-+
-+LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
-+
-+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5);
-+LE32_BITMASK(BSET_SEPARATE_WHITEOUTS,
-+		struct bset, flags, 5, 6);
-+
-+/* Sector offset within the btree node: */
-+LE32_BITMASK(BSET_OFFSET, struct bset, flags, 16, 32);
-+
-+struct btree_node {
-+	struct bch_csum csum;
-+	__le64 magic;
-+
-+	/* this flags field is encrypted, unlike bset->flags: */
-+	__le64 flags;
-+
-+	/* Closed interval: */
-+	struct bpos min_key;
-+	struct bpos max_key;
-+	struct bch_extent_ptr _ptr; /* not used anymore */
-+	struct bkey_format format;
-+
-+	union {
-+		struct bset keys;
-+		struct {
-+			__u8 pad[22];
-+			__le16 u64s;
-+			__u64 _data[0];
-+
-+		};
-+	};
-+} __packed __aligned(8);
-+
-+LE64_BITMASK(BTREE_NODE_ID_LO, struct btree_node, flags, 0, 4);
-+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
-+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
-+		struct btree_node, flags, 8, 9);
-+LE64_BITMASK(BTREE_NODE_ID_HI, struct btree_node, flags, 9, 25);
-+/* 25-32 unused */
-+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64);
-+
-+static inline __u64 BTREE_NODE_ID(struct btree_node *n)
-+{
-+	return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
-+}
-+
-+static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
-+{
-+	SET_BTREE_NODE_ID_LO(n, v);
-+	SET_BTREE_NODE_ID_HI(n, v >> 4);
-+}
-+
-+struct btree_node_entry {
-+	struct bch_csum csum;
-+
-+	union {
-+		struct bset keys;
-+		struct {
-+			__u8 pad[22];
-+			__le16 u64s;
-+			__u64 _data[0];
-+		};
-+	};
-+} __packed __aligned(8);
-+
-+#endif /* _BCACHEFS_FORMAT_H */
-diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h
-new file mode 100644
-index 000000000000..f05881f7e113
---- /dev/null
-+++ b/fs/bcachefs/bcachefs_ioctl.h
-@@ -0,0 +1,368 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IOCTL_H
-+#define _BCACHEFS_IOCTL_H
-+
-+#include <linux/uuid.h>
-+#include <asm/ioctl.h>
-+#include "bcachefs_format.h"
-+
-+/*
-+ * Flags common to multiple ioctls:
-+ */
-+#define BCH_FORCE_IF_DATA_LOST (1 << 0)
-+#define BCH_FORCE_IF_METADATA_LOST (1 << 1)
-+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
-+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
-+
-+#define BCH_FORCE_IF_LOST \
-+	(BCH_FORCE_IF_DATA_LOST| \
-+	 BCH_FORCE_IF_METADATA_LOST)
-+#define BCH_FORCE_IF_DEGRADED \
-+	(BCH_FORCE_IF_DATA_DEGRADED| \
-+	 BCH_FORCE_IF_METADATA_DEGRADED)
-+
-+/*
-+ * If cleared, ioctls that refer to a device pass it as a pointer to a pathname
-+ * (e.g. /dev/sda1); if set, the dev field is the device's index within the
-+ * filesystem:
-+ */
-+#define BCH_BY_INDEX (1 << 4)
-+
-+/*
-+ * For BCH_IOCTL_READ_SUPER: get superblock of a specific device, not filesystem
-+ * wide superblock:
-+ */
-+#define BCH_READ_DEV (1 << 5)
-+
-+/* global control dev: */
-+
-+/* These are currently broken, and probably unnecessary: */
-+#if 0
-+#define BCH_IOCTL_ASSEMBLE _IOW(0xbc, 1, struct bch_ioctl_assemble)
-+#define BCH_IOCTL_INCREMENTAL _IOW(0xbc, 2, struct bch_ioctl_incremental)
-+
-+struct bch_ioctl_assemble {
-+	__u32 flags;
-+	__u32 nr_devs;
-+	__u64 pad;
-+	__u64 devs[];
-+};
-+
-+struct bch_ioctl_incremental {
-+	__u32 flags;
-+	__u64 pad;
-+	__u64 dev;
-+};
-+#endif
-+
-+/* filesystem ioctls: */
-+
-+#define BCH_IOCTL_QUERY_UUID _IOR(0xbc, 1, struct bch_ioctl_query_uuid)
-+
-+/* These only make sense when we also have incremental assembly */
-+#if 0
-+#define BCH_IOCTL_START _IOW(0xbc, 2, struct bch_ioctl_start)
-+#define BCH_IOCTL_STOP _IO(0xbc, 3)
-+#endif
-+
-+#define BCH_IOCTL_DISK_ADD _IOW(0xbc, 4, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_REMOVE _IOW(0xbc, 5, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_ONLINE _IOW(0xbc, 6, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_OFFLINE _IOW(0xbc, 7, struct bch_ioctl_disk)
-+#define BCH_IOCTL_DISK_SET_STATE _IOW(0xbc, 8, struct bch_ioctl_disk_set_state)
-+#define BCH_IOCTL_DATA _IOW(0xbc, 10, struct bch_ioctl_data)
-+#define BCH_IOCTL_FS_USAGE _IOWR(0xbc, 11, struct bch_ioctl_fs_usage)
-+#define BCH_IOCTL_DEV_USAGE _IOWR(0xbc, 11, struct bch_ioctl_dev_usage)
-+#define BCH_IOCTL_READ_SUPER _IOW(0xbc, 12, struct bch_ioctl_read_super)
-+#define BCH_IOCTL_DISK_GET_IDX _IOW(0xbc, 13, struct bch_ioctl_disk_get_idx)
-+#define BCH_IOCTL_DISK_RESIZE _IOW(0xbc, 14, struct bch_ioctl_disk_resize)
-+#define BCH_IOCTL_DISK_RESIZE_JOURNAL _IOW(0xbc, 15, struct bch_ioctl_disk_resize_journal)
-+
-+#define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
-+#define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
-+
-+/* ioctls below act on a particular file, not the filesystem as a whole: */
-+
-+#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 64, const char __user *)
-+
-+/*
-+ * BCH_IOCTL_QUERY_UUID: get filesystem UUID
-+ *
-+ * Returns user visible UUID, not internal UUID (which may not ever be changed);
-+ * the filesystem's sysfs directory may be found under /sys/fs/bcachefs with
-+ * this UUID.
-+ */
-+struct bch_ioctl_query_uuid {
-+	__uuid_t uuid;
-+};
-+
-+#if 0
-+struct bch_ioctl_start {
-+	__u32 flags;
-+	__u32 pad;
-+};
-+#endif
-+
-+/*
-+ * BCH_IOCTL_DISK_ADD: add a new device to an existing filesystem
-+ *
-+ * The specified device must not be open or in use. On success, the new device
-+ * will be an online member of the filesystem just like any other member.
-+ *
-+ * The device must first be prepared by userspace by formatting with a bcachefs
-+ * superblock, which is only used for passing in superblock options/parameters
-+ * for that device (in struct bch_member). The new device's superblock should
-+ * not claim to be a member of any existing filesystem - UUIDs on it will be
-+ * ignored.
-+ */
-+
-+/*
-+ * BCH_IOCTL_DISK_REMOVE: permanently remove a member device from a filesystem
-+ *
-+ * Any data present on @dev will be permanently deleted, and @dev will be
-+ * removed from its slot in the filesystem's list of member devices. The device
-+ * may be either online or offline.
-+ *
-+ * Will fail if removing @dev would leave us with insufficient read/write
-+ * devices or degraded/unavailable data, unless the appropriate BCH_FORCE_IF_*
-+ * flags are set.
-+ */
-+
-+/*
-+ * BCH_IOCTL_DISK_ONLINE: given a disk that is already a member of a filesystem
-+ * but is not open (e.g. because we started in degraded mode), bring it online
-+ *
-+ * all existing data on @dev will be available once the device is online,
-+ * exactly as if @dev was present when the filesystem was first mounted
-+ */
-+
-+/*
-+ * BCH_IOCTL_DISK_OFFLINE: offline a disk, causing the kernel to close that
-+ * block device, without removing it from the filesystem (so it can be brought
-+ * back online later)
-+ *
-+ * Data present on @dev will be unavailable while @dev is offline (unless
-+ * replicated), but will still be intact and untouched if @dev is brought back
-+ * online
-+ *
-+ * Will fail (similarly to BCH_IOCTL_DISK_SET_STATE) if offlining @dev would
-+ * leave us with insufficient read/write devices or degraded/unavailable data,
-+ * unless the appropriate BCH_FORCE_IF_* flags are set.
-+ */
-+
-+struct bch_ioctl_disk {
-+	__u32 flags;
-+	__u32 pad;
-+	__u64 dev;
-+};
-+
-+/*
-+ * BCH_IOCTL_DISK_SET_STATE: modify state of a member device of a filesystem
-+ *
-+ * @new_state	- one of the bch_member_state states (rw, ro, failed,
-+ *		  spare)
-+ *
-+ * Will refuse to change member state if we would then have insufficient devices
-+ * to write to, or if it would result in degraded data (when @new_state is
-+ * failed or spare) unless the appropriate BCH_FORCE_IF_* flags are set.
-+ */
-+struct bch_ioctl_disk_set_state {
-+	__u32 flags;
-+	__u8 new_state;
-+	__u8 pad[3];
-+	__u64 dev;
-+};
-+
-+enum bch_data_ops {
-+	BCH_DATA_OP_SCRUB = 0,
-+	BCH_DATA_OP_REREPLICATE = 1,
-+	BCH_DATA_OP_MIGRATE = 2,
-+	BCH_DATA_OP_REWRITE_OLD_NODES = 3,
-+	BCH_DATA_OP_NR = 4,
-+};
-+
-+/*
-+ * BCH_IOCTL_DATA: operations that walk and manipulate filesystem data (e.g.
-+ * scrub, rereplicate, migrate).
-+ *
-+ * This ioctl kicks off a job in the background, and returns a file descriptor.
-+ * Reading from the file descriptor returns a struct bch_ioctl_data_event,
-+ * indicating current progress, and closing the file descriptor will stop the
-+ * job. The file descriptor is O_CLOEXEC.
-+ */ -+struct bch_ioctl_data { -+ __u16 op; -+ __u8 start_btree; -+ __u8 end_btree; -+ __u32 flags; -+ -+ struct bpos start_pos; -+ struct bpos end_pos; -+ -+ union { -+ struct { -+ __u32 dev; -+ __u32 pad; -+ } migrate; -+ struct { -+ __u64 pad[8]; -+ }; -+ }; -+} __packed __aligned(8); -+ -+enum bch_data_event { -+ BCH_DATA_EVENT_PROGRESS = 0, -+ /* XXX: add an event for reporting errors */ -+ BCH_DATA_EVENT_NR = 1, -+}; -+ -+struct bch_ioctl_data_progress { -+ __u8 data_type; -+ __u8 btree_id; -+ __u8 pad[2]; -+ struct bpos pos; -+ -+ __u64 sectors_done; -+ __u64 sectors_total; -+} __packed __aligned(8); -+ -+struct bch_ioctl_data_event { -+ __u8 type; -+ __u8 pad[7]; -+ union { -+ struct bch_ioctl_data_progress p; -+ __u64 pad2[15]; -+ }; -+} __packed __aligned(8); -+ -+struct bch_replicas_usage { -+ __u64 sectors; -+ struct bch_replicas_entry r; -+} __packed; -+ -+static inline struct bch_replicas_usage * -+replicas_usage_next(struct bch_replicas_usage *u) -+{ -+ return (void *) u + replicas_entry_bytes(&u->r) + 8; -+} -+ -+/* -+ * BCH_IOCTL_FS_USAGE: query filesystem disk space usage -+ * -+ * Returns disk space usage broken out by data type, number of replicas, and -+ * by component device -+ * -+ * @replica_entries_bytes - size, in bytes, allocated for replica usage entries -+ * -+ * On success, @replica_entries_bytes will be changed to indicate the number of -+ * bytes actually used. -+ * -+ * Returns -ERANGE if @replica_entries_bytes was too small -+ */ -+struct bch_ioctl_fs_usage { -+ __u64 capacity; -+ __u64 used; -+ __u64 online_reserved; -+ __u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ -+ __u32 replica_entries_bytes; -+ __u32 pad; -+ -+ struct bch_replicas_usage replicas[0]; -+}; -+ -+/* -+ * BCH_IOCTL_DEV_USAGE: query device disk space usage -+ * -+ * Returns disk space usage broken out by data type - both by buckets and -+ * sectors. 
-+ */ -+struct bch_ioctl_dev_usage { -+ __u64 dev; -+ __u32 flags; -+ __u8 state; -+ __u8 pad[7]; -+ -+ __u32 bucket_size; -+ __u64 nr_buckets; -+ -+ __u64 buckets_ec; -+ -+ struct bch_ioctl_dev_usage_type { -+ __u64 buckets; -+ __u64 sectors; -+ __u64 fragmented; -+ } d[BCH_DATA_NR]; -+}; -+ -+/* -+ * BCH_IOCTL_READ_SUPER: read filesystem superblock -+ * -+ * Equivalent to reading the superblock directly from the block device, except -+ * avoids racing with the kernel writing the superblock or having to figure out -+ * which block device to read -+ * -+ * @sb - buffer to read into -+ * @size - size of userspace allocated buffer -+ * @dev - device to read superblock for, if BCH_READ_DEV flag is -+ * specified -+ * -+ * Returns -ERANGE if buffer provided is too small -+ */ -+struct bch_ioctl_read_super { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 size; -+ __u64 sb; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_GET_IDX: give a path to a block device, query filesystem to -+ * determine if disk is a (online) member - if so, returns device's index -+ * -+ * Returns -ENOENT if not found -+ */ -+struct bch_ioctl_disk_get_idx { -+ __u64 dev; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_RESIZE: resize filesystem on a device -+ * -+ * @dev - member to resize -+ * @nbuckets - new number of buckets -+ */ -+struct bch_ioctl_disk_resize { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 nbuckets; -+}; -+ -+/* -+ * BCH_IOCTL_DISK_RESIZE_JOURNAL: resize journal on a device -+ * -+ * @dev - member to resize -+ * @nbuckets - new number of buckets -+ */ -+struct bch_ioctl_disk_resize_journal { -+ __u32 flags; -+ __u32 pad; -+ __u64 dev; -+ __u64 nbuckets; -+}; -+ -+struct bch_ioctl_subvolume { -+ __u32 flags; -+ __u32 dirfd; -+ __u16 mode; -+ __u16 pad[3]; -+ __u64 dst_ptr; -+ __u64 src_ptr; -+}; -+ -+#define BCH_SUBVOL_SNAPSHOT_CREATE (1U << 0) -+#define BCH_SUBVOL_SNAPSHOT_RO (1U << 1) -+ -+#endif /* _BCACHEFS_IOCTL_H */ -diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c -new file mode 100644 -index 000000000000..abdb05507d16 ---- /dev/null -+++ b/fs/bcachefs/bkey.c -@@ -0,0 +1,1120 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "bkey_cmp.h" -+#include "bkey_methods.h" -+#include "bset.h" -+#include "util.h" -+ -+const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; -+ -+void bch2_bkey_packed_to_binary_text(struct printbuf *out, -+ const struct bkey_format *f, -+ const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(f, k); -+ unsigned word_bits = 64 - high_bit_offset; -+ unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset; -+ u64 v = *p & (~0ULL >> high_bit_offset); -+ -+ if (!nr_key_bits) { -+ prt_str(out, "(empty)"); -+ return; -+ } -+ -+ while (1) { -+ unsigned next_key_bits = nr_key_bits; -+ -+ if (nr_key_bits < 64) { -+ v >>= 64 - nr_key_bits; -+ next_key_bits = 0; -+ } else { -+ next_key_bits -= 64; -+ } -+ -+ bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits)); -+ -+ if (!next_key_bits) -+ break; -+ -+ prt_char(out, ' '); -+ -+ p = next_word(p); -+ v = *p; -+ word_bits = 64; -+ nr_key_bits = next_key_bits; -+ } -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+static void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) -+{ -+ struct bkey tmp; -+ -+ BUG_ON(bkeyp_val_u64s(format, packed) != -+ bkey_val_u64s(unpacked)); -+ -+ BUG_ON(packed->u64s < bkeyp_key_u64s(format, packed)); -+ -+ tmp = __bch2_bkey_unpack_key(format, packed); -+ -+ if 
(memcmp(&tmp, unpacked, sizeof(struct bkey))) { -+ struct printbuf buf = PRINTBUF; -+ -+ prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n", -+ format->key_u64s, -+ format->bits_per_field[0], -+ format->bits_per_field[1], -+ format->bits_per_field[2], -+ format->bits_per_field[3], -+ format->bits_per_field[4]); -+ -+ prt_printf(&buf, "compiled unpack: "); -+ bch2_bkey_to_text(&buf, unpacked); -+ prt_newline(&buf); -+ -+ prt_printf(&buf, "c unpack: "); -+ bch2_bkey_to_text(&buf, &tmp); -+ prt_newline(&buf); -+ -+ prt_printf(&buf, "compiled unpack: "); -+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, -+ (struct bkey_packed *) unpacked); -+ prt_newline(&buf); -+ -+ prt_printf(&buf, "c unpack: "); -+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, -+ (struct bkey_packed *) &tmp); -+ prt_newline(&buf); -+ -+ panic("%s", buf.buf); -+ } -+} -+ -+#else -+static inline void bch2_bkey_pack_verify(const struct bkey_packed *packed, -+ const struct bkey *unpacked, -+ const struct bkey_format *format) {} -+#endif -+ -+struct pack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct pack_state pack_state_init(const struct bkey_format *format, -+ struct bkey_packed *k) -+{ -+ u64 *p = high_word(format, k); -+ -+ return (struct pack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = 0, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static void pack_state_finish(struct pack_state *state, -+ struct bkey_packed *k) -+{ -+ EBUG_ON(state->p < k->_data); -+ EBUG_ON(state->p >= (u64 *) k->_data + state->format->key_u64s); -+ -+ *state->p = state->w; -+} -+ -+struct unpack_state { -+ const struct bkey_format *format; -+ unsigned bits; /* bits remaining in current word */ -+ u64 w; /* current word */ -+ const u64 *p; /* pointer to next word */ -+}; -+ -+__always_inline -+static struct unpack_state unpack_state_init(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(format, k); -+ -+ return (struct unpack_state) { -+ .format = format, -+ .bits = 64 - high_bit_offset, -+ .w = *p << high_bit_offset, -+ .p = p, -+ }; -+} -+ -+__always_inline -+static u64 get_inc_field(struct unpack_state *state, unsigned field) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 v = 0, offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (bits >= state->bits) { -+ v = state->w >> (64 - bits); -+ bits -= state->bits; -+ -+ state->p = next_word(state->p); -+ state->w = *state->p; -+ state->bits = 64; -+ } -+ -+ /* avoid shift by 64 if bits is 0 - bits is never 64 here: */ -+ v |= (state->w >> 1) >> (63 - bits); -+ state->w <<= bits; -+ state->bits -= bits; -+ -+ return v + offset; -+} -+ -+__always_inline -+static void __set_inc_field(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ -+ if (bits) { -+ if (bits > state->bits) { -+ bits -= state->bits; -+ /* avoid shift by 64 if bits is 64 - bits is never 0 here: */ -+ state->w |= (v >> 1) >> (bits - 1); -+ -+ *state->p = state->w; -+ state->p = next_word(state->p); -+ state->w = 0; -+ state->bits = 64; -+ } -+ -+ state->bits -= bits; -+ state->w |= v << state->bits; -+ } -+} -+ -+__always_inline -+static bool set_inc_field(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = 
state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ -+ if (v < offset) -+ return false; -+ -+ v -= offset; -+ -+ if (fls64(v) > bits) -+ return false; -+ -+ __set_inc_field(state, field, v); -+ return true; -+} -+ -+/* -+ * Note: does NOT set out->format (we don't know what it should be here!) -+ * -+ * Also: doesn't work on extents - it doesn't preserve the invariant that -+ * if k is packed bkey_start_pos(k) will successfully pack -+ */ -+static bool bch2_bkey_transform_key(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ struct pack_state out_s = pack_state_init(out_f, out); -+ struct unpack_state in_s = unpack_state_init(in_f, in); -+ u64 *w = out->_data; -+ unsigned i; -+ -+ *w = 0; -+ -+ for (i = 0; i < BKEY_NR_FIELDS; i++) -+ if (!set_inc_field(&out_s, i, get_inc_field(&in_s, i))) -+ return false; -+ -+ /* Can't happen because the val would be too big to unpack: */ -+ EBUG_ON(in->u64s - in_f->key_u64s + out_f->key_u64s > U8_MAX); -+ -+ pack_state_finish(&out_s, out); -+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ return true; -+} -+ -+bool bch2_bkey_transform(const struct bkey_format *out_f, -+ struct bkey_packed *out, -+ const struct bkey_format *in_f, -+ const struct bkey_packed *in) -+{ -+ if (!bch2_bkey_transform_key(out_f, out, in_f, in)) -+ return false; -+ -+ memcpy_u64s((u64 *) out + out_f->key_u64s, -+ (u64 *) in + in_f->key_u64s, -+ (in->u64s - in_f->key_u64s)); -+ return true; -+} -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bkey out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX); -+ -+ out.u64s = BKEY_U64s + in->u64s - format->key_u64s; -+ out.format = KEY_FORMAT_CURRENT; -+ out.needs_whiteout = in->needs_whiteout; -+ out.type = in->type; -+ out.pad[0] = 0; -+ -+#define x(id, field) out.field = get_inc_field(&state, id); -+ bkey_fields() -+#undef x -+ -+ return out; -+} -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *format, -+ const struct bkey_packed *in) -+{ -+ struct unpack_state state = unpack_state_init(format, in); -+ struct bpos out; -+ -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->u64s < format->key_u64s); -+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE); -+ -+ out.inode = get_inc_field(&state, BKEY_FIELD_INODE); -+ out.offset = get_inc_field(&state, BKEY_FIELD_OFFSET); -+ out.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT); -+ -+ return out; -+} -+#endif -+ -+/** -+ * bch2_bkey_pack_key -- pack just the key, not the value -+ * @out: packed result -+ * @in: key to pack -+ * @format: format of packed result -+ * -+ * Returns: true on success, false on failure -+ */ -+bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, -+ const struct bkey_format *format) -+{ -+ struct pack_state state = pack_state_init(format, out); -+ u64 *w = out->_data; -+ -+ EBUG_ON((void *) in == (void *) out); -+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS); -+ EBUG_ON(in->format != KEY_FORMAT_CURRENT); -+ -+ *w = 0; -+ -+#define x(id, field) if (!set_inc_field(&state, id, 
in->field)) return false; -+ bkey_fields() -+#undef x -+ pack_state_finish(&state, out); -+ out->u64s = format->key_u64s + in->u64s - BKEY_U64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->needs_whiteout = in->needs_whiteout; -+ out->type = in->type; -+ -+ bch2_bkey_pack_verify(out, in, format); -+ return true; -+} -+ -+/** -+ * bch2_bkey_unpack -- unpack the key and the value -+ * @b: btree node of @src key (for packed format) -+ * @dst: unpacked result -+ * @src: packed input -+ */ -+void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, -+ const struct bkey_packed *src) -+{ -+ __bkey_unpack_key(b, &dst->k, src); -+ -+ memcpy_u64s(&dst->v, -+ bkeyp_val(&b->format, src), -+ bkeyp_val_u64s(&b->format, src)); -+} -+ -+/** -+ * bch2_bkey_pack -- pack the key and the value -+ * @dst: packed result -+ * @src: unpacked input -+ * @format: format of packed result -+ * -+ * Returns: true on success, false on failure -+ */ -+bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src, -+ const struct bkey_format *format) -+{ -+ struct bkey_packed tmp; -+ -+ if (!bch2_bkey_pack_key(&tmp, &src->k, format)) -+ return false; -+ -+ memmove_u64s((u64 *) dst + format->key_u64s, -+ &src->v, -+ bkey_val_u64s(&src->k)); -+ memcpy_u64s_small(dst, &tmp, format->key_u64s); -+ -+ return true; -+} -+ -+__always_inline -+static bool set_inc_field_lossy(struct pack_state *state, unsigned field, u64 v) -+{ -+ unsigned bits = state->format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(state->format->field_offset[field]); -+ bool ret = true; -+ -+ EBUG_ON(v < offset); -+ v -= offset; -+ -+ if (fls64(v) > bits) { -+ v = ~(~0ULL << bits); -+ ret = false; -+ } -+ -+ __set_inc_field(state, field, v); -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+static bool bkey_packed_successor(struct bkey_packed *out, -+ const struct btree *b, -+ struct bkey_packed k) -+{ -+ const struct bkey_format *f = &b->format; -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned first_bit, offset; -+ u64 *p; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f)); -+ -+ if (!nr_key_bits) -+ return false; -+ -+ *out = k; -+ -+ first_bit = high_bit_offset + nr_key_bits - 1; -+ p = nth_word(high_word(f, out), first_bit >> 6); -+ offset = 63 - (first_bit & 63); -+ -+ while (nr_key_bits) { -+ unsigned bits = min(64 - offset, nr_key_bits); -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if ((*p & mask) != mask) { -+ *p += 1ULL << offset; -+ EBUG_ON(bch2_bkey_cmp_packed(b, out, &k) <= 0); -+ return true; -+ } -+ -+ *p &= ~mask; -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ offset = 0; -+ } -+ -+ return false; -+} -+ -+static bool bkey_format_has_too_big_fields(const struct bkey_format *f) -+{ -+ for (unsigned i = 0; i < f->nr_fields; i++) { -+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; -+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); -+ u64 packed_max = f->bits_per_field[i] -+ ? 
~((~0ULL << 1) << (f->bits_per_field[i] - 1)) -+ : 0; -+ u64 field_offset = le64_to_cpu(f->field_offset[i]); -+ -+ if (packed_max + field_offset < packed_max || -+ packed_max + field_offset > unpacked_max) -+ return true; -+ } -+ -+ return false; -+} -+#endif -+ -+/* -+ * Returns a packed key that compares <= in -+ * -+ * This is used in bset_search_tree(), where we need a packed pos in order to be -+ * able to compare against the keys in the auxiliary search tree - and it's -+ * legal to use a packed pos that isn't equivalent to the original pos, -+ * _provided_ it compares <= to the original pos. -+ */ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, -+ struct bpos in, -+ const struct btree *b) -+{ -+ const struct bkey_format *f = &b->format; -+ struct pack_state state = pack_state_init(f, out); -+ u64 *w = out->_data; -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos orig = in; -+#endif -+ bool exact = true; -+ unsigned i; -+ -+ /* -+ * bch2_bkey_pack_key() will write to all of f->key_u64s, minus the 3 -+ * byte header, but pack_pos() won't if the len/version fields are big -+ * enough - we need to make sure to zero them out: -+ */ -+ for (i = 0; i < f->key_u64s; i++) -+ w[i] = 0; -+ -+ if (unlikely(in.snapshot < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]))) { -+ if (!in.offset-- && -+ !in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.offset < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_OFFSET]))) { -+ if (!in.inode--) -+ return BKEY_PACK_POS_FAIL; -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(in.inode < -+ le64_to_cpu(f->field_offset[BKEY_FIELD_INODE]))) -+ return BKEY_PACK_POS_FAIL; -+ -+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_INODE, in.inode))) { -+ in.offset = KEY_OFFSET_MAX; -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_OFFSET, in.offset))) { -+ in.snapshot = KEY_SNAPSHOT_MAX; -+ exact = false; -+ } -+ -+ if (unlikely(!set_inc_field_lossy(&state, BKEY_FIELD_SNAPSHOT, in.snapshot))) -+ exact = false; -+ -+ pack_state_finish(&state, out); -+ out->u64s = f->key_u64s; -+ out->format = KEY_FORMAT_LOCAL_BTREE; -+ out->type = KEY_TYPE_deleted; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ if (exact) { -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig)); -+ } else { -+ struct bkey_packed successor; -+ -+ BUG_ON(bkey_cmp_left_packed(b, out, &orig) >= 0); -+ BUG_ON(bkey_packed_successor(&successor, b, *out) && -+ bkey_cmp_left_packed(b, &successor, &orig) < 0 && -+ !bkey_format_has_too_big_fields(f)); -+ } -+#endif -+ -+ return exact ? BKEY_PACK_POS_EXACT : BKEY_PACK_POS_SMALLER; -+} -+ -+void bch2_bkey_format_init(struct bkey_format_state *s) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) -+ s->field_min[i] = U64_MAX; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_max); i++) -+ s->field_max[i] = 0; -+ -+ /* Make sure we can store a size of 0: */ -+ s->field_min[BKEY_FIELD_SIZE] = 0; -+} -+ -+void bch2_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p) -+{ -+ unsigned field = 0; -+ -+ __bkey_format_add(s, field++, p.inode); -+ __bkey_format_add(s, field++, p.offset); -+ __bkey_format_add(s, field++, p.snapshot); -+} -+ -+/* -+ * We don't want it to be possible for the packed format to represent fields -+ * bigger than a u64... 
that will cause confusion and issues (like with -+ * bkey_packed_successor()) -+ */ -+static void set_format_field(struct bkey_format *f, enum bch_bkey_fields i, -+ unsigned bits, u64 offset) -+{ -+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; -+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); -+ -+ bits = min(bits, unpacked_bits); -+ -+ offset = bits == unpacked_bits ? 0 : min(offset, unpacked_max - ((1ULL << bits) - 1)); -+ -+ f->bits_per_field[i] = bits; -+ f->field_offset[i] = cpu_to_le64(offset); -+} -+ -+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ struct bkey_format ret = { -+ .nr_fields = BKEY_NR_FIELDS, -+ }; -+ -+ for (i = 0; i < ARRAY_SIZE(s->field_min); i++) { -+ s->field_min[i] = min(s->field_min[i], s->field_max[i]); -+ -+ set_format_field(&ret, i, -+ fls64(s->field_max[i] - s->field_min[i]), -+ s->field_min[i]); -+ -+ bits += ret.bits_per_field[i]; -+ } -+ -+ /* allow for extent merging: */ -+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) { -+ unsigned b = min(4U, 32U - ret.bits_per_field[BKEY_FIELD_SIZE]); -+ -+ ret.bits_per_field[BKEY_FIELD_SIZE] += b; -+ bits += b; -+ } -+ -+ ret.key_u64s = DIV_ROUND_UP(bits, 64); -+ -+ /* if we have enough spare bits, round fields up to nearest byte */ -+ bits = ret.key_u64s * 64 - bits; -+ -+ for (i = 0; i < ARRAY_SIZE(ret.bits_per_field); i++) { -+ unsigned r = round_up(ret.bits_per_field[i], 8) - -+ ret.bits_per_field[i]; -+ -+ if (r <= bits) { -+ set_format_field(&ret, i, -+ ret.bits_per_field[i] + r, -+ le64_to_cpu(ret.field_offset[i])); -+ bits -= r; -+ } -+ } -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ { -+ struct printbuf buf = PRINTBUF; -+ -+ BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf)); -+ printbuf_exit(&buf); -+ } -+#endif -+ return ret; -+} -+ -+int bch2_bkey_format_invalid(struct bch_fs *c, -+ struct bkey_format *f, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ unsigned i, bits = KEY_PACKED_BITS_START; -+ -+ if (f->nr_fields != BKEY_NR_FIELDS) { -+ prt_printf(err, "incorrect number of fields: got %u, should be %u", -+ f->nr_fields, BKEY_NR_FIELDS); -+ return -BCH_ERR_invalid; -+ } -+ -+ /* -+ * Verify that the packed format can't represent fields larger than the -+ * unpacked format: -+ */ -+ for (i = 0; i < f->nr_fields; i++) { -+ if (!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) { -+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; -+ u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); -+ u64 packed_max = f->bits_per_field[i] -+ ? 
~((~0ULL << 1) << (f->bits_per_field[i] - 1)) -+ : 0; -+ u64 field_offset = le64_to_cpu(f->field_offset[i]); -+ -+ if (packed_max + field_offset < packed_max || -+ packed_max + field_offset > unpacked_max) { -+ prt_printf(err, "field %u too large: %llu + %llu > %llu", -+ i, packed_max, field_offset, unpacked_max); -+ return -BCH_ERR_invalid; -+ } -+ } -+ -+ bits += f->bits_per_field[i]; -+ } -+ -+ if (f->key_u64s != DIV_ROUND_UP(bits, 64)) { -+ prt_printf(err, "incorrect key_u64s: got %u, should be %u", -+ f->key_u64s, DIV_ROUND_UP(bits, 64)); -+ return -BCH_ERR_invalid; -+ } -+ -+ return 0; -+} -+ -+void bch2_bkey_format_to_text(struct printbuf *out, const struct bkey_format *f) -+{ -+ prt_printf(out, "u64s %u fields ", f->key_u64s); -+ -+ for (unsigned i = 0; i < ARRAY_SIZE(f->bits_per_field); i++) { -+ if (i) -+ prt_str(out, ", "); -+ prt_printf(out, "%u:%llu", -+ f->bits_per_field[i], -+ le64_to_cpu(f->field_offset[i])); -+ } -+} -+ -+/* -+ * Most significant differing bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *b, -+ const struct bkey_packed *l_k, -+ const struct bkey_packed *r_k) -+{ -+ const u64 *l = high_word(&b->format, l_k); -+ const u64 *r = high_word(&b->format, r_k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned word_bits = 64 - high_bit_offset; -+ u64 l_v, r_v; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ /* for big endian, skip past header */ -+ l_v = *l & (~0ULL >> high_bit_offset); -+ r_v = *r & (~0ULL >> high_bit_offset); -+ -+ while (nr_key_bits) { -+ if (nr_key_bits < word_bits) { -+ l_v >>= word_bits - nr_key_bits; -+ r_v >>= word_bits - nr_key_bits; -+ nr_key_bits = 0; -+ } else { -+ nr_key_bits -= word_bits; -+ } -+ -+ if (l_v != r_v) -+ return fls64(l_v ^ r_v) - 1 + nr_key_bits; -+ -+ l = next_word(l); -+ r = next_word(r); -+ -+ l_v = *l; -+ r_v = *r; -+ word_bits = 64; -+ } -+ -+ return 0; -+} -+ -+/* -+ * First set bit -+ * Bits are indexed from 0 - return is [0, nr_key_bits) -+ */ -+__pure -+unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k) -+{ -+ const u64 *p = high_word(&b->format, k); -+ unsigned nr_key_bits = b->nr_key_bits; -+ unsigned ret = 0, offset; -+ -+ EBUG_ON(b->nr_key_bits != bkey_format_key_bits(&b->format)); -+ -+ offset = nr_key_bits; -+ while (offset > 64) { -+ p = next_word(p); -+ offset -= 64; -+ } -+ -+ offset = 64 - offset; -+ -+ while (nr_key_bits) { -+ unsigned bits = nr_key_bits + offset < 64 -+ ? 
nr_key_bits -+ : 64 - offset; -+ -+ u64 mask = (~0ULL >> (64 - bits)) << offset; -+ -+ if (*p & mask) -+ return ret + __ffs64(*p & mask) - offset; -+ -+ p = prev_word(p); -+ nr_key_bits -= bits; -+ ret += bits; -+ offset = 0; -+ } -+ -+ return 0; -+} -+ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ -+#define I(_x) (*(out)++ = (_x)) -+#define I1(i0) I(i0) -+#define I2(i0, i1) (I1(i0), I(i1)) -+#define I3(i0, i1, i2) (I2(i0, i1), I(i2)) -+#define I4(i0, i1, i2, i3) (I3(i0, i1, i2), I(i3)) -+#define I5(i0, i1, i2, i3, i4) (I4(i0, i1, i2, i3), I(i4)) -+ -+static u8 *compile_bkey_field(const struct bkey_format *format, u8 *out, -+ enum bch_bkey_fields field, -+ unsigned dst_offset, unsigned dst_size, -+ bool *eax_zeroed) -+{ -+ unsigned bits = format->bits_per_field[field]; -+ u64 offset = le64_to_cpu(format->field_offset[field]); -+ unsigned i, byte, bit_offset, align, shl, shr; -+ -+ if (!bits && !offset) { -+ if (!*eax_zeroed) { -+ /* xor eax, eax */ -+ I2(0x31, 0xc0); -+ } -+ -+ *eax_zeroed = true; -+ goto set_field; -+ } -+ -+ if (!bits) { -+ /* just return offset: */ -+ -+ switch (dst_size) { -+ case 8: -+ if (offset > S32_MAX) { -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ -+ I3(0xc7, 0x47, dst_offset + 4); -+ memcpy(out, (void *) &offset + 4, 4); -+ out += 4; -+ } else { -+ /* mov [rdi + dst_offset], offset */ -+ /* sign extended */ -+ I4(0x48, 0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], offset */ -+ I3(0xc7, 0x47, dst_offset); -+ memcpy(out, &offset, 4); -+ out += 4; -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+ } -+ -+ bit_offset = format->key_u64s * 64; -+ for (i = 0; i <= field; i++) -+ bit_offset -= format->bits_per_field[i]; -+ -+ byte = bit_offset / 8; -+ bit_offset -= byte * 8; -+ -+ *eax_zeroed = false; -+ -+ if (bit_offset == 0 && bits == 8) { -+ /* movzx eax, BYTE PTR [rsi + imm8] */ -+ I4(0x0f, 0xb6, 0x46, byte); -+ } else if (bit_offset == 0 && bits == 16) { -+ /* movzx eax, WORD PTR [rsi + imm8] */ -+ I4(0x0f, 0xb7, 0x46, byte); -+ } else if (bit_offset + bits <= 32) { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 32); -+ -+ /* mov eax, [rsi + imm8] */ -+ I3(0x8b, 0x46, byte); -+ -+ if (bit_offset) { -+ /* shr eax, imm8 */ -+ I3(0xc1, 0xe8, bit_offset); -+ } -+ -+ if (bit_offset + bits < 32) { -+ unsigned mask = ~0U >> (32 - bits); -+ -+ /* and eax, imm32 */ -+ I1(0x25); -+ memcpy(out, &mask, 4); -+ out += 4; -+ } -+ } else if (bit_offset + bits <= 64) { -+ align = min(8 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 7); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 64); -+ -+ /* mov rax, [rsi + imm8] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ shl = 64 - bit_offset - bits; -+ shr = bit_offset + shl; -+ -+ if (shl) { -+ /* shl rax, imm8 */ -+ I4(0x48, 0xc1, 0xe0, shl); -+ } -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } else { -+ align = min(4 - DIV_ROUND_UP(bit_offset + bits, 8), byte & 3); -+ byte -= align; -+ bit_offset += align * 8; -+ -+ BUG_ON(bit_offset + bits > 96); -+ -+ /* mov rax, [rsi + byte] */ -+ I4(0x48, 0x8b, 0x46, byte); -+ -+ /* mov edx, [rsi + byte + 8] */ -+ I3(0x8b, 0x56, byte + 8); -+ -+ /* bits from next word: */ -+ shr = bit_offset + bits - 64; -+ BUG_ON(shr > bit_offset); -+ -+ /* shr rax, bit_offset */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ -+ 
/* shl rdx, imm8 */ -+ I4(0x48, 0xc1, 0xe2, 64 - shr); -+ -+ /* or rax, rdx */ -+ I3(0x48, 0x09, 0xd0); -+ -+ shr = bit_offset - shr; -+ -+ if (shr) { -+ /* shr rax, imm8 */ -+ I4(0x48, 0xc1, 0xe8, shr); -+ } -+ } -+ -+ /* rax += offset: */ -+ if (offset > S32_MAX) { -+ /* mov rdx, imm64 */ -+ I2(0x48, 0xba); -+ memcpy(out, &offset, 8); -+ out += 8; -+ /* add %rdx, %rax */ -+ I3(0x48, 0x01, 0xd0); -+ } else if (offset + (~0ULL >> (64 - bits)) > U32_MAX) { -+ /* add rax, imm32 */ -+ I2(0x48, 0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } else if (offset) { -+ /* add eax, imm32 */ -+ I1(0x05); -+ memcpy(out, &offset, 4); -+ out += 4; -+ } -+set_field: -+ switch (dst_size) { -+ case 8: -+ /* mov [rdi + dst_offset], rax */ -+ I4(0x48, 0x89, 0x47, dst_offset); -+ break; -+ case 4: -+ /* mov [rdi + dst_offset], eax */ -+ I3(0x89, 0x47, dst_offset); -+ break; -+ default: -+ BUG(); -+ } -+ -+ return out; -+} -+ -+int bch2_compile_bkey_format(const struct bkey_format *format, void *_out) -+{ -+ bool eax_zeroed = false; -+ u8 *out = _out; -+ -+ /* -+ * rdi: dst - unpacked key -+ * rsi: src - packed key -+ */ -+ -+ /* k->u64s, k->format, k->type */ -+ -+ /* mov eax, [rsi] */ -+ I2(0x8b, 0x06); -+ -+ /* add eax, BKEY_U64s - format->key_u64s */ -+ I5(0x05, BKEY_U64s - format->key_u64s, KEY_FORMAT_CURRENT, 0, 0); -+ -+ /* and eax, imm32: mask out k->pad: */ -+ I5(0x25, 0xff, 0xff, 0xff, 0); -+ -+ /* mov [rdi], eax */ -+ I2(0x89, 0x07); -+ -+#define x(id, field) \ -+ out = compile_bkey_field(format, out, id, \ -+ offsetof(struct bkey, field), \ -+ sizeof(((struct bkey *) NULL)->field), \ -+ &eax_zeroed); -+ bkey_fields() -+#undef x -+ -+ /* retq */ -+ I1(0xc3); -+ -+ return (void *) out - _out; -+} -+ -+#else -+#endif -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l, -+ const struct bkey_packed *r, -+ const struct btree *b) -+{ -+ return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bpos_cmp(bkey_unpack_pos_format_checked(b, l), *r); -+} -+ -+__pure __flatten -+int bch2_bkey_cmp_packed(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bch2_bkey_cmp_packed_inlined(b, l, r); -+} -+ -+__pure __flatten -+int __bch2_bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ const struct bkey *l_unpacked; -+ -+ return unlikely(l_unpacked = packed_to_bkey_c(l)) -+ ? bpos_cmp(l_unpacked->p, *r) -+ : __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+void bch2_bpos_swab(struct bpos *p) -+{ -+ u8 *l = (u8 *) p; -+ u8 *h = ((u8 *) &p[1]) - 1; -+ -+ while (l < h) { -+ swap(*l, *h); -+ l++; -+ --h; -+ } -+} -+ -+void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k) -+{ -+ const struct bkey_format *f = bkey_packed(k) ? 
_f : &bch2_bkey_format_current;
-+	u8 *l = k->key_start;
-+	u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
-+
-+	while (l < h) {
-+		swap(*l, *h);
-+		l++;
-+		--h;
-+	}
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_bkey_pack_test(void)
-+{
-+	struct bkey t = KEY(4134ULL, 1250629070527416633ULL, 0);
-+	struct bkey_packed p;
-+
-+	struct bkey_format test_format = {
-+		.key_u64s = 3,
-+		.nr_fields = BKEY_NR_FIELDS,
-+		.bits_per_field = {
-+			13,
-+			64,
-+			32,
-+		},
-+	};
-+
-+	struct unpack_state in_s =
-+		unpack_state_init(&bch2_bkey_format_current, (void *) &t);
-+	struct pack_state out_s = pack_state_init(&test_format, &p);
-+	unsigned i;
-+
-+	for (i = 0; i < out_s.format->nr_fields; i++) {
-+		u64 a, v = get_inc_field(&in_s, i);
-+
-+		switch (i) {
-+#define x(id, field) case id: a = t.field; break;
-+		bkey_fields()
-+#undef x
-+		default:
-+			BUG();
-+		}
-+
-+		if (a != v)
-+			panic("got %llu actual %llu i %u\n", v, a, i);
-+
-+		if (!set_inc_field(&out_s, i, v))
-+			panic("failed at %u\n", i);
-+	}
-+
-+	BUG_ON(!bch2_bkey_pack_key(&p, &t, &test_format));
-+}
-+#endif
-diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
-new file mode 100644
-index 000000000000..831be01809f2
---- /dev/null
-+++ b/fs/bcachefs/bkey.h
-@@ -0,0 +1,778 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_H
-+#define _BCACHEFS_BKEY_H
-+
-+#include <linux/bug.h>
-+#include "bcachefs_format.h"
-+
-+#include "btree_types.h"
-+#include "util.h"
-+#include "vstructs.h"
-+
-+enum bkey_invalid_flags {
-+	BKEY_INVALID_WRITE = (1U << 0),
-+	BKEY_INVALID_COMMIT = (1U << 1),
-+	BKEY_INVALID_JOURNAL = (1U << 2),
-+};
-+
-+#if 0
-+
-+/*
-+ * compiled unpack functions are disabled, pending a new interface for
-+ * dynamically allocating executable memory:
-+ */
-+
-+#ifdef CONFIG_X86_64
-+#define HAVE_BCACHEFS_COMPILED_UNPACK 1
-+#endif
-+#endif
-+
-+void bch2_bkey_packed_to_binary_text(struct printbuf *,
-+				const struct bkey_format *,
-+				const struct bkey_packed *);
-+
-+/* bkey with split value, const */
-+struct bkey_s_c {
-+	const struct bkey *k;
-+	const struct bch_val *v;
-+};
-+
-+/* bkey with split value */
-+struct bkey_s {
-+	union {
-+		struct {
-+			struct bkey *k;
-+			struct bch_val *v;
-+		};
-+		struct bkey_s_c s_c;
-+	};
-+};
-+
-+#define bkey_p_next(_k) vstruct_next(_k)
-+
-+static inline struct bkey_i *bkey_next(struct bkey_i *k)
-+{
-+	return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
-+}
-+
-+#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
-+
-+static inline size_t bkey_val_bytes(const struct bkey *k)
-+{
-+	return bkey_val_u64s(k) * sizeof(u64);
-+}
-+
-+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
-+{
-+	unsigned u64s = BKEY_U64s + val_u64s;
-+
-+	BUG_ON(u64s > U8_MAX);
-+	k->u64s = u64s;
-+}
-+
-+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
-+{
-+	set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
-+}
-+
-+#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
-+
-+#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
-+
-+#define bkey_whiteout(_k) \
-+	((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
-+
-+enum bkey_lr_packed {
-+	BKEY_PACKED_BOTH,
-+	BKEY_PACKED_RIGHT,
-+	BKEY_PACKED_LEFT,
-+	BKEY_PACKED_NONE,
-+};
-+
-+#define bkey_lr_packed(_l, _r) \
-+	((_l)->format + ((_r)->format << 1))
-+
-+static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src)
-+{
-+	memcpy_u64s_small(dst, src, src->u64s);
-+}
-+
-+static inline void bkey_copy(struct bkey_i *dst, const struct
bkey_i *src) -+{ -+ memcpy_u64s_small(dst, src, src->k.u64s); -+} -+ -+struct btree; -+ -+__pure -+unsigned bch2_bkey_greatest_differing_bit(const struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+__pure -+unsigned bch2_bkey_ffs(const struct btree *, const struct bkey_packed *); -+ -+__pure -+int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *, -+ const struct bkey_packed *, -+ const struct btree *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed_format_checked(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+__pure -+int bch2_bkey_cmp_packed(const struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+ -+__pure -+int __bch2_bkey_cmp_left_packed(const struct btree *, -+ const struct bkey_packed *, -+ const struct bpos *); -+ -+static inline __pure -+int bkey_cmp_left_packed(const struct btree *b, -+ const struct bkey_packed *l, const struct bpos *r) -+{ -+ return __bch2_bkey_cmp_left_packed(b, l, r); -+} -+ -+/* -+ * The compiler generates better code when we pass bpos by ref, but it's often -+ * enough terribly convenient to pass it by val... as much as I hate c++, const -+ * ref would be nice here: -+ */ -+__pure __flatten -+static inline int bkey_cmp_left_packed_byval(const struct btree *b, -+ const struct bkey_packed *l, -+ struct bpos r) -+{ -+ return bkey_cmp_left_packed(b, l, &r); -+} -+ -+static __always_inline bool bpos_eq(struct bpos l, struct bpos r) -+{ -+ return !((l.inode ^ r.inode) | -+ (l.offset ^ r.offset) | -+ (l.snapshot ^ r.snapshot)); -+} -+ -+static __always_inline bool bpos_lt(struct bpos l, struct bpos r) -+{ -+ return l.inode != r.inode ? l.inode < r.inode : -+ l.offset != r.offset ? l.offset < r.offset : -+ l.snapshot != r.snapshot ? l.snapshot < r.snapshot : false; -+} -+ -+static __always_inline bool bpos_le(struct bpos l, struct bpos r) -+{ -+ return l.inode != r.inode ? l.inode < r.inode : -+ l.offset != r.offset ? l.offset < r.offset : -+ l.snapshot != r.snapshot ? l.snapshot < r.snapshot : true; -+} -+ -+static __always_inline bool bpos_gt(struct bpos l, struct bpos r) -+{ -+ return bpos_lt(r, l); -+} -+ -+static __always_inline bool bpos_ge(struct bpos l, struct bpos r) -+{ -+ return bpos_le(r, l); -+} -+ -+static __always_inline int bpos_cmp(struct bpos l, struct bpos r) -+{ -+ return cmp_int(l.inode, r.inode) ?: -+ cmp_int(l.offset, r.offset) ?: -+ cmp_int(l.snapshot, r.snapshot); -+} -+ -+static inline struct bpos bpos_min(struct bpos l, struct bpos r) -+{ -+ return bpos_lt(l, r) ? l : r; -+} -+ -+static inline struct bpos bpos_max(struct bpos l, struct bpos r) -+{ -+ return bpos_gt(l, r) ? l : r; -+} -+ -+static __always_inline bool bkey_eq(struct bpos l, struct bpos r) -+{ -+ return !((l.inode ^ r.inode) | -+ (l.offset ^ r.offset)); -+} -+ -+static __always_inline bool bkey_lt(struct bpos l, struct bpos r) -+{ -+ return l.inode != r.inode -+ ? l.inode < r.inode -+ : l.offset < r.offset; -+} -+ -+static __always_inline bool bkey_le(struct bpos l, struct bpos r) -+{ -+ return l.inode != r.inode -+ ? 
l.inode < r.inode -+ : l.offset <= r.offset; -+} -+ -+static __always_inline bool bkey_gt(struct bpos l, struct bpos r) -+{ -+ return bkey_lt(r, l); -+} -+ -+static __always_inline bool bkey_ge(struct bpos l, struct bpos r) -+{ -+ return bkey_le(r, l); -+} -+ -+static __always_inline int bkey_cmp(struct bpos l, struct bpos r) -+{ -+ return cmp_int(l.inode, r.inode) ?: -+ cmp_int(l.offset, r.offset); -+} -+ -+static inline struct bpos bkey_min(struct bpos l, struct bpos r) -+{ -+ return bkey_lt(l, r) ? l : r; -+} -+ -+static inline struct bpos bkey_max(struct bpos l, struct bpos r) -+{ -+ return bkey_gt(l, r) ? l : r; -+} -+ -+void bch2_bpos_swab(struct bpos *); -+void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); -+ -+static __always_inline int bversion_cmp(struct bversion l, struct bversion r) -+{ -+ return cmp_int(l.hi, r.hi) ?: -+ cmp_int(l.lo, r.lo); -+} -+ -+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 }) -+#define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL }) -+ -+static __always_inline int bversion_zero(struct bversion v) -+{ -+ return !bversion_cmp(v, ZERO_VERSION); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+/* statement expressions confusing unlikely()? */ -+#define bkey_packed(_k) \ -+ ({ EBUG_ON((_k)->format > KEY_FORMAT_CURRENT); \ -+ (_k)->format != KEY_FORMAT_CURRENT; }) -+#else -+#define bkey_packed(_k) ((_k)->format != KEY_FORMAT_CURRENT) -+#endif -+ -+/* -+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse -+ */ -+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k) -+{ -+ return (struct bkey_packed *) k; -+} -+ -+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k) -+{ -+ return (const struct bkey_packed *) k; -+} -+ -+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? NULL : (struct bkey_i *) k; -+} -+ -+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k) -+{ -+ return bkey_packed(k) ? NULL : (const struct bkey *) k; -+} -+ -+static inline unsigned bkey_format_key_bits(const struct bkey_format *format) -+{ -+ return format->bits_per_field[BKEY_FIELD_INODE] + -+ format->bits_per_field[BKEY_FIELD_OFFSET] + -+ format->bits_per_field[BKEY_FIELD_SNAPSHOT]; -+} -+ -+static inline struct bpos bpos_successor(struct bpos p) -+{ -+ if (!++p.snapshot && -+ !++p.offset && -+ !++p.inode) -+ BUG(); -+ -+ return p; -+} -+ -+static inline struct bpos bpos_predecessor(struct bpos p) -+{ -+ if (!p.snapshot-- && -+ !p.offset-- && -+ !p.inode--) -+ BUG(); -+ -+ return p; -+} -+ -+static inline struct bpos bpos_nosnap_successor(struct bpos p) -+{ -+ p.snapshot = 0; -+ -+ if (!++p.offset && -+ !++p.inode) -+ BUG(); -+ -+ return p; -+} -+ -+static inline struct bpos bpos_nosnap_predecessor(struct bpos p) -+{ -+ p.snapshot = 0; -+ -+ if (!p.offset-- && -+ !p.inode--) -+ BUG(); -+ -+ return p; -+} -+ -+static inline u64 bkey_start_offset(const struct bkey *k) -+{ -+ return k->p.offset - k->size; -+} -+ -+static inline struct bpos bkey_start_pos(const struct bkey *k) -+{ -+ return (struct bpos) { -+ .inode = k->p.inode, -+ .offset = bkey_start_offset(k), -+ .snapshot = k->p.snapshot, -+ }; -+} -+ -+/* Packed helpers */ -+ -+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ unsigned ret = bkey_packed(k) ? 
format->key_u64s : BKEY_U64s; -+ -+ EBUG_ON(k->u64s < ret); -+ return ret; -+} -+ -+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_key_u64s(format, k) * sizeof(u64); -+} -+ -+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return k->u64s - bkeyp_key_u64s(format, k); -+} -+ -+static inline size_t bkeyp_val_bytes(const struct bkey_format *format, -+ const struct bkey_packed *k) -+{ -+ return bkeyp_val_u64s(format, k) * sizeof(u64); -+} -+ -+static inline void set_bkeyp_val_u64s(const struct bkey_format *format, -+ struct bkey_packed *k, unsigned val_u64s) -+{ -+ k->u64s = bkeyp_key_u64s(format, k) + val_u64s; -+} -+ -+#define bkeyp_val(_format, _k) \ -+ ((struct bch_val *) ((u64 *) (_k)->_data + bkeyp_key_u64s(_format, _k))) -+ -+extern const struct bkey_format bch2_bkey_format_current; -+ -+bool bch2_bkey_transform(const struct bkey_format *, -+ struct bkey_packed *, -+ const struct bkey_format *, -+ const struct bkey_packed *); -+ -+struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, -+ const struct bkey_packed *); -+ -+#ifndef HAVE_BCACHEFS_COMPILED_UNPACK -+struct bpos __bkey_unpack_pos(const struct bkey_format *, -+ const struct bkey_packed *); -+#endif -+ -+bool bch2_bkey_pack_key(struct bkey_packed *, const struct bkey *, -+ const struct bkey_format *); -+ -+enum bkey_pack_pos_ret { -+ BKEY_PACK_POS_EXACT, -+ BKEY_PACK_POS_SMALLER, -+ BKEY_PACK_POS_FAIL, -+}; -+ -+enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *, struct bpos, -+ const struct btree *); -+ -+static inline bool bkey_pack_pos(struct bkey_packed *out, struct bpos in, -+ const struct btree *b) -+{ -+ return bch2_bkey_pack_pos_lossy(out, in, b) == BKEY_PACK_POS_EXACT; -+} -+ -+void bch2_bkey_unpack(const struct btree *, struct bkey_i *, -+ const struct bkey_packed *); -+bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *, -+ const struct bkey_format *); -+ -+typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *); -+ -+static inline void -+__bkey_unpack_key_format_checked(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+ if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) { -+ compiled_unpack_fn unpack_fn = b->aux_data; -+ unpack_fn(dst, src); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && -+ bch2_expensive_debug_checks) { -+ struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src); -+ -+ BUG_ON(memcmp(dst, &dst2, sizeof(*dst))); -+ } -+ } else { -+ *dst = __bch2_bkey_unpack_key(&b->format, src); -+ } -+} -+ -+static inline struct bkey -+bkey_unpack_key_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ struct bkey dst; -+ -+ __bkey_unpack_key_format_checked(b, &dst, src); -+ return dst; -+} -+ -+static inline void __bkey_unpack_key(const struct btree *b, -+ struct bkey *dst, -+ const struct bkey_packed *src) -+{ -+ if (likely(bkey_packed(src))) -+ __bkey_unpack_key_format_checked(b, dst, src); -+ else -+ *dst = *packed_to_bkey_c(src); -+} -+ -+/** -+ * bkey_unpack_key -- unpack just the key, not the value -+ */ -+static inline struct bkey bkey_unpack_key(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? 
bkey_unpack_key_format_checked(b, src) -+ : *packed_to_bkey_c(src); -+} -+ -+static inline struct bpos -+bkey_unpack_pos_format_checked(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ return bkey_unpack_key_format_checked(b, src).p; -+#else -+ return __bkey_unpack_pos(&b->format, src); -+#endif -+} -+ -+static inline struct bpos bkey_unpack_pos(const struct btree *b, -+ const struct bkey_packed *src) -+{ -+ return likely(bkey_packed(src)) -+ ? bkey_unpack_pos_format_checked(b, src) -+ : packed_to_bkey_c(src)->p; -+} -+ -+/* Disassembled bkeys */ -+ -+static inline struct bkey_s_c bkey_disassemble(const struct btree *b, -+ const struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), }; -+} -+ -+/* non const version: */ -+static inline struct bkey_s __bkey_disassemble(const struct btree *b, -+ struct bkey_packed *k, -+ struct bkey *u) -+{ -+ __bkey_unpack_key(b, u, k); -+ -+ return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), }; -+} -+ -+static inline u64 bkey_field_max(const struct bkey_format *f, -+ enum bch_bkey_fields nr) -+{ -+ return f->bits_per_field[nr] < 64 -+ ? (le64_to_cpu(f->field_offset[nr]) + -+ ~(~0ULL << f->bits_per_field[nr])) -+ : U64_MAX; -+} -+ -+#ifdef HAVE_BCACHEFS_COMPILED_UNPACK -+ -+int bch2_compile_bkey_format(const struct bkey_format *, void *); -+ -+#else -+ -+static inline int bch2_compile_bkey_format(const struct bkey_format *format, -+ void *out) { return 0; } -+ -+#endif -+ -+static inline void bkey_reassemble(struct bkey_i *dst, -+ struct bkey_s_c src) -+{ -+ dst->k = *src.k; -+ memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k)); -+} -+ -+#define bkey_s_null ((struct bkey_s) { .k = NULL }) -+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL }) -+ -+#define bkey_s_err(err) ((struct bkey_s) { .k = ERR_PTR(err) }) -+#define bkey_s_c_err(err) ((struct bkey_s_c) { .k = ERR_PTR(err) }) -+ -+static inline struct bkey_s bkey_to_s(struct bkey *k) -+{ -+ return (struct bkey_s) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k) -+{ -+ return (struct bkey_s_c) { .k = k, .v = NULL }; -+} -+ -+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k) -+{ -+ return (struct bkey_s) { .k = &k->k, .v = &k->v }; -+} -+ -+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) -+{ -+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v }; -+} -+ -+/* -+ * For a given type of value (e.g. struct bch_extent), generates the types for -+ * bkey + bch_extent - inline, split, split const - and also all the conversion -+ * functions, which also check that the value is of the correct type. -+ * -+ * We use anonymous unions for upcasting - e.g. converting from e.g. a -+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion -+ * functions. -+ */ -+#define x(name, ...) 
\ -+struct bkey_i_##name { \ -+ union { \ -+ struct bkey k; \ -+ struct bkey_i k_i; \ -+ }; \ -+ struct bch_##name v; \ -+}; \ -+ \ -+struct bkey_s_c_##name { \ -+ union { \ -+ struct { \ -+ const struct bkey *k; \ -+ const struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+struct bkey_s_##name { \ -+ union { \ -+ struct { \ -+ struct bkey *k; \ -+ struct bch_##name *v; \ -+ }; \ -+ struct bkey_s_c_##name c; \ -+ struct bkey_s s; \ -+ struct bkey_s_c s_c; \ -+ }; \ -+}; \ -+ \ -+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline const struct bkey_i_##name * \ -+bkey_i_to_##name##_c(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \ -+ return container_of(&k->k, struct bkey_i_##name, k); \ -+} \ -+ \ -+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ -+{ \ -+ EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ -+{ \ -+ EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = k.k, \ -+ .v = container_of(k.v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\ -+{ \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+name##_i_to_s_c(const struct bkey_i_##name *k) \ -+{ \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = &k->v, \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ -+{ \ -+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_s_c_##name \ -+bkey_i_to_s_c_##name(const struct bkey_i *k) \ -+{ \ -+ EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name); \ -+ return (struct bkey_s_c_##name) { \ -+ .k = &k->k, \ -+ .v = container_of(&k->v, struct bch_##name, v), \ -+ }; \ -+} \ -+ \ -+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ -+{ \ -+ struct bkey_i_##name *k = \ -+ container_of(&_k->k, struct bkey_i_##name, k); \ -+ \ -+ bkey_init(&k->k); \ -+ memset(&k->v, 0, sizeof(k->v)); \ -+ k->k.type = KEY_TYPE_##name; \ -+ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ -+ \ -+ return k; \ -+} -+ -+BCH_BKEY_TYPES(); -+#undef x -+ -+/* byte order helpers */ -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return f->key_u64s - 1; -+} -+ -+#define high_bit_offset 0 -+#define nth_word(p, n) ((p) - (n)) -+ -+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ -+static inline unsigned high_word_offset(const struct bkey_format *f) -+{ -+ return 0; -+} -+ -+#define high_bit_offset KEY_PACKED_BITS_START -+#define nth_word(p, n) ((p) + (n)) -+ -+#else -+#error edit for your odd byteorder. 
-+#endif
-+
-+#define high_word(f, k) ((u64 *) (k)->_data + high_word_offset(f))
-+#define next_word(p) nth_word(p, 1)
-+#define prev_word(p) nth_word(p, -1)
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+void bch2_bkey_pack_test(void);
-+#else
-+static inline void bch2_bkey_pack_test(void) {}
-+#endif
-+
-+#define bkey_fields() \
-+	x(BKEY_FIELD_INODE, p.inode) \
-+	x(BKEY_FIELD_OFFSET, p.offset) \
-+	x(BKEY_FIELD_SNAPSHOT, p.snapshot) \
-+	x(BKEY_FIELD_SIZE, size) \
-+	x(BKEY_FIELD_VERSION_HI, version.hi) \
-+	x(BKEY_FIELD_VERSION_LO, version.lo)
-+
-+struct bkey_format_state {
-+	u64 field_min[BKEY_NR_FIELDS];
-+	u64 field_max[BKEY_NR_FIELDS];
-+};
-+
-+void bch2_bkey_format_init(struct bkey_format_state *);
-+
-+static inline void __bkey_format_add(struct bkey_format_state *s, unsigned field, u64 v)
-+{
-+	s->field_min[field] = min(s->field_min[field], v);
-+	s->field_max[field] = max(s->field_max[field], v);
-+}
-+
-+/*
-+ * Changes @format so that @k can be successfully packed with @format
-+ */
-+static inline void bch2_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k)
-+{
-+#define x(id, field) __bkey_format_add(s, id, k->field);
-+	bkey_fields()
-+#undef x
-+}
-+
-+void bch2_bkey_format_add_pos(struct bkey_format_state *, struct bpos);
-+struct bkey_format bch2_bkey_format_done(struct bkey_format_state *);
-+int bch2_bkey_format_invalid(struct bch_fs *, struct bkey_format *,
-+		enum bkey_invalid_flags, struct printbuf *);
-+void bch2_bkey_format_to_text(struct printbuf *, const struct bkey_format *);
-+
-+#endif /* _BCACHEFS_BKEY_H */
-diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h
-new file mode 100644
-index 000000000000..a30c4ae8eb36
---- /dev/null
-+++ b/fs/bcachefs/bkey_buf.h
-@@ -0,0 +1,61 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_BUF_H
-+#define _BCACHEFS_BKEY_BUF_H
-+
-+#include "bcachefs.h"
-+#include "bkey.h"
-+
-+struct bkey_buf {
-+	struct bkey_i *k;
-+	u64 onstack[12];
-+};
-+
-+static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
-+		struct bch_fs *c, unsigned u64s)
-+{
-+	if (s->k == (void *) s->onstack &&
-+	    u64s > ARRAY_SIZE(s->onstack)) {
-+		s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
-+		memcpy(s->k, s->onstack, sizeof(s->onstack));
-+	}
-+}
-+
-+static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
-+		struct bch_fs *c,
-+		struct bkey_s_c k)
-+{
-+	bch2_bkey_buf_realloc(s, c, k.k->u64s);
-+	bkey_reassemble(s->k, k);
-+}
-+
-+static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
-+		struct bch_fs *c,
-+		struct bkey_i *src)
-+{
-+	bch2_bkey_buf_realloc(s, c, src->k.u64s);
-+	bkey_copy(s->k, src);
-+}
-+
-+static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
-+		struct bch_fs *c,
-+		struct btree *b,
-+		struct bkey_packed *src)
-+{
-+	bch2_bkey_buf_realloc(s, c, BKEY_U64s +
-+			      bkeyp_val_u64s(&b->format, src));
-+	bch2_bkey_unpack(b, s->k, src);
-+}
-+
-+static inline void bch2_bkey_buf_init(struct bkey_buf *s)
-+{
-+	s->k = (void *) s->onstack;
-+}
-+
-+static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c)
-+{
-+	if (s->k != (void *) s->onstack)
-+		mempool_free(s->k, &c->large_bkey_pool);
-+	s->k = NULL;
-+}
-+
-+#endif /* _BCACHEFS_BKEY_BUF_H */
-diff --git a/fs/bcachefs/bkey_cmp.h b/fs/bcachefs/bkey_cmp.h
-new file mode 100644
-index 000000000000..5f42a6e69360
---- /dev/null
-+++ b/fs/bcachefs/bkey_cmp.h
-@@ -0,0 +1,129 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_CMP_H
-+#define _BCACHEFS_BKEY_CMP_H
-+
-+#include "bkey.h"
-+
-+#ifdef CONFIG_X86_64
-+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-+		unsigned nr_key_bits)
-+{
-+	long d0, d1, d2, d3;
-+	int cmp;
-+
-+	/* we shouldn't need asm for this, but gcc is being retarded: */
-+
-+	asm(".intel_syntax noprefix;"
-+	    "xor eax, eax;"
-+	    "xor edx, edx;"
-+	    "1:;"
-+	    "mov r8, [rdi];"
-+	    "mov r9, [rsi];"
-+	    "sub ecx, 64;"
-+	    "jl 2f;"
-+
-+	    "cmp r8, r9;"
-+	    "jnz 3f;"
-+
-+	    "lea rdi, [rdi - 8];"
-+	    "lea rsi, [rsi - 8];"
-+	    "jmp 1b;"
-+
-+	    "2:;"
-+	    "not ecx;"
-+	    "shr r8, 1;"
-+	    "shr r9, 1;"
-+	    "shr r8, cl;"
-+	    "shr r9, cl;"
-+	    "cmp r8, r9;"
-+
-+	    "3:\n"
-+	    "seta al;"
-+	    "setb dl;"
-+	    "sub eax, edx;"
-+	    ".att_syntax prefix;"
-+	    : "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
-+	    : "0" (l), "1" (r), "3" (nr_key_bits)
-+	    : "r8", "r9", "cc", "memory");
-+
-+	return cmp;
-+}
-+#else
-+static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
-+		unsigned nr_key_bits)
-+{
-+	u64 l_v, r_v;
-+
-+	if (!nr_key_bits)
-+		return 0;
-+
-+	/* for big endian, skip past header */
-+	nr_key_bits += high_bit_offset;
-+	l_v = *l & (~0ULL >> high_bit_offset);
-+	r_v = *r & (~0ULL >> high_bit_offset);
-+
-+	while (1) {
-+		if (nr_key_bits < 64) {
-+			l_v >>= 64 - nr_key_bits;
-+			r_v >>= 64 - nr_key_bits;
-+			nr_key_bits = 0;
-+		} else {
-+			nr_key_bits -= 64;
-+		}
-+
-+		if (!nr_key_bits || l_v != r_v)
-+			break;
-+
-+		l = next_word(l);
-+		r = next_word(r);
-+
-+		l_v = *l;
-+		r_v = *r;
-+	}
-+
-+	return cmp_int(l_v, r_v);
-+}
-+#endif
-+
-+static inline __pure __flatten
-+int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
-+		const struct bkey_packed *r,
-+		const struct btree *b)
-+{
-+	const struct bkey_format *f = &b->format;
-+	int ret;
-+
-+	EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
-+	EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
-+
-+	ret = __bkey_cmp_bits(high_word(f, l),
-+			      high_word(f, r),
-+			      b->nr_key_bits);
-+
-+	EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
-+				bkey_unpack_pos(b, r)));
-+	return ret;
-+}
-+
-+static inline __pure __flatten
-+int bch2_bkey_cmp_packed_inlined(const struct btree *b,
-+		const struct bkey_packed *l,
-+		const struct bkey_packed *r)
-+{
-+	struct bkey unpacked;
-+
-+	if (likely(bkey_packed(l) && bkey_packed(r)))
-+		return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
-+
-+	if (bkey_packed(l)) {
-+		__bkey_unpack_key_format_checked(b, &unpacked, l);
-+		l = (void *) &unpacked;
-+	} else if (bkey_packed(r)) {
-+		__bkey_unpack_key_format_checked(b, &unpacked, r);
-+		r = (void *) &unpacked;
-+	}
-+
-+	return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
-+}
-+
-+#endif /* _BCACHEFS_BKEY_CMP_H */
-diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
-new file mode 100644
-index 000000000000..761f5e33b1e6
---- /dev/null
-+++ b/fs/bcachefs/bkey_methods.c
-@@ -0,0 +1,459 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "backpointers.h"
-+#include "bkey_methods.h"
-+#include "btree_cache.h"
-+#include "btree_types.h"
-+#include "alloc_background.h"
-+#include "dirent.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "inode.h"
-+#include "io_misc.h"
-+#include "lru.h"
-+#include "quota.h"
-+#include "reflink.h"
-+#include "snapshot.h"
-+#include "subvolume.h"
-+#include "xattr.h"
-+
-+const char * const bch2_bkey_types[] = {
-+#define x(name, nr) #name,
-+	BCH_BKEY_TYPES()
-+#undef x
-+	NULL
-+};
-+
-+static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+	return 0;
-+}
-+
-+#define bch2_bkey_ops_deleted ((struct bkey_ops) { \
-+	.key_invalid = deleted_key_invalid, \
-+})
-+
-+#define bch2_bkey_ops_whiteout ((struct bkey_ops) { \
-+	.key_invalid = deleted_key_invalid, \
-+})
-+
-+static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+	int ret = 0;
-+
-+	bkey_fsck_err_on(bkey_val_bytes(k.k), c, err,
-+			 bkey_val_size_nonzero,
-+			 "incorrect value size (%zu != 0)",
-+			 bkey_val_bytes(k.k));
-+fsck_err:
-+	return ret;
-+}
-+
-+#define bch2_bkey_ops_error ((struct bkey_ops) { \
-+	.key_invalid = empty_val_key_invalid, \
-+})
-+
-+static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+	return 0;
-+}
-+
-+#define bch2_bkey_ops_cookie ((struct bkey_ops) { \
-+	.key_invalid = key_type_cookie_invalid, \
-+	.min_val_size = 8, \
-+})
-+
-+#define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\
-+	.key_invalid = empty_val_key_invalid, \
-+})
-+
-+static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum bkey_invalid_flags flags, struct printbuf *err)
-+{
-+	return 0;
-+}
-+
-+static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
-+		struct bkey_s_c k)
-+{
-+	struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
-+	unsigned datalen = bkey_inline_data_bytes(k.k);
-+
-+	prt_printf(out, "datalen %u: %*phN",
-+		   datalen, min(datalen, 32U), d.v->data);
-+}
-+
-+#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \
-+	.key_invalid = key_type_inline_data_invalid, \
-+	.val_to_text = key_type_inline_data_to_text, \
-+})
-+
-+static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
-+{
-+	bch2_key_resize(l.k, l.k->size + r.k->size);
-+	return true;
-+}
-+
-+#define bch2_bkey_ops_set ((struct bkey_ops) { \
-+	.key_invalid = empty_val_key_invalid, \
-+	.key_merge = key_type_set_merge, \
-+})
-+
-+const struct bkey_ops bch2_bkey_ops[] = {
-+#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
-+	BCH_BKEY_TYPES()
-+#undef x
-+};
-+
-+const struct bkey_ops bch2_bkey_null_ops = {
-+};
-+
-+int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum bkey_invalid_flags flags,
-+		struct printbuf *err)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+	int ret = 0;
-+
-+	bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err,
-+			 bkey_val_size_too_small,
-+			 "bad val size (%zu < %u)",
-+			 bkey_val_bytes(k.k), ops->min_val_size);
-+
-+	if (!ops->key_invalid)
-+		return 0;
-+
-+	ret = ops->key_invalid(c, k, flags, err);
-+fsck_err:
-+	return ret;
-+}
-+
-+static u64 bch2_key_types_allowed[] = {
-+	[BKEY_TYPE_btree] =
-+		BIT_ULL(KEY_TYPE_deleted)|
-+		BIT_ULL(KEY_TYPE_btree_ptr)|
-+		BIT_ULL(KEY_TYPE_btree_ptr_v2),
-+#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys,
-+	BCH_BTREE_IDS()
-+#undef x
-+};
-+
-+const char *bch2_btree_node_type_str(enum btree_node_type type)
-+{
-+	return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
-+}
-+
-+int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum btree_node_type type,
-+		enum bkey_invalid_flags flags,
-+		struct printbuf *err)
-+{
-+	int ret = 0;
-+
-+	bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err,
-+			 bkey_u64s_too_small,
-+			 "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s);
-+
-+	if (type >= BKEY_TYPE_NR)
-+		return 0;
-+
-+	bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
-+			 !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
-+			 bkey_invalid_type_for_btree,
-+			 "invalid key type for btree %s (%s)",
-+			 bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
-+
-+	if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
-+		bkey_fsck_err_on(k.k->size == 0, c, err,
-+				 bkey_extent_size_zero,
-+				 "size == 0");
-+
-+		bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err,
-+				 bkey_extent_size_greater_than_offset,
-+				 "size greater than offset (%u > %llu)",
-+				 k.k->size, k.k->p.offset);
-+	} else {
-+		bkey_fsck_err_on(k.k->size, c, err,
-+				 bkey_size_nonzero,
-+				 "size != 0");
-+	}
-+
-+	if (type != BKEY_TYPE_btree) {
-+		enum btree_id btree = type - 1;
-+
-+		if (btree_type_has_snapshots(btree)) {
-+			bkey_fsck_err_on(!k.k->p.snapshot, c, err,
-+					 bkey_snapshot_zero,
-+					 "snapshot == 0");
-+		} else if (!btree_type_has_snapshot_field(btree)) {
-+			bkey_fsck_err_on(k.k->p.snapshot, c, err,
-+					 bkey_snapshot_nonzero,
-+					 "nonzero snapshot");
-+		} else {
-+			/*
-+			 * btree uses snapshot field but it's not required to be
-+			 * nonzero
-+			 */
-+		}
-+
-+		bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err,
-+				 bkey_at_pos_max,
-+				 "key at POS_MAX");
-+	}
-+fsck_err:
-+	return ret;
-+}
-+
-+int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
-+		enum btree_node_type type,
-+		enum bkey_invalid_flags flags,
-+		struct printbuf *err)
-+{
-+	return __bch2_bkey_invalid(c, k, type, flags, err) ?:
-+		bch2_bkey_val_invalid(c, k, flags, err);
-+}
-+
-+int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
-+		struct bkey_s_c k, struct printbuf *err)
-+{
-+	int ret = 0;
-+
-+	bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err,
-+			 bkey_before_start_of_btree_node,
-+			 "key before start of btree node");
-+
-+	bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err,
-+			 bkey_after_end_of_btree_node,
-+			 "key past end of btree node");
-+fsck_err:
-+	return ret;
-+}
-+
-+void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
-+{
-+	if (bpos_eq(pos, POS_MIN))
-+		prt_printf(out, "POS_MIN");
-+	else if (bpos_eq(pos, POS_MAX))
-+		prt_printf(out, "POS_MAX");
-+	else if (bpos_eq(pos, SPOS_MAX))
-+		prt_printf(out, "SPOS_MAX");
-+	else {
-+		if (pos.inode == U64_MAX)
-+			prt_printf(out, "U64_MAX");
-+		else
-+			prt_printf(out, "%llu", pos.inode);
-+		prt_printf(out, ":");
-+		if (pos.offset == U64_MAX)
-+			prt_printf(out, "U64_MAX");
-+		else
-+			prt_printf(out, "%llu", pos.offset);
-+		prt_printf(out, ":");
-+		if (pos.snapshot == U32_MAX)
-+			prt_printf(out, "U32_MAX");
-+		else
-+			prt_printf(out, "%u", pos.snapshot);
-+	}
-+}
-+
-+void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
-+{
-+	if (k) {
-+		prt_printf(out, "u64s %u type ", k->u64s);
-+
-+		if (k->type < KEY_TYPE_MAX)
-+			prt_printf(out, "%s ", bch2_bkey_types[k->type]);
-+		else
-+			prt_printf(out, "%u ", k->type);
-+
-+		bch2_bpos_to_text(out, k->p);
-+
-+		prt_printf(out, " len %u ver %llu", k->size, k->version.lo);
-+	} else {
-+		prt_printf(out, "(null)");
-+	}
-+}
-+
-+void bch2_val_to_text(struct printbuf *out, struct bch_fs *c,
-+		struct bkey_s_c k)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+
-+	if (likely(ops->val_to_text))
-+		ops->val_to_text(out, c, k);
-+}
-+
-+void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
-+		struct bkey_s_c k)
-+{
-+	bch2_bkey_to_text(out, k.k);
-+
-+	if (bkey_val_bytes(k.k)) {
-+		prt_printf(out, ": ");
-+		bch2_val_to_text(out, c, k);
-+	}
-+}
-+
-+void bch2_bkey_swab_val(struct bkey_s k)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+
-+	if (ops->swab)
-+		ops->swab(k);
-+}
-+
-+bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type);
-+
-+	return ops->key_normalize
-+		? ops->key_normalize(c, k)
-+		: false;
-+}
-+
-+bool bch2_bkey_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(l.k->type);
-+
-+	return ops->key_merge &&
-+		bch2_bkey_maybe_mergable(l.k, r.k) &&
-+		(u64) l.k->size + r.k->size <= KEY_SIZE_MAX &&
-+		!bch2_key_merging_disabled &&
-+		ops->key_merge(c, l, r);
-+}
-+
-+static const struct old_bkey_type {
-+	u8 btree_node_type;
-+	u8 old;
-+	u8 new;
-+} bkey_renumber_table[] = {
-+	{BKEY_TYPE_btree, 128, KEY_TYPE_btree_ptr },
-+	{BKEY_TYPE_extents, 128, KEY_TYPE_extent },
-+	{BKEY_TYPE_extents, 129, KEY_TYPE_extent },
-+	{BKEY_TYPE_extents, 130, KEY_TYPE_reservation },
-+	{BKEY_TYPE_inodes, 128, KEY_TYPE_inode },
-+	{BKEY_TYPE_inodes, 130, KEY_TYPE_inode_generation },
-+	{BKEY_TYPE_dirents, 128, KEY_TYPE_dirent },
-+	{BKEY_TYPE_dirents, 129, KEY_TYPE_hash_whiteout },
-+	{BKEY_TYPE_xattrs, 128, KEY_TYPE_xattr },
-+	{BKEY_TYPE_xattrs, 129, KEY_TYPE_hash_whiteout },
-+	{BKEY_TYPE_alloc, 128, KEY_TYPE_alloc },
-+	{BKEY_TYPE_quotas, 128, KEY_TYPE_quota },
-+};
-+
-+void bch2_bkey_renumber(enum btree_node_type btree_node_type,
-+		struct bkey_packed *k,
-+		int write)
-+{
-+	const struct old_bkey_type *i;
-+
-+	for (i = bkey_renumber_table;
-+	     i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table);
-+	     i++)
-+		if (btree_node_type == i->btree_node_type &&
-+		    k->type == (write ? i->new : i->old)) {
-+			k->type = write ? i->old : i->new;
-+			break;
-+		}
-+}
-+
-+void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
-+		unsigned version, unsigned big_endian,
-+		int write,
-+		struct bkey_format *f,
-+		struct bkey_packed *k)
-+{
-+	const struct bkey_ops *ops;
-+	struct bkey uk;
-+	unsigned nr_compat = 5;
-+	int i;
-+
-+	/*
-+	 * Do these operations in reverse order in the write path:
-+	 */
-+
-+	for (i = 0; i < nr_compat; i++)
-+	switch (!write ? i : nr_compat - 1 - i) {
-+	case 0:
-+		if (big_endian != CPU_BIG_ENDIAN)
-+			bch2_bkey_swab_key(f, k);
-+		break;
-+	case 1:
-+		if (version < bcachefs_metadata_version_bkey_renumber)
-+			bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
-+		break;
-+	case 2:
-+		if (version < bcachefs_metadata_version_inode_btree_change &&
-+		    btree_id == BTREE_ID_inodes) {
-+			if (!bkey_packed(k)) {
-+				struct bkey_i *u = packed_to_bkey(k);
-+
-+				swap(u->k.p.inode, u->k.p.offset);
-+			} else if (f->bits_per_field[BKEY_FIELD_INODE] &&
-+				   f->bits_per_field[BKEY_FIELD_OFFSET]) {
-+				struct bkey_format tmp = *f, *in = f, *out = &tmp;
-+
-+				swap(tmp.bits_per_field[BKEY_FIELD_INODE],
-+				     tmp.bits_per_field[BKEY_FIELD_OFFSET]);
-+				swap(tmp.field_offset[BKEY_FIELD_INODE],
-+				     tmp.field_offset[BKEY_FIELD_OFFSET]);
-+
-+				if (!write)
-+					swap(in, out);
-+
-+				uk = __bch2_bkey_unpack_key(in, k);
-+				swap(uk.p.inode, uk.p.offset);
-+				BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
-+			}
-+		}
-+		break;
-+	case 3:
-+		if (version < bcachefs_metadata_version_snapshot &&
-+		    (level || btree_type_has_snapshots(btree_id))) {
-+			struct bkey_i *u = packed_to_bkey(k);
-+
-+			if (u) {
-+				u->k.p.snapshot = write
-+					? 0 : U32_MAX;
-+			} else {
-+				u64 min_packed = le64_to_cpu(f->field_offset[BKEY_FIELD_SNAPSHOT]);
-+				u64 max_packed = min_packed +
-+					~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
-+
-+				uk = __bch2_bkey_unpack_key(f, k);
-+				uk.p.snapshot = write
-+					? min_packed : min_t(u64, U32_MAX, max_packed);
-+
-+				BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
-+			}
-+		}
-+
-+		break;
-+	case 4: {
-+		struct bkey_s u;
-+
-+		if (!bkey_packed(k)) {
-+			u = bkey_i_to_s(packed_to_bkey(k));
-+		} else {
-+			uk = __bch2_bkey_unpack_key(f, k);
-+			u.k = &uk;
-+			u.v = bkeyp_val(f, k);
-+		}
-+
-+		if (big_endian != CPU_BIG_ENDIAN)
-+			bch2_bkey_swab_val(u);
-+
-+		ops = bch2_bkey_type_ops(k->type);
-+
-+		if (ops->compat)
-+			ops->compat(btree_id, version, big_endian, write, u);
-+		break;
-+	}
-+	default:
-+		BUG();
-+	}
-+}
-diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
-new file mode 100644
-index 000000000000..3a370b7087ac
---- /dev/null
-+++ b/fs/bcachefs/bkey_methods.h
-@@ -0,0 +1,179 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_METHODS_H
-+#define _BCACHEFS_BKEY_METHODS_H
-+
-+#include "bkey.h"
-+
-+struct bch_fs;
-+struct btree;
-+struct btree_trans;
-+struct bkey;
-+enum btree_node_type;
-+
-+extern const char * const bch2_bkey_types[];
-+extern const struct bkey_ops bch2_bkey_null_ops;
-+
-+/*
-+ * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If
-+ * invalid, entire key will be deleted.
-+ *
-+ * When invalid, error string is returned via @err. @rw indicates whether key is
-+ * being read or written; more aggressive checks can be enabled when rw == WRITE.
-+ */
-+struct bkey_ops {
-+	int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k,
-+			   enum bkey_invalid_flags flags, struct printbuf *err);
-+	void (*val_to_text)(struct printbuf *, struct bch_fs *,
-+			    struct bkey_s_c);
-+	void (*swab)(struct bkey_s);
-+	bool (*key_normalize)(struct bch_fs *, struct bkey_s);
-+	bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+	int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned,
-+			     struct bkey_s_c, struct bkey_i *, unsigned);
-+	int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned,
-+			      struct bkey_s_c, struct bkey_s_c, unsigned);
-+	void (*compat)(enum btree_id id, unsigned version,
-+		       unsigned big_endian, int write,
-+		       struct bkey_s);
-+
-+	/* Size of value type when first created: */
-+	unsigned min_val_size;
-+};
-+
-+extern const struct bkey_ops bch2_bkey_ops[];
-+
-+static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type)
-+{
-+	return likely(type < KEY_TYPE_MAX)
-+		? &bch2_bkey_ops[type]
-+		: &bch2_bkey_null_ops;
-+}
-+
-+int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c,
-+		enum bkey_invalid_flags, struct printbuf *);
-+int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
-+		enum bkey_invalid_flags, struct printbuf *);
-+int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
-+		enum bkey_invalid_flags, struct printbuf *);
-+int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *,
-+		struct bkey_s_c, struct printbuf *);
-+
-+void bch2_bpos_to_text(struct printbuf *, struct bpos);
-+void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
-+void bch2_val_to_text(struct printbuf *, struct bch_fs *,
-+		struct bkey_s_c);
-+void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *,
-+		struct bkey_s_c);
-+
-+void bch2_bkey_swab_val(struct bkey_s);
-+
-+bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
-+
-+static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
-+{
-+	return l->type == r->type &&
-+		!bversion_cmp(l->version, r->version) &&
-+		bpos_eq(l->p, bkey_start_pos(r));
-+}
-+
-+bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-+
-+static inline int bch2_mark_key(struct btree_trans *trans,
-+		enum btree_id btree, unsigned level,
-+		struct bkey_s_c old, struct bkey_s_c new,
-+		unsigned flags)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
-+
-+	return ops->atomic_trigger
-+		? ops->atomic_trigger(trans, btree, level, old, new, flags)
-+		: 0;
-+}
-+
-+enum btree_update_flags {
-+	__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
-+	__BTREE_UPDATE_NOJOURNAL,
-+	__BTREE_UPDATE_PREJOURNAL,
-+	__BTREE_UPDATE_KEY_CACHE_RECLAIM,
-+
-+	__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
-+
-+	__BTREE_TRIGGER_INSERT,
-+	__BTREE_TRIGGER_OVERWRITE,
-+
-+	__BTREE_TRIGGER_GC,
-+	__BTREE_TRIGGER_BUCKET_INVALIDATE,
-+	__BTREE_TRIGGER_NOATOMIC,
-+};
-+
-+#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
-+#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
-+#define BTREE_UPDATE_PREJOURNAL (1U << __BTREE_UPDATE_PREJOURNAL)
-+#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
-+
-+#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
-+
-+#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
-+#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
-+
-+#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
-+#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-+#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
-+
-+static inline int bch2_trans_mark_key(struct btree_trans *trans,
-+		enum btree_id btree_id, unsigned level,
-+		struct bkey_s_c old, struct bkey_i *new,
-+		unsigned flags)
-+{
-+	const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type);
-+
-+	return ops->trans_trigger
-+		? ops->trans_trigger(trans, btree_id, level, old, new, flags)
-+		: 0;
-+}
-+
-+static inline int bch2_trans_mark_old(struct btree_trans *trans,
-+		enum btree_id btree_id, unsigned level,
-+		struct bkey_s_c old, unsigned flags)
-+{
-+	struct bkey_i deleted;
-+
-+	bkey_init(&deleted.k);
-+	deleted.k.p = old.k->p;
-+
-+	return bch2_trans_mark_key(trans, btree_id, level, old, &deleted,
-+				   BTREE_TRIGGER_OVERWRITE|flags);
-+}
-+
-+static inline int bch2_trans_mark_new(struct btree_trans *trans,
-+		enum btree_id btree_id, unsigned level,
-+		struct bkey_i *new, unsigned flags)
-+{
-+	struct bkey_i deleted;
-+
-+	bkey_init(&deleted.k);
-+	deleted.k.p = new->k.p;
-+
-+	return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
-+				   BTREE_TRIGGER_INSERT|flags);
-+}
-+
-+void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
-+
-+void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
-+		int, struct bkey_format *, struct bkey_packed *);
-+
-+static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
-+		unsigned version, unsigned big_endian,
-+		int write,
-+		struct bkey_format *f,
-+		struct bkey_packed *k)
-+{
-+	if (version < bcachefs_metadata_version_current ||
-+	    big_endian != CPU_BIG_ENDIAN)
-+		__bch2_bkey_compat(level, btree_id, version,
-+				   big_endian, write, f, k);
-+
-+}
-+
-+#endif /* _BCACHEFS_BKEY_METHODS_H */
-diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c
-new file mode 100644
-index 000000000000..bcca9e76a0b4
---- /dev/null
-+++ b/fs/bcachefs/bkey_sort.c
-@@ -0,0 +1,201 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "bkey_cmp.h"
-+#include "bkey_sort.h"
-+#include "bset.h"
-+#include "extents.h"
-+
-+typedef int (*sort_cmp_fn)(struct btree *,
-+		struct bkey_packed *,
-+		struct bkey_packed *);
-+
-+static inline bool sort_iter_end(struct sort_iter *iter)
-+{
-+	return !iter->used;
-+}
-+
-+static inline void sort_iter_sift(struct sort_iter *iter, unsigned from,
-+		sort_cmp_fn cmp)
-+{
-+	unsigned i;
-+
-+	for (i = from;
-+	     i + 1 < iter->used &&
-+	     cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0;
-+	     i++)
-+		swap(iter->data[i], iter->data[i + 1]);
-+}
-+
-+static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp)
-+{
-+	unsigned i = iter->used;
-+
-+	while (i--)
-+		sort_iter_sift(iter, i, cmp);
-+}
-+
-+static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
-+{
-+	return !sort_iter_end(iter) ? iter->data->k : NULL;
-+}
-+
-+static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
-+{
-+	struct sort_iter_set *i = iter->data;
-+
-+	BUG_ON(!iter->used);
-+
-+	i->k = bkey_p_next(i->k);
-+
-+	BUG_ON(i->k > i->end);
-+
-+	if (i->k == i->end)
-+		array_remove_item(iter->data, iter->used, 0);
-+	else
-+		sort_iter_sift(iter, 0, cmp);
-+}
-+
-+static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter,
-+		sort_cmp_fn cmp)
-+{
-+	struct bkey_packed *ret = sort_iter_peek(iter);
-+
-+	if (ret)
-+		sort_iter_advance(iter, cmp);
-+
-+	return ret;
-+}
-+
-+/*
-+ * If keys compare equal, compare by pointer order:
-+ */
-+static inline int key_sort_fix_overlapping_cmp(struct btree *b,
-+		struct bkey_packed *l,
-+		struct bkey_packed *r)
-+{
-+	return bch2_bkey_cmp_packed(b, l, r) ?:
-+		cmp_int((unsigned long) l, (unsigned long) r);
-+}
-+
-+static inline bool should_drop_next_key(struct sort_iter *iter)
-+{
-+	/*
-+	 * key_sort_cmp() ensures that when keys compare equal the older key
-+	 * comes first; so if l->k compares equal to r->k then l->k is older
-+	 * and should be dropped.
-+	 */
-+	return iter->used >= 2 &&
-+		!bch2_bkey_cmp_packed(iter->b,
-+				      iter->data[0].k,
-+				      iter->data[1].k);
-+}
-+
-+struct btree_nr_keys
-+bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
-+		struct sort_iter *iter)
-+{
-+	struct bkey_packed *out = dst->start;
-+	struct bkey_packed *k;
-+	struct btree_nr_keys nr;
-+
-+	memset(&nr, 0, sizeof(nr));
-+
-+	sort_iter_sort(iter, key_sort_fix_overlapping_cmp);
-+
-+	while ((k = sort_iter_peek(iter))) {
-+		if (!bkey_deleted(k) &&
-+		    !should_drop_next_key(iter)) {
-+			bkey_p_copy(out, k);
-+			btree_keys_account_key_add(&nr, 0, out);
-+			out = bkey_p_next(out);
-+		}
-+
-+		sort_iter_advance(iter, key_sort_fix_overlapping_cmp);
-+	}
-+
-+	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
-+	return nr;
-+}
-+
-+/* Sort + repack in a new format: */
-+struct btree_nr_keys
-+bch2_sort_repack(struct bset *dst, struct btree *src,
-+		struct btree_node_iter *src_iter,
-+		struct bkey_format *out_f,
-+		bool filter_whiteouts)
-+{
-+	struct bkey_format *in_f = &src->format;
-+	struct bkey_packed *in, *out = vstruct_last(dst);
-+	struct btree_nr_keys nr;
-+	bool transform = memcmp(out_f, &src->format, sizeof(*out_f));
-+
-+	memset(&nr, 0, sizeof(nr));
-+
-+	while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
-+		if (filter_whiteouts && bkey_deleted(in))
-+			continue;
-+
-+		if (!transform)
-+			bkey_p_copy(out, in);
-+		else if (bch2_bkey_transform(out_f, out, bkey_packed(in)
-+				? in_f : &bch2_bkey_format_current, in))
-+			out->format = KEY_FORMAT_LOCAL_BTREE;
-+		else
-+			bch2_bkey_unpack(src, (void *) out, in);
-+
-+		out->needs_whiteout = false;
-+
-+		btree_keys_account_key_add(&nr, 0, out);
-+		out = bkey_p_next(out);
-+	}
-+
-+	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
-+	return nr;
-+}
-+
-+static inline int sort_keys_cmp(struct btree *b,
-+		struct bkey_packed *l,
-+		struct bkey_packed *r)
-+{
-+	return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
-+		(int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
-+		(int) l->needs_whiteout - (int) r->needs_whiteout;
-+}
-+
-+unsigned bch2_sort_keys(struct bkey_packed *dst,
-+		struct sort_iter *iter,
-+		bool filter_whiteouts)
-+{
-+	const struct bkey_format *f = &iter->b->format;
-+	struct bkey_packed *in, *next, *out = dst;
-+
-+	sort_iter_sort(iter, sort_keys_cmp);
-+
-+	while ((in = sort_iter_next(iter, sort_keys_cmp))) {
-+		bool needs_whiteout = false;
-+
-+		if (bkey_deleted(in) &&
-+		    (filter_whiteouts || !in->needs_whiteout))
-+			continue;
-+
-+		while ((next = sort_iter_peek(iter)) &&
-+		       !bch2_bkey_cmp_packed_inlined(iter->b, in, next)) {
-+			BUG_ON(in->needs_whiteout &&
-+			       next->needs_whiteout);
-+			needs_whiteout |= in->needs_whiteout;
-+			in = sort_iter_next(iter, sort_keys_cmp);
-+		}
-+
-+		if (bkey_deleted(in)) {
-+			memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in));
-+			set_bkeyp_val_u64s(f, out, 0);
-+		} else {
-+			bkey_p_copy(out, in);
-+		}
-+		out->needs_whiteout |= needs_whiteout;
-+		out = bkey_p_next(out);
-+	}
-+
-+	return (u64 *) out - (u64 *) dst;
-+}
-diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h
-new file mode 100644
-index 000000000000..7c0f0b160f18
---- /dev/null
-+++ b/fs/bcachefs/bkey_sort.h
-@@ -0,0 +1,54 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BKEY_SORT_H
-+#define _BCACHEFS_BKEY_SORT_H
-+
-+struct sort_iter {
-+	struct btree *b;
-+	unsigned used;
-+	unsigned size;
-+
-+	struct sort_iter_set {
-+		struct bkey_packed *k, *end;
-+	} data[];
-+};
-+
-+static inline void sort_iter_init(struct sort_iter *iter, struct btree *b, unsigned size)
-+{
-+	iter->b = b;
-+	iter->used = 0;
-+	iter->size = size;
-+}
-+
-+struct sort_iter_stack {
-+	struct sort_iter iter;
-+	struct sort_iter_set sets[MAX_BSETS + 1];
-+};
-+
-+static inline void sort_iter_stack_init(struct sort_iter_stack *iter, struct btree *b)
-+{
-+	sort_iter_init(&iter->iter, b, ARRAY_SIZE(iter->sets));
-+}
-+
-+static inline void sort_iter_add(struct sort_iter *iter,
-+		struct bkey_packed *k,
-+		struct bkey_packed *end)
-+{
-+	BUG_ON(iter->used >= iter->size);
-+
-+	if (k != end)
-+		iter->data[iter->used++] = (struct sort_iter_set) { k, end };
-+}
-+
-+struct btree_nr_keys
-+bch2_key_sort_fix_overlapping(struct bch_fs *, struct bset *,
-+		struct sort_iter *);
-+
-+struct btree_nr_keys
-+bch2_sort_repack(struct bset *, struct btree *,
-+		struct btree_node_iter *,
-+		struct bkey_format *, bool);
-+
-+unsigned bch2_sort_keys(struct bkey_packed *,
-+		struct sort_iter *, bool);
-+
-+#endif /* _BCACHEFS_BKEY_SORT_H */
-diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
-new file mode 100644
-index 000000000000..bb73ba9017b0
---- /dev/null
-+++ b/fs/bcachefs/bset.c
-@@ -0,0 +1,1592 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Code for working with individual keys, and sorted sets of keys with in a
-+ * btree node
-+ *
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "btree_cache.h"
-+#include "bset.h"
-+#include "eytzinger.h"
-+#include "trace.h"
-+#include "util.h"
-+
-+#include <asm/unaligned.h>
-+#include <linux/console.h>
-+#include <linux/random.h>
-+#include <linux/prefetch.h>
-+
-+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *,
-+		struct btree *);
-+
-+static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter)
-+{
-+	unsigned n = ARRAY_SIZE(iter->data);
-+
-+	while (n && __btree_node_iter_set_end(iter, n - 1))
-+		--n;
-+
-+	return n;
-+}
-+
-+struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k)
-+{
-+	return bch2_bkey_to_bset_inlined(b, k);
-+}
-+
-+/*
-+ * There are never duplicate live keys in the btree - but including keys that
-+ * have been flagged as deleted (and will be cleaned up later) we _will_ see
-+ * duplicates.
-+ *
-+ * Thus the sort order is: usual key comparison first, but for keys that compare
-+ * equal the deleted key(s) come first, and the (at most one) live version comes
-+ * last.
-+ *
-+ * The main reason for this is insertion: to handle overwrites, we first iterate
-+ * over keys that compare equal to our insert key, and then insert immediately
-+ * prior to the first key greater than the key we're inserting - our insert
-+ * position will be after all keys that compare equal to our insert key, which
-+ * by the time we actually do the insert will all be deleted.
-+ */
-+
-+void bch2_dump_bset(struct bch_fs *c, struct btree *b,
-+		struct bset *i, unsigned set)
-+{
-+	struct bkey_packed *_k, *_n;
-+	struct bkey uk, n;
-+	struct bkey_s_c k;
-+	struct printbuf buf = PRINTBUF;
-+
-+	if (!i->u64s)
-+		return;
-+
-+	for (_k = i->start;
-+	     _k < vstruct_last(i);
-+	     _k = _n) {
-+		_n = bkey_p_next(_k);
-+
-+		k = bkey_disassemble(b, _k, &uk);
-+
-+		printbuf_reset(&buf);
-+		if (c)
-+			bch2_bkey_val_to_text(&buf, c, k);
-+		else
-+			bch2_bkey_to_text(&buf, k.k);
-+		printk(KERN_ERR "block %u key %5zu: %s\n", set,
-+		       _k->_data - i->_data, buf.buf);
-+
-+		if (_n == vstruct_last(i))
-+			continue;
-+
-+		n = bkey_unpack_key(b, _n);
-+
-+		if (bpos_lt(n.p, k.k->p)) {
-+			printk(KERN_ERR "Key skipped backwards\n");
-+			continue;
-+		}
-+
-+		if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p))
-+			printk(KERN_ERR "Duplicate keys\n");
-+	}
-+
-+	printbuf_exit(&buf);
-+}
-+
-+void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
-+{
-+	struct bset_tree *t;
-+
-+	console_lock();
-+	for_each_bset(b, t)
-+		bch2_dump_bset(c, b, bset(b, t), t - b->set);
-+	console_unlock();
-+}
-+
-+void bch2_dump_btree_node_iter(struct btree *b,
-+		struct btree_node_iter *iter)
-+{
-+	struct btree_node_iter_set *set;
-+	struct printbuf buf = PRINTBUF;
-+
-+	printk(KERN_ERR "btree node iter with %u/%u sets:\n",
-+	       __btree_node_iter_used(iter), b->nsets);
-+
-+	btree_node_iter_for_each(iter, set) {
-+		struct bkey_packed *k = __btree_node_offset_to_key(b, set->k);
-+		struct bset_tree *t = bch2_bkey_to_bset(b, k);
-+		struct bkey uk = bkey_unpack_key(b, k);
-+
-+		printbuf_reset(&buf);
-+		bch2_bkey_to_text(&buf, &uk);
-+		printk(KERN_ERR "set %zu key %u: %s\n",
-+		       t - b->set, set->k, buf.buf);
-+	}
-+
-+	printbuf_exit(&buf);
-+}
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+void __bch2_verify_btree_nr_keys(struct btree *b)
-+{
-+	struct bset_tree *t;
-+	struct bkey_packed *k;
-+	struct btree_nr_keys nr = { 0 };
-+
-+	for_each_bset(b, t)
-+		bset_tree_for_each_key(b, t, k)
-+			if (!bkey_deleted(k))
-+				btree_keys_account_key_add(&nr, t - b->set, k);
-+
-+	BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
-+}
-+
-+static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter,
-+		struct btree *b)
-+{
-+	struct btree_node_iter iter = *_iter;
-+	const struct bkey_packed *k, *n;
-+
-+	k = bch2_btree_node_iter_peek_all(&iter, b);
-+	__bch2_btree_node_iter_advance(&iter, b);
-+	n = bch2_btree_node_iter_peek_all(&iter, b);
-+
-+	bkey_unpack_key(b, k);
-+
-+	if (n &&
-+	    bkey_iter_cmp(b, k, n) > 0) {
-+		struct btree_node_iter_set *set;
-+		struct bkey ku = bkey_unpack_key(b, k);
-+		struct bkey nu = bkey_unpack_key(b, n);
-+		struct printbuf buf1 = PRINTBUF;
-+		struct printbuf buf2 = PRINTBUF;
-+
-+		bch2_dump_btree_node(NULL, b);
-+		bch2_bkey_to_text(&buf1, &ku);
-+		bch2_bkey_to_text(&buf2, &nu);
-+		printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n",
-+		       buf1.buf, buf2.buf);
-+		printk(KERN_ERR "iter was:");
-+
-+		btree_node_iter_for_each(_iter, set) {
-+			struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k);
-+			struct bset_tree *t = bch2_bkey_to_bset(b, k2);
-+			printk(" [%zi %zi]", t - b->set,
-+			       k2->_data - bset(b, t)->_data);
-+		}
-+		panic("\n");
-+	}
-+}
-+
-+void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
-+		struct btree *b)
-+{
-+	struct btree_node_iter_set *set, *s2;
-+	struct bkey_packed *k, *p;
-+	struct bset_tree *t;
-+
-+	if (bch2_btree_node_iter_end(iter))
-+		return;
-+
-+	/* Verify no duplicates: */
-+	btree_node_iter_for_each(iter, set) {
-+		BUG_ON(set->k > set->end);
-+		btree_node_iter_for_each(iter, s2)
-+			BUG_ON(set != s2 && set->end == s2->end);
-+	}
-+
-+	/* Verify that set->end is correct: */
-+	btree_node_iter_for_each(iter, set) {
-+		for_each_bset(b, t)
-+			if (set->end == t->end_offset)
-+				goto found;
-+		BUG();
-+found:
-+		BUG_ON(set->k < btree_bkey_first_offset(t) ||
-+		       set->k >= t->end_offset);
-+	}
-+
-+	/* Verify iterator is sorted: */
-+	btree_node_iter_for_each(iter, set)
-+		BUG_ON(set != iter->data &&
-+		       btree_node_iter_cmp(b, set[-1], set[0]) > 0);
-+
-+	k = bch2_btree_node_iter_peek_all(iter, b);
-+
-+	for_each_bset(b, t) {
-+		if (iter->data[0].end == t->end_offset)
-+			continue;
-+
-+		p = bch2_bkey_prev_all(b, t,
-+			bch2_btree_node_iter_bset_pos(iter, b, t));
-+
-+		BUG_ON(p && bkey_iter_cmp(b, k, p) < 0);
-+	}
-+}
-+
-+void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
-+		struct bkey_packed *insert, unsigned clobber_u64s)
-+{
-+	struct bset_tree *t = bch2_bkey_to_bset(b, where);
-+	struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where);
-+	struct bkey_packed *next = (void *) ((u64 *) where->_data + clobber_u64s);
-+	struct printbuf buf1 = PRINTBUF;
-+	struct printbuf buf2 = PRINTBUF;
-+#if 0
-+	BUG_ON(prev &&
-+	       bkey_iter_cmp(b, prev, insert) > 0);
-+#else
-+	if (prev &&
-+	    bkey_iter_cmp(b, prev, insert) > 0) {
-+		struct bkey k1 = bkey_unpack_key(b, prev);
-+		struct bkey k2 = bkey_unpack_key(b, insert);
-+
-+		bch2_dump_btree_node(NULL, b);
-+		bch2_bkey_to_text(&buf1, &k1);
-+		bch2_bkey_to_text(&buf2, &k2);
-+
-+		panic("prev > insert:\n"
-+		      "prev key %s\n"
-+		      "insert key %s\n",
-+		      buf1.buf, buf2.buf);
-+	}
-+#endif
-+#if 0
-+	BUG_ON(next != btree_bkey_last(b, t) &&
-+	       bkey_iter_cmp(b, insert, next) > 0);
-+#else
-+	if (next != btree_bkey_last(b, t) &&
-+	    bkey_iter_cmp(b, insert, next) > 0) {
-+		struct bkey k1 = bkey_unpack_key(b, insert);
-+		struct bkey k2 = bkey_unpack_key(b, next);
-+
-+		bch2_dump_btree_node(NULL, b);
-+		bch2_bkey_to_text(&buf1, &k1);
-+		bch2_bkey_to_text(&buf2, &k2);
-+
-+		panic("insert > next:\n"
-+		      "insert key %s\n"
-+		      "next key %s\n",
-+		      buf1.buf, buf2.buf);
-+	}
-+#endif
-+}
-+
-+#else
-+
-+static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
-+		struct btree *b) {}
-+
-+#endif
-+
-+/* Auxiliary search trees */
-+
-+#define BFLOAT_FAILED_UNPACKED U8_MAX
-+#define BFLOAT_FAILED U8_MAX
-+
-+struct bkey_float {
-+	u8 exponent;
-+	u8 key_offset;
-+	u16 mantissa;
-+};
-+#define BKEY_MANTISSA_BITS 16
-+
-+static unsigned bkey_float_byte_offset(unsigned idx)
-+{
-+	return idx * sizeof(struct bkey_float);
-+}
-+
-+struct ro_aux_tree {
-+	u8 nothing[0];
-+	struct bkey_float f[];
-+};
-+
-+struct rw_aux_tree {
-+	u16 offset;
-+	struct bpos k;
-+};
-+
-+static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
-+{
-+	BUG_ON(t->aux_data_offset == U16_MAX);
-+
-+	switch (bset_aux_tree_type(t)) {
-+	case BSET_NO_AUX_TREE:
-+		return t->aux_data_offset;
-+	case BSET_RO_AUX_TREE:
-+		return t->aux_data_offset +
-+			DIV_ROUND_UP(t->size * sizeof(struct bkey_float) +
-+				     t->size * sizeof(u8), 8);
-+	case BSET_RW_AUX_TREE:
-+		return t->aux_data_offset +
-+			DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8);
-+	default:
-+		BUG();
-+	}
-+}
-+
-+static unsigned bset_aux_tree_buf_start(const struct btree *b,
-+		const struct bset_tree *t)
-+{
-+	return t == b->set
-+		? DIV_ROUND_UP(b->unpack_fn_len, 8)
-+		: bset_aux_tree_buf_end(t - 1);
-+}
-+
-+static void *__aux_tree_base(const struct btree *b,
-+		const struct bset_tree *t)
-+{
-+	return b->aux_data + t->aux_data_offset * 8;
-+}
-+
-+static struct ro_aux_tree *ro_aux_tree_base(const struct btree *b,
-+		const struct bset_tree *t)
-+{
-+	EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
-+
-+	return __aux_tree_base(b, t);
-+}
-+
-+static u8 *ro_aux_tree_prev(const struct btree *b,
-+		const struct bset_tree *t)
-+{
-+	EBUG_ON(bset_aux_tree_type(t) != BSET_RO_AUX_TREE);
-+
-+	return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size);
-+}
-+
-+static struct bkey_float *bkey_float(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned idx)
-+{
-+	return ro_aux_tree_base(b, t)->f + idx;
-+}
-+
-+static void bset_aux_tree_verify(const struct btree *b)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+	const struct bset_tree *t;
-+
-+	for_each_bset(b, t) {
-+		if (t->aux_data_offset == U16_MAX)
-+			continue;
-+
-+		BUG_ON(t != b->set &&
-+		       t[-1].aux_data_offset == U16_MAX);
-+
-+		BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
-+		BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
-+		BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
-+	}
-+#endif
-+}
-+
-+void bch2_btree_keys_init(struct btree *b)
-+{
-+	unsigned i;
-+
-+	b->nsets = 0;
-+	memset(&b->nr, 0, sizeof(b->nr));
-+
-+	for (i = 0; i < MAX_BSETS; i++)
-+		b->set[i].data_offset = U16_MAX;
-+
-+	bch2_bset_set_no_aux_tree(b, b->set);
-+}
-+
-+/* Binary tree stuff for auxiliary search trees */
-+
-+/*
-+ * Cacheline/offset <-> bkey pointer arithmetic:
-+ *
-+ * t->tree is a binary search tree in an array; each node corresponds to a key
-+ * in one cacheline in t->set (BSET_CACHELINE bytes).
-+ *
-+ * This means we don't have to store the full index of the key that a node in
-+ * the binary tree points to; eytzinger1_to_inorder() gives us the cacheline, and
-+ * then bkey_float->m gives us the offset within that cacheline, in units of 8
-+ * bytes.
-+ *
-+ * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
-+ * make this work.
-+ *
-+ * To construct the bfloat for an arbitrary key we need to know what the key
-+ * immediately preceding it is: we have to check if the two keys differ in the
-+ * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
-+ * of the previous key so we can walk backwards to it from t->tree[j]'s key.
-+ */
-+
-+static inline void *bset_cacheline(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned cacheline)
-+{
-+	return (void *) round_down((unsigned long) btree_bkey_first(b, t),
-+				   L1_CACHE_BYTES) +
-+		cacheline * BSET_CACHELINE;
-+}
-+
-+static struct bkey_packed *cacheline_to_bkey(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned cacheline,
-+		unsigned offset)
-+{
-+	return bset_cacheline(b, t, cacheline) + offset * 8;
-+}
-+
-+static unsigned bkey_to_cacheline(const struct btree *b,
-+		const struct bset_tree *t,
-+		const struct bkey_packed *k)
-+{
-+	return ((void *) k - bset_cacheline(b, t, 0)) / BSET_CACHELINE;
-+}
-+
-+static ssize_t __bkey_to_cacheline_offset(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned cacheline,
-+		const struct bkey_packed *k)
-+{
-+	return (u64 *) k - (u64 *) bset_cacheline(b, t, cacheline);
-+}
-+
-+static unsigned bkey_to_cacheline_offset(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned cacheline,
-+		const struct bkey_packed *k)
-+{
-+	size_t m = __bkey_to_cacheline_offset(b, t, cacheline, k);
-+
-+	EBUG_ON(m > U8_MAX);
-+	return m;
-+}
-+
-+static inline struct bkey_packed *tree_to_bkey(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned j)
-+{
-+	return cacheline_to_bkey(b, t,
-+			__eytzinger1_to_inorder(j, t->size - 1, t->extra),
-+			bkey_float(b, t, j)->key_offset);
-+}
-+
-+static struct bkey_packed *tree_to_prev_bkey(const struct btree *b,
-+		const struct bset_tree *t,
-+		unsigned j)
-+{
-+	unsigned prev_u64s = ro_aux_tree_prev(b, t)[j];
-+
-+	return (void *) ((u64 *) tree_to_bkey(b, t, j)->_data - prev_u64s);
-+}
-+
-+static struct rw_aux_tree *rw_aux_tree(const struct btree *b,
-+		const struct bset_tree *t)
-+{
-+	EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE);
-+
-+	return __aux_tree_base(b, t);
-+}
-+
-+/*
-+ * For the write set - the one we're currently inserting keys into - we don't
-+ * maintain a full search tree, we just keep a simple lookup table in t->prev.
-+ */ -+static struct bkey_packed *rw_aux_to_bkey(const struct btree *b, -+ struct bset_tree *t, -+ unsigned j) -+{ -+ return __btree_node_offset_to_key(b, rw_aux_tree(b, t)[j].offset); -+} -+ -+static void rw_aux_tree_set(const struct btree *b, struct bset_tree *t, -+ unsigned j, struct bkey_packed *k) -+{ -+ EBUG_ON(k >= btree_bkey_last(b, t)); -+ -+ rw_aux_tree(b, t)[j] = (struct rw_aux_tree) { -+ .offset = __btree_node_key_to_offset(b, k), -+ .k = bkey_unpack_pos(b, k), -+ }; -+} -+ -+static void bch2_bset_verify_rw_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ struct bkey_packed *k = btree_bkey_first(b, t); -+ unsigned j = 0; -+ -+ if (!bch2_expensive_debug_checks) -+ return; -+ -+ BUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ BUG_ON(t->size < 1); -+ BUG_ON(rw_aux_to_bkey(b, t, j) != k); -+ -+ goto start; -+ while (1) { -+ if (rw_aux_to_bkey(b, t, j) == k) { -+ BUG_ON(!bpos_eq(rw_aux_tree(b, t)[j].k, -+ bkey_unpack_pos(b, k))); -+start: -+ if (++j == t->size) -+ break; -+ -+ BUG_ON(rw_aux_tree(b, t)[j].offset <= -+ rw_aux_tree(b, t)[j - 1].offset); -+ } -+ -+ k = bkey_p_next(k); -+ BUG_ON(k >= btree_bkey_last(b, t)); -+ } -+} -+ -+/* returns idx of first entry >= offset: */ -+static unsigned rw_aux_tree_bsearch(struct btree *b, -+ struct bset_tree *t, -+ unsigned offset) -+{ -+ unsigned bset_offs = offset - btree_bkey_first_offset(t); -+ unsigned bset_u64s = t->end_offset - btree_bkey_first_offset(t); -+ unsigned idx = bset_u64s ? bset_offs * t->size / bset_u64s : 0; -+ -+ EBUG_ON(bset_aux_tree_type(t) != BSET_RW_AUX_TREE); -+ EBUG_ON(!t->size); -+ EBUG_ON(idx > t->size); -+ -+ while (idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset) -+ idx++; -+ -+ while (idx && -+ rw_aux_tree(b, t)[idx - 1].offset >= offset) -+ idx--; -+ -+ EBUG_ON(idx < t->size && -+ rw_aux_tree(b, t)[idx].offset < offset); -+ EBUG_ON(idx && rw_aux_tree(b, t)[idx - 1].offset >= offset); -+ EBUG_ON(idx + 1 < t->size && -+ rw_aux_tree(b, t)[idx].offset == -+ rw_aux_tree(b, t)[idx + 1].offset); -+ -+ return idx; -+} -+ -+static inline unsigned bkey_mantissa(const struct bkey_packed *k, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+ u64 v; -+ -+ EBUG_ON(!bkey_packed(k)); -+ -+ v = get_unaligned((u64 *) (((u8 *) k->_data) + (f->exponent >> 3))); -+ -+ /* -+ * In little endian, we're shifting off low bits (and then the bits we -+ * want are at the low end), in big endian we're shifting off high bits -+ * (and then the bits we want are at the high end, so we shift them -+ * back down): -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ v >>= f->exponent & 7; -+#else -+ v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS; -+#endif -+ return (u16) v; -+} -+ -+static __always_inline void make_bfloat(struct btree *b, struct bset_tree *t, -+ unsigned j, -+ struct bkey_packed *min_key, -+ struct bkey_packed *max_key) -+{ -+ struct bkey_float *f = bkey_float(b, t, j); -+ struct bkey_packed *m = tree_to_bkey(b, t, j); -+ struct bkey_packed *l = is_power_of_2(j) -+ ? min_key -+ : tree_to_prev_bkey(b, t, j >> ffs(j)); -+ struct bkey_packed *r = is_power_of_2(j + 1) -+ ? max_key -+ : tree_to_bkey(b, t, j >> (ffz(j) + 1)); -+ unsigned mantissa; -+ int shift, exponent, high_bit; -+ -+ /* -+ * for failed bfloats, the lookup code falls back to comparing against -+ * the original key. 
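/*
 * (Standalone model of the little-endian mantissa read done by
 * bkey_mantissa() above; plain C with memcpy() in place of the kernel's
 * get_unaligned(). "bit_offset" plays the role of f->exponent: read 64 bits
 * starting at byte bit_offset / 8, shift off the bit_offset % 8 low bits,
 * keep 16. The caller must guarantee 8 readable bytes at that offset; names
 * are illustrative.)
 */
#include <stdint.h>
#include <string.h>

static uint16_t model_mantissa(const void *packed_key, unsigned bit_offset)
{
	uint64_t v;

	memcpy(&v, (const uint8_t *) packed_key + (bit_offset >> 3), sizeof(v));
	v >>= bit_offset & 7;

	return (uint16_t) v;	/* the 16 bits starting at "bit_offset" */
}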
-+ */ -+ -+ if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) || -+ !b->nr_key_bits) { -+ f->exponent = BFLOAT_FAILED_UNPACKED; -+ return; -+ } -+ -+ /* -+ * The greatest differing bit of l and r is the first bit we must -+ * include in the bfloat mantissa we're creating in order to do -+ * comparisons - that bit always becomes the high bit of -+ * bfloat->mantissa, and thus the exponent we're calculating here is -+ * the position of what will become the low bit in bfloat->mantissa: -+ * -+ * Note that this may be negative - we may be running off the low end -+ * of the key: we handle this later: -+ */ -+ high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r), -+ min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1); -+ exponent = high_bit - (BKEY_MANTISSA_BITS - 1); -+ -+ /* -+ * Then we calculate the actual shift value, from the start of the key -+ * (k->_data), to get the key bits starting at exponent: -+ */ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent; -+ -+ EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64); -+#else -+ shift = high_bit_offset + -+ b->nr_key_bits - -+ exponent - -+ BKEY_MANTISSA_BITS; -+ -+ EBUG_ON(shift < KEY_PACKED_BITS_START); -+#endif -+ EBUG_ON(shift < 0 || shift >= BFLOAT_FAILED); -+ -+ f->exponent = shift; -+ mantissa = bkey_mantissa(m, f, j); -+ -+ /* -+ * If we've got garbage bits, set them to all 1s - it's legal for the -+ * bfloat to compare larger than the original key, but not smaller: -+ */ -+ if (exponent < 0) -+ mantissa |= ~(~0U << -exponent); -+ -+ f->mantissa = mantissa; -+} -+ -+/* bytes remaining - only valid for last bset: */ -+static unsigned __bset_tree_capacity(const struct btree *b, const struct bset_tree *t) -+{ -+ bset_aux_tree_verify(b); -+ -+ return btree_aux_data_bytes(b) - t->aux_data_offset * sizeof(u64); -+} -+ -+static unsigned bset_ro_tree_capacity(const struct btree *b, const struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / -+ (sizeof(struct bkey_float) + sizeof(u8)); -+} -+ -+static unsigned bset_rw_tree_capacity(const struct btree *b, const struct bset_tree *t) -+{ -+ return __bset_tree_capacity(b, t) / sizeof(struct rw_aux_tree); -+} -+ -+static noinline void __build_rw_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *k; -+ -+ t->size = 1; -+ t->extra = BSET_RW_AUX_TREE_VAL; -+ rw_aux_tree(b, t)[0].offset = -+ __btree_node_key_to_offset(b, btree_bkey_first(b, t)); -+ -+ bset_tree_for_each_key(b, t, k) { -+ if (t->size == bset_rw_tree_capacity(b, t)) -+ break; -+ -+ if ((void *) k - (void *) rw_aux_to_bkey(b, t, t->size - 1) > -+ L1_CACHE_BYTES) -+ rw_aux_tree_set(b, t, t->size++, k); -+ } -+} -+ -+static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); -+ struct bkey_i min_key, max_key; -+ unsigned j, cacheline = 1; -+ -+ t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), -+ bset_ro_tree_capacity(b, t)); -+retry: -+ if (t->size < 2) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ return; -+ } -+ -+ t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1; -+ -+ /* First we figure out where the first key in each cacheline is */ -+ eytzinger1_for_each(j, t->size - 1) { -+ while (bkey_to_cacheline(b, t, k) < cacheline) -+ prev = k, k = bkey_p_next(k); -+ -+ if (k >= btree_bkey_last(b, t)) { -+ /* XXX: this path sucks */ -+ t->size--; -+ goto retry; -+ } -+ -+ ro_aux_tree_prev(b, 
t)[j] = prev->u64s; -+ bkey_float(b, t, j)->key_offset = -+ bkey_to_cacheline_offset(b, t, cacheline++, k); -+ -+ EBUG_ON(tree_to_prev_bkey(b, t, j) != prev); -+ EBUG_ON(tree_to_bkey(b, t, j) != k); -+ } -+ -+ while (k != btree_bkey_last(b, t)) -+ prev = k, k = bkey_p_next(k); -+ -+ if (!bkey_pack_pos(bkey_to_packed(&min_key), b->data->min_key, b)) { -+ bkey_init(&min_key.k); -+ min_key.k.p = b->data->min_key; -+ } -+ -+ if (!bkey_pack_pos(bkey_to_packed(&max_key), b->data->max_key, b)) { -+ bkey_init(&max_key.k); -+ max_key.k.p = b->data->max_key; -+ } -+ -+ /* Then we build the tree */ -+ eytzinger1_for_each(j, t->size - 1) -+ make_bfloat(b, t, j, -+ bkey_to_packed(&min_key), -+ bkey_to_packed(&max_key)); -+} -+ -+static void bset_alloc_tree(struct btree *b, struct bset_tree *t) -+{ -+ struct bset_tree *i; -+ -+ for (i = b->set; i != t; i++) -+ BUG_ON(bset_has_rw_aux_tree(i)); -+ -+ bch2_bset_set_no_aux_tree(b, t); -+ -+ /* round up to next cacheline: */ -+ t->aux_data_offset = round_up(bset_aux_tree_buf_start(b, t), -+ SMP_CACHE_BYTES / sizeof(u64)); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_build_aux_tree(struct btree *b, struct bset_tree *t, -+ bool writeable) -+{ -+ if (writeable -+ ? bset_has_rw_aux_tree(t) -+ : bset_has_ro_aux_tree(t)) -+ return; -+ -+ bset_alloc_tree(b, t); -+ -+ if (!__bset_tree_capacity(b, t)) -+ return; -+ -+ if (writeable) -+ __build_rw_aux_tree(b, t); -+ else -+ __build_ro_aux_tree(b, t); -+ -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_init_first(struct btree *b, struct bset *i) -+{ -+ struct bset_tree *t; -+ -+ BUG_ON(b->nsets); -+ -+ memset(i, 0, sizeof(*i)); -+ get_random_bytes(&i->seq, sizeof(i->seq)); -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+void bch2_bset_init_next(struct bch_fs *c, struct btree *b, -+ struct btree_node_entry *bne) -+{ -+ struct bset *i = &bne->keys; -+ struct bset_tree *t; -+ -+ BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); -+ BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); -+ BUG_ON(b->nsets >= MAX_BSETS); -+ -+ memset(i, 0, sizeof(*i)); -+ i->seq = btree_bset_first(b)->seq; -+ SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); -+ -+ t = &b->set[b->nsets++]; -+ set_btree_bset(b, t, i); -+} -+ -+/* -+ * find _some_ key in the same bset as @k that precedes @k - not necessarily the -+ * immediate predecessor: -+ */ -+static struct bkey_packed *__bkey_prev(struct btree *b, struct bset_tree *t, -+ struct bkey_packed *k) -+{ -+ struct bkey_packed *p; -+ unsigned offset; -+ int j; -+ -+ EBUG_ON(k < btree_bkey_first(b, t) || -+ k > btree_bkey_last(b, t)); -+ -+ if (k == btree_bkey_first(b, t)) -+ return NULL; -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ p = btree_bkey_first(b, t); -+ break; -+ case BSET_RO_AUX_TREE: -+ j = min_t(unsigned, t->size - 1, bkey_to_cacheline(b, t, k)); -+ -+ do { -+ p = j ? tree_to_bkey(b, t, -+ __inorder_to_eytzinger1(j--, -+ t->size - 1, t->extra)) -+ : btree_bkey_first(b, t); -+ } while (p >= k); -+ break; -+ case BSET_RW_AUX_TREE: -+ offset = __btree_node_key_to_offset(b, k); -+ j = rw_aux_tree_bsearch(b, t, offset); -+ p = j ? 
rw_aux_to_bkey(b, t, j - 1) -+ : btree_bkey_first(b, t); -+ break; -+ } -+ -+ return p; -+} -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *k, -+ unsigned min_key_type) -+{ -+ struct bkey_packed *p, *i, *ret = NULL, *orig_k = k; -+ -+ while ((p = __bkey_prev(b, t, k)) && !ret) { -+ for (i = p; i != k; i = bkey_p_next(i)) -+ if (i->type >= min_key_type) -+ ret = i; -+ -+ k = p; -+ } -+ -+ if (bch2_expensive_debug_checks) { -+ BUG_ON(ret >= orig_k); -+ -+ for (i = ret -+ ? bkey_p_next(ret) -+ : btree_bkey_first(b, t); -+ i != orig_k; -+ i = bkey_p_next(i)) -+ BUG_ON(i->type >= min_key_type); -+ } -+ -+ return ret; -+} -+ -+/* Insert */ -+ -+static void bch2_bset_fix_lookup_table(struct btree *b, -+ struct bset_tree *t, -+ struct bkey_packed *_where, -+ unsigned clobber_u64s, -+ unsigned new_u64s) -+{ -+ int shift = new_u64s - clobber_u64s; -+ unsigned l, j, where = __btree_node_key_to_offset(b, _where); -+ -+ EBUG_ON(bset_has_ro_aux_tree(t)); -+ -+ if (!bset_has_rw_aux_tree(t)) -+ return; -+ -+ /* returns first entry >= where */ -+ l = rw_aux_tree_bsearch(b, t, where); -+ -+ if (!l) /* never delete first entry */ -+ l++; -+ else if (l < t->size && -+ where < t->end_offset && -+ rw_aux_tree(b, t)[l].offset == where) -+ rw_aux_tree_set(b, t, l++, _where); -+ -+ /* l now > where */ -+ -+ for (j = l; -+ j < t->size && -+ rw_aux_tree(b, t)[j].offset < where + clobber_u64s; -+ j++) -+ ; -+ -+ if (j < t->size && -+ rw_aux_tree(b, t)[j].offset + shift == -+ rw_aux_tree(b, t)[l - 1].offset) -+ j++; -+ -+ memmove(&rw_aux_tree(b, t)[l], -+ &rw_aux_tree(b, t)[j], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[j]); -+ t->size -= j - l; -+ -+ for (j = l; j < t->size; j++) -+ rw_aux_tree(b, t)[j].offset += shift; -+ -+ EBUG_ON(l < t->size && -+ rw_aux_tree(b, t)[l].offset == -+ rw_aux_tree(b, t)[l - 1].offset); -+ -+ if (t->size < bset_rw_tree_capacity(b, t) && -+ (l < t->size -+ ? rw_aux_tree(b, t)[l].offset -+ : t->end_offset) - -+ rw_aux_tree(b, t)[l - 1].offset > -+ L1_CACHE_BYTES / sizeof(u64)) { -+ struct bkey_packed *start = rw_aux_to_bkey(b, t, l - 1); -+ struct bkey_packed *end = l < t->size -+ ? 
rw_aux_to_bkey(b, t, l) -+ : btree_bkey_last(b, t); -+ struct bkey_packed *k = start; -+ -+ while (1) { -+ k = bkey_p_next(k); -+ if (k == end) -+ break; -+ -+ if ((void *) k - (void *) start >= L1_CACHE_BYTES) { -+ memmove(&rw_aux_tree(b, t)[l + 1], -+ &rw_aux_tree(b, t)[l], -+ (void *) &rw_aux_tree(b, t)[t->size] - -+ (void *) &rw_aux_tree(b, t)[l]); -+ t->size++; -+ rw_aux_tree_set(b, t, l, k); -+ break; -+ } -+ } -+ } -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bset_aux_tree_verify(b); -+} -+ -+void bch2_bset_insert(struct btree *b, -+ struct btree_node_iter *iter, -+ struct bkey_packed *where, -+ struct bkey_i *insert, -+ unsigned clobber_u64s) -+{ -+ struct bkey_format *f = &b->format; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bkey_packed packed, *src = bkey_to_packed(insert); -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s); -+ -+ if (bch2_bkey_pack_key(&packed, &insert->k, f)) -+ src = &packed; -+ -+ if (!bkey_deleted(&insert->k)) -+ btree_keys_account_key_add(&b->nr, t - b->set, src); -+ -+ if (src->u64s != clobber_u64s) { -+ u64 *src_p = (u64 *) where->_data + clobber_u64s; -+ u64 *dst_p = (u64 *) where->_data + src->u64s; -+ -+ EBUG_ON((int) le16_to_cpu(bset(b, t)->u64s) < -+ (int) clobber_u64s - src->u64s); -+ -+ memmove_u64s(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, src->u64s - clobber_u64s); -+ set_btree_bset_end(b, t); -+ } -+ -+ memcpy_u64s_small(where, src, -+ bkeyp_key_u64s(f, src)); -+ memcpy_u64s(bkeyp_val(f, where), &insert->v, -+ bkeyp_val_u64s(f, src)); -+ -+ if (src->u64s != clobber_u64s) -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_bset_delete(struct btree *b, -+ struct bkey_packed *where, -+ unsigned clobber_u64s) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ u64 *src_p = (u64 *) where->_data + clobber_u64s; -+ u64 *dst_p = where->_data; -+ -+ bch2_bset_verify_rw_aux_tree(b, t); -+ -+ EBUG_ON(le16_to_cpu(bset(b, t)->u64s) < clobber_u64s); -+ -+ memmove_u64s_down(dst_p, src_p, btree_bkey_last(b, t)->_data - src_p); -+ le16_add_cpu(&bset(b, t)->u64s, -clobber_u64s); -+ set_btree_bset_end(b, t); -+ -+ bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, 0); -+} -+ -+/* Lookup */ -+ -+__flatten -+static struct bkey_packed *bset_search_write_set(const struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search) -+{ -+ unsigned l = 0, r = t->size; -+ -+ while (l + 1 != r) { -+ unsigned m = (l + r) >> 1; -+ -+ if (bpos_lt(rw_aux_tree(b, t)[m].k, *search)) -+ l = m; -+ else -+ r = m; -+ } -+ -+ return rw_aux_to_bkey(b, t, l); -+} -+ -+static inline void prefetch_four_cachelines(void *p) -+{ -+#ifdef CONFIG_X86_64 -+ asm("prefetcht0 (-127 + 64 * 0)(%0);" -+ "prefetcht0 (-127 + 64 * 1)(%0);" -+ "prefetcht0 (-127 + 64 * 2)(%0);" -+ "prefetcht0 (-127 + 64 * 3)(%0);" -+ : -+ : "r" (p + 127)); -+#else -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ prefetch(p + L1_CACHE_BYTES * 3); -+#endif -+} -+ -+static inline bool bkey_mantissa_bits_dropped(const struct btree *b, -+ const struct bkey_float *f, -+ unsigned idx) -+{ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits; -+ -+ return f->exponent > key_bits_start; -+#else -+ unsigned key_bits_end = high_bit_offset + b->nr_key_bits; -+ -+ return f->exponent + BKEY_MANTISSA_BITS < 
key_bits_end; -+#endif -+} -+ -+__flatten -+static struct bkey_packed *bset_search_tree(const struct btree *b, -+ const struct bset_tree *t, -+ const struct bpos *search, -+ const struct bkey_packed *packed_search) -+{ -+ struct ro_aux_tree *base = ro_aux_tree_base(b, t); -+ struct bkey_float *f; -+ struct bkey_packed *k; -+ unsigned inorder, n = 1, l, r; -+ int cmp; -+ -+ do { -+ if (likely(n << 4 < t->size)) -+ prefetch(&base->f[n << 4]); -+ -+ f = &base->f[n]; -+ if (unlikely(f->exponent >= BFLOAT_FAILED)) -+ goto slowpath; -+ -+ l = f->mantissa; -+ r = bkey_mantissa(packed_search, f, n); -+ -+ if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n)) -+ goto slowpath; -+ -+ n = n * 2 + (l < r); -+ continue; -+slowpath: -+ k = tree_to_bkey(b, t, n); -+ cmp = bkey_cmp_p_or_unp(b, k, packed_search, search); -+ if (!cmp) -+ return k; -+ -+ n = n * 2 + (cmp < 0); -+ } while (n < t->size); -+ -+ inorder = __eytzinger1_to_inorder(n >> 1, t->size - 1, t->extra); -+ -+ /* -+ * n would have been the node we recursed to - the low bit tells us if -+ * we recursed left or recursed right. -+ */ -+ if (likely(!(n & 1))) { -+ --inorder; -+ if (unlikely(!inorder)) -+ return btree_bkey_first(b, t); -+ -+ f = &base->f[eytzinger1_prev(n >> 1, t->size - 1)]; -+ } -+ -+ return cacheline_to_bkey(b, t, inorder, f->key_offset); -+} -+ -+static __always_inline __flatten -+struct bkey_packed *__bch2_bset_search(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ const struct bkey_packed *lossy_packed_search) -+{ -+ -+ /* -+ * First we search for a cacheline, then we do a linear search -+ * within that cacheline. -+ * -+ * To search for the cacheline, there are three different possibilities: -+ * * The set is too small to have a search tree, so we just do a linear -+ * search over the whole set. -+ * * The set is the one we're currently inserting into; keeping a full -+ * auxiliary search tree up to date would be too expensive, so we -+ * use a much simpler lookup table to do a binary search - -+ * bset_search_write_set().
-+ * * Or we use the auxiliary search tree we constructed earlier - -+ * bset_search_tree() -+ */ -+ -+ switch (bset_aux_tree_type(t)) { -+ case BSET_NO_AUX_TREE: -+ return btree_bkey_first(b, t); -+ case BSET_RW_AUX_TREE: -+ return bset_search_write_set(b, t, search); -+ case BSET_RO_AUX_TREE: -+ return bset_search_tree(b, t, search, lossy_packed_search); -+ default: -+ BUG(); -+ } -+} -+ -+static __always_inline __flatten -+struct bkey_packed *bch2_bset_search_linear(struct btree *b, -+ struct bset_tree *t, -+ struct bpos *search, -+ struct bkey_packed *packed_search, -+ const struct bkey_packed *lossy_packed_search, -+ struct bkey_packed *m) -+{ -+ if (lossy_packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_cmp_p_or_unp(b, m, -+ lossy_packed_search, search) < 0) -+ m = bkey_p_next(m); -+ -+ if (!packed_search) -+ while (m != btree_bkey_last(b, t) && -+ bkey_iter_pos_cmp(b, m, search) < 0) -+ m = bkey_p_next(m); -+ -+ if (bch2_expensive_debug_checks) { -+ struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); -+ -+ BUG_ON(prev && -+ bkey_iter_cmp_p_or_unp(b, prev, -+ packed_search, search) >= 0); -+ } -+ -+ return m; -+} -+ -+/* Btree node iterator */ -+ -+static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ if (k != end) { -+ struct btree_node_iter_set *pos; -+ -+ btree_node_iter_for_each(iter, pos) -+ ; -+ -+ BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data)); -+ *pos = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+} -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *iter, -+ struct btree *b, -+ const struct bkey_packed *k, -+ const struct bkey_packed *end) -+{ -+ __bch2_btree_node_iter_push(iter, b, k, end); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+noinline __flatten __cold -+static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bkey_packed *k; -+ -+ trace_bkey_pack_pos_fail(search); -+ -+ bch2_btree_node_iter_init_from_start(iter, b); -+ -+ while ((k = bch2_btree_node_iter_peek(iter, b)) && -+ bkey_iter_pos_cmp(b, k, search) < 0) -+ bch2_btree_node_iter_advance(iter, b); -+} -+ -+/** -+ * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a -+ * given position -+ * -+ * @iter: iterator to initialize -+ * @b: btree node to search -+ * @search: search key -+ * -+ * Main entry point to the lookup code for individual btree nodes: -+ * -+ * NOTE: -+ * -+ * When you don't filter out deleted keys, btree nodes _do_ contain duplicate -+ * keys. This doesn't matter for most code, but it does matter for lookups. -+ * -+ * Some adjacent keys with a string of equal keys: -+ * i j k k k k l m -+ * -+ * If you search for k, the lookup code isn't guaranteed to return you any -+ * specific k. The lookup code is conceptually doing a binary search and -+ * iterating backwards is very expensive so if the pivot happens to land at the -+ * last k that's what you'll get. -+ * -+ * This works out ok, but it's something to be aware of: -+ * -+ * - For non extents, we guarantee that the live key comes last - see -+ * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't -+ * see will only be deleted keys you don't care about. -+ * -+ * - For extents, deleted keys sort last (see the comment at the top of this -+ * file). 
But when you're searching for extents, you actually want the first -+ * key strictly greater than your search key - an extent that compares equal -+ * to the search key is going to have 0 sectors after the search key. -+ * -+ * But this does mean that we can't just search for -+ * bpos_successor(start_of_range) to get the first extent that overlaps with -+ * the range we want - if we're unlucky and there's an extent that ends -+ * exactly where we searched, then there could be a deleted key at the same -+ * position and we'd get that when we search instead of the preceding extent -+ * we needed. -+ * -+ * So we've got to search for start_of_range, then after the lookup iterate -+ * past any extents that compare equal to the position we searched for. -+ */ -+__flatten -+void bch2_btree_node_iter_init(struct btree_node_iter *iter, -+ struct btree *b, struct bpos *search) -+{ -+ struct bkey_packed p, *packed_search = NULL; -+ struct btree_node_iter_set *pos = iter->data; -+ struct bkey_packed *k[MAX_BSETS]; -+ unsigned i; -+ -+ EBUG_ON(bpos_lt(*search, b->data->min_key)); -+ EBUG_ON(bpos_gt(*search, b->data->max_key)); -+ bset_aux_tree_verify(b); -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ switch (bch2_bkey_pack_pos_lossy(&p, *search, b)) { -+ case BKEY_PACK_POS_EXACT: -+ packed_search = &p; -+ break; -+ case BKEY_PACK_POS_SMALLER: -+ packed_search = NULL; -+ break; -+ case BKEY_PACK_POS_FAIL: -+ btree_node_iter_init_pack_failed(iter, b, search); -+ return; -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ k[i] = __bch2_bset_search(b, b->set + i, search, &p); -+ prefetch_four_cachelines(k[i]); -+ } -+ -+ for (i = 0; i < b->nsets; i++) { -+ struct bset_tree *t = b->set + i; -+ struct bkey_packed *end = btree_bkey_last(b, t); -+ -+ k[i] = bch2_bset_search_linear(b, t, search, -+ packed_search, &p, k[i]); -+ if (k[i] != end) -+ *pos++ = (struct btree_node_iter_set) { -+ __btree_node_key_to_offset(b, k[i]), -+ __btree_node_key_to_offset(b, end) -+ }; -+ } -+ -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ for_each_bset(b, t) -+ __bch2_btree_node_iter_push(iter, b, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ bch2_btree_node_iter_sort(iter, b); -+} -+ -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bset_tree *t) -+{ -+ struct btree_node_iter_set *set; -+ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == t->end_offset) -+ return __btree_node_offset_to_key(b, set->k); -+ -+ return btree_bkey_last(b, t); -+} -+ -+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter, -+ struct btree *b, -+ unsigned first) -+{ -+ bool ret; -+ -+ if ((ret = (btree_node_iter_cmp(b, -+ iter->data[first], -+ iter->data[first + 1]) > 0))) -+ swap(iter->data[first], iter->data[first + 1]); -+ return ret; -+} -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ /* unrolled bubble sort: */ -+ -+ if (!__btree_node_iter_set_end(iter, 2)) { -+ btree_node_iter_sort_two(iter, b, 0); -+ btree_node_iter_sort_two(iter, b, 1); -+ } -+ -+ if (!__btree_node_iter_set_end(iter, 1)) -+ btree_node_iter_sort_two(iter, b, 0); -+} -+ -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter, -+ struct btree_node_iter_set *set) -+{ -+ struct btree_node_iter_set *last = -+ iter->data + ARRAY_SIZE(iter->data) - 1; -+ -+ memmove(&set[0], 
&set[1], (void *) last - (void *) set); -+ *last = (struct btree_node_iter_set) { 0, 0 }; -+} -+ -+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s; -+ -+ EBUG_ON(iter->data->k > iter->data->end); -+ -+ if (unlikely(__btree_node_iter_set_end(iter, 0))) { -+ /* avoid an expensive memmove call: */ -+ iter->data[0] = iter->data[1]; -+ iter->data[1] = iter->data[2]; -+ iter->data[2] = (struct btree_node_iter_set) { 0, 0 }; -+ return; -+ } -+ -+ if (__btree_node_iter_set_end(iter, 1)) -+ return; -+ -+ if (!btree_node_iter_sort_two(iter, b, 0)) -+ return; -+ -+ if (__btree_node_iter_set_end(iter, 2)) -+ return; -+ -+ btree_node_iter_sort_two(iter, b, 1); -+} -+ -+void bch2_btree_node_iter_advance(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ if (bch2_expensive_debug_checks) { -+ bch2_btree_node_iter_verify(iter, b); -+ bch2_btree_node_iter_next_check(iter, b); -+ } -+ -+ __bch2_btree_node_iter_advance(iter, b); -+} -+ -+/* -+ * Expensive: -+ */ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bkey_packed *k, *prev = NULL; -+ struct btree_node_iter_set *set; -+ struct bset_tree *t; -+ unsigned end = 0; -+ -+ if (bch2_expensive_debug_checks) -+ bch2_btree_node_iter_verify(iter, b); -+ -+ for_each_bset(b, t) { -+ k = bch2_bkey_prev_all(b, t, -+ bch2_btree_node_iter_bset_pos(iter, b, t)); -+ if (k && -+ (!prev || bkey_iter_cmp(b, k, prev) > 0)) { -+ prev = k; -+ end = t->end_offset; -+ } -+ } -+ -+ if (!prev) -+ return NULL; -+ -+ /* -+ * We're manually memmoving instead of just calling sort() to ensure the -+ * prev we picked ends up in slot 0 - sort won't necessarily put it -+ * there because of duplicate deleted keys: -+ */ -+ btree_node_iter_for_each(iter, set) -+ if (set->end == end) -+ goto found; -+ -+ BUG_ON(set != &iter->data[__btree_node_iter_used(iter)]); -+found: -+ BUG_ON(set >= iter->data + ARRAY_SIZE(iter->data)); -+ -+ memmove(&iter->data[1], -+ &iter->data[0], -+ (void *) set - (void *) &iter->data[0]); -+ -+ iter->data[0].k = __btree_node_key_to_offset(b, prev); -+ iter->data[0].end = end; -+ -+ if (bch2_expensive_debug_checks) -+ bch2_btree_node_iter_verify(iter, b); -+ return prev; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ struct bkey_packed *prev; -+ -+ do { -+ prev = bch2_btree_node_iter_prev_all(iter, b); -+ } while (prev && bkey_deleted(prev)); -+ -+ return prev; -+} -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *iter, -+ struct btree *b, -+ struct bkey *u) -+{ -+ struct bkey_packed *k = bch2_btree_node_iter_peek(iter, b); -+ -+ return k ? 
bkey_disassemble(b, k, u) : bkey_s_c_null; -+} -+ -+/* Mergesort */ -+ -+void bch2_btree_keys_stats(const struct btree *b, struct bset_stats *stats) -+{ -+ const struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ enum bset_aux_tree_type type = bset_aux_tree_type(t); -+ size_t j; -+ -+ stats->sets[type].nr++; -+ stats->sets[type].bytes += le16_to_cpu(bset(b, t)->u64s) * -+ sizeof(u64); -+ -+ if (bset_has_ro_aux_tree(t)) { -+ stats->floats += t->size - 1; -+ -+ for (j = 1; j < t->size; j++) -+ stats->failed += -+ bkey_float(b, t, j)->exponent == -+ BFLOAT_FAILED; -+ } -+ } -+} -+ -+void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, -+ struct bkey_packed *k) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ struct bkey uk; -+ unsigned j, inorder; -+ -+ if (!bset_has_ro_aux_tree(t)) -+ return; -+ -+ inorder = bkey_to_cacheline(b, t, k); -+ if (!inorder || inorder >= t->size) -+ return; -+ -+ j = __inorder_to_eytzinger1(inorder, t->size - 1, t->extra); -+ if (k != tree_to_bkey(b, t, j)) -+ return; -+ -+ switch (bkey_float(b, t, j)->exponent) { -+ case BFLOAT_FAILED: -+ uk = bkey_unpack_key(b, k); -+ prt_printf(out, -+ " failed unpacked at depth %u\n" -+ "\t", -+ ilog2(j)); -+ bch2_bpos_to_text(out, uk.p); -+ prt_printf(out, "\n"); -+ break; -+ } -+} -diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h -new file mode 100644 -index 000000000000..632c2b8c5460 ---- /dev/null -+++ b/fs/bcachefs/bset.h -@@ -0,0 +1,541 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BSET_H -+#define _BCACHEFS_BSET_H -+ -+#include <linux/kernel.h> -+#include <linux/types.h> -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "btree_types.h" -+#include "util.h" /* for time_stats */ -+#include "vstructs.h" -+ -+/* -+ * BKEYS: -+ * -+ * A bkey contains a key, a size field, a variable number of pointers, and some -+ * ancillary flag bits. -+ * -+ * We use two different functions for validating bkeys, bkey_invalid and -+ * bkey_deleted(). -+ * -+ * The one exception to the rule that ptr_invalid() filters out invalid keys is -+ * that it also filters out keys of size 0 - these are keys that have been -+ * completely overwritten. It'd be safe to delete these in memory while leaving -+ * them on disk, just unnecessary work - so we filter them out when resorting -+ * instead. -+ * -+ * We can't filter out stale keys when we're resorting, because garbage -+ * collection needs to find them to ensure bucket gens don't wrap around - -+ * unless we're rewriting the btree node those stale keys still exist on disk. -+ * -+ * We also implement functions here for removing some number of sectors from the -+ * front or the back of a bkey - this is mainly used for fixing overlapping -+ * extents, by removing the overlapping sectors from the older key. -+ * -+ * BSETS: -+ * -+ * A bset is an array of bkeys laid out contiguously in memory in sorted order, -+ * along with a header. A btree node is made up of a number of these, written at -+ * different times. -+ * -+ * There could be many of them on disk, but we never allow there to be more than -+ * 4 in memory - we lazily resort as needed. -+ * -+ * We implement code here for creating and maintaining auxiliary search trees -+ * (described below) for searching an individual bset, and on top of that we -+ * implement a btree iterator. -+ * -+ * BTREE ITERATOR: -+ * -+ * Most of the code in bcache doesn't care about an individual bset - it needs -+ * to search entire btree nodes and iterate over them in sorted order.
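/*
 * (Toy model of what the iterator section here describes: an N-way merge
 * across a handful of sorted runs, always yielding the smallest head. Plain
 * int arrays stand in for packed keys, and the set count is illustrative;
 * the real iterator keeps its sets sorted so peeking is O(1) instead of a
 * scan.)
 */
#include <stddef.h>

#define MODEL_MAX_SETS 3

struct model_set { const int *k, *end; };
struct model_iter { struct model_set data[MODEL_MAX_SETS]; };

static const int *model_peek(const struct model_iter *iter)
{
	const int *best = NULL;

	for (size_t i = 0; i < MODEL_MAX_SETS; i++) {
		const struct model_set *s = &iter->data[i];

		if (s->k != s->end && (!best || *s->k < *best))
			best = s->k;
	}

	return best;	/* NULL once every set is exhausted */
}

static void model_advance(struct model_iter *iter)
{
	const int *best = model_peek(iter);

	for (size_t i = 0; i < MODEL_MAX_SETS; i++)
		if (iter->data[i].k == best) {
			iter->data[i].k++;
			return;
		}
}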
-+ * -+ * The btree iterator code serves both functions; it iterates through the keys -+ * in a btree node in sorted order, starting from either keys after a specific -+ * point (if you pass it a search key) or the start of the btree node. -+ * -+ * AUXILIARY SEARCH TREES: -+ * -+ * Since keys are variable length, we can't use a binary search on a bset - we -+ * wouldn't be able to find the start of the next key. But binary searches are -+ * slow anyways, due to terrible cache behaviour; bcache originally used binary -+ * searches and that code topped out at under 50k lookups/second. -+ * -+ * So we need to construct some sort of lookup table. Since we only insert keys -+ * into the last (unwritten) set, most of the keys within a given btree node are -+ * usually in sets that are mostly constant. We use two different types of -+ * lookup tables to take advantage of this. -+ * -+ * Both lookup tables share in common that they don't index every key in the -+ * set; they index one key every BSET_CACHELINE bytes, and then a linear search -+ * is used for the rest. -+ * -+ * For sets that have been written to disk and are no longer being inserted -+ * into, we construct a binary search tree in an array - traversing a binary -+ * search tree in an array gives excellent locality of reference and is very -+ * fast, since both children of any node are adjacent to each other in memory -+ * (and their grandchildren, and great grandchildren...) - this means -+ * prefetching can be used to great effect. -+ * -+ * It's quite useful performance wise to keep these nodes small - not just -+ * because they're more likely to be in L2, but also because we can prefetch -+ * more nodes on a single cacheline and thus prefetch more iterations in advance -+ * when traversing this tree. -+ * -+ * Nodes in the auxiliary search tree must contain both a key to compare against -+ * (we don't want to fetch the key from the set, that would defeat the purpose), -+ * and a pointer to the key. We use a few tricks to compress both of these. -+ * -+ * To compress the pointer, we take advantage of the fact that one node in the -+ * search tree corresponds to precisely BSET_CACHELINE bytes in the set. We have -+ * a function (to_inorder()) that takes the index of a node in a binary tree and -+ * returns what its index would be in an inorder traversal, so we only have to -+ * store the low bits of the offset. -+ * -+ * The key is 84 bits (KEY_DEV + key->key, the offset on the device). To -+ * compress that, we take advantage of the fact that when we're traversing the -+ * search tree at every iteration we know that both our search key and the key -+ * we're looking for lie within some range - bounded by our previous -+ * comparisons. (We special case the start of a search so that this is true even -+ * at the root of the tree). -+ * -+ * So we know the key we're looking for is between a and b, and a and b don't -+ * differ higher than bit 50, we don't need to check anything higher than bit -+ * 50. -+ * -+ * We don't usually need the rest of the bits, either; we only need enough bits -+ * to partition the key range we're currently checking. Consider key n - the -+ * key our auxiliary search tree node corresponds to, and key p, the key -+ * immediately preceding n. The lowest bit we need to store in the auxiliary -+ * search tree is the highest bit that differs between n and p. -+ * -+ * Note that this could be bit 0 - we might sometimes need all 80 bits to do the -+ * comparison. 
But we'd really like our nodes in the auxiliary search tree to be -+ * of fixed size. -+ * -+ * The solution is to make them fixed size, and when we're constructing a node -+ * check if p and n differed in the bits we needed them to. If they don't we -+ * flag that node, and when doing lookups we fall back to comparing against the -+ * real key. As long as this doesn't happen too often (and it seems to reliably -+ * happen a bit less than 1% of the time), we win - even on failures, that key -+ * is then more likely to be in cache than if we were doing binary searches all -+ * the way, since we're touching so much less memory. -+ * -+ * The keys in the auxiliary search tree are stored in (software) floating -+ * point, with an exponent and a mantissa. The exponent needs to be big enough -+ * to address all the bits in the original key, but the number of bits in the -+ * mantissa is somewhat arbitrary; more bits just gets us fewer failures. -+ * -+ * We need 7 bits for the exponent and 3 bits for the key's offset (since keys -+ * are 8 byte aligned); using 22 bits for the mantissa means a node is 4 bytes. -+ * We need one node per 128 bytes in the btree node, which means the auxiliary -+ * search trees take up 3% as much memory as the btree itself. -+ * -+ * Constructing these auxiliary search trees is moderately expensive, and we -+ * don't want to be constantly rebuilding the search tree for the last set -+ * whenever we insert another key into it. For the unwritten set, we use a much -+ * simpler lookup table - it's just a flat array, so index i in the lookup table -+ * corresponds to the i range of BSET_CACHELINE bytes in the set. Indexing -+ * within each byte range works the same as with the auxiliary search trees. -+ * -+ * These are much easier to keep up to date when we insert a key - we do it -+ * somewhat lazily; when we shift a key up we usually just increment the pointer -+ * to it, only when it would overflow do we go to the trouble of finding the -+ * first key in that range of bytes again. -+ */ -+ -+enum bset_aux_tree_type { -+ BSET_NO_AUX_TREE, -+ BSET_RO_AUX_TREE, -+ BSET_RW_AUX_TREE, -+}; -+ -+#define BSET_TREE_NR_TYPES 3 -+ -+#define BSET_NO_AUX_TREE_VAL (U16_MAX) -+#define BSET_RW_AUX_TREE_VAL (U16_MAX - 1) -+ -+static inline enum bset_aux_tree_type bset_aux_tree_type(const struct bset_tree *t) -+{ -+ switch (t->extra) { -+ case BSET_NO_AUX_TREE_VAL: -+ EBUG_ON(t->size); -+ return BSET_NO_AUX_TREE; -+ case BSET_RW_AUX_TREE_VAL: -+ EBUG_ON(!t->size); -+ return BSET_RW_AUX_TREE; -+ default: -+ EBUG_ON(!t->size); -+ return BSET_RO_AUX_TREE; -+ } -+} -+ -+/* -+ * BSET_CACHELINE was originally intended to match the hardware cacheline size - -+ * it used to be 64, but I realized the lookup code would touch slightly less -+ * memory if it was 128. -+ * -+ * It defines the number of bytes (in struct bset) per struct bkey_float in -+ * the auxiliary search tree - when we're done searching the bset_float tree we -+ * have this many bytes left that we do a linear search over. -+ * -+ * Since (after level 5) every level of the bset_tree is on a new cacheline, -+ * we're touching one fewer cacheline in the bset tree in exchange for one more -+ * cacheline in the linear search - but the linear search might stop before it -+ * gets to the second cacheline.
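/*
 * (Quick arithmetic check, a sketch only: with the sizes this file actually
 * uses - a 4-byte struct bkey_float plus a 1-byte prev entry per
 * BSET_CACHELINE bytes of keys - the overhead works out a little under 2%;
 * the "one node per 128 bytes" / "3%" figures in the comment above appear to
 * date from the older 22-bit-mantissa layout it also mentions.)
 */
#include <stdio.h>

int main(void)
{
	unsigned per_node = 4 + 1;	/* sizeof(struct bkey_float) + one prev byte */
	unsigned cacheline = 256;	/* BSET_CACHELINE below */

	printf("ro aux tree overhead: %.2f%%\n",
	       100.0 * per_node / cacheline);	/* prints ~1.95% */
	return 0;
}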
-+ */ -+ -+#define BSET_CACHELINE 256 -+ -+static inline size_t btree_keys_cachelines(const struct btree *b) -+{ -+ return (1U << b->byte_order) / BSET_CACHELINE; -+} -+ -+static inline size_t btree_aux_data_bytes(const struct btree *b) -+{ -+ return btree_keys_cachelines(b) * 8; -+} -+ -+static inline size_t btree_aux_data_u64s(const struct btree *b) -+{ -+ return btree_aux_data_bytes(b) / sizeof(u64); -+} -+ -+#define for_each_bset(_b, _t) \ -+ for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++) -+ -+#define bset_tree_for_each_key(_b, _t, _k) \ -+ for (_k = btree_bkey_first(_b, _t); \ -+ _k != btree_bkey_last(_b, _t); \ -+ _k = bkey_p_next(_k)) -+ -+static inline bool bset_has_ro_aux_tree(const struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RO_AUX_TREE; -+} -+ -+static inline bool bset_has_rw_aux_tree(struct bset_tree *t) -+{ -+ return bset_aux_tree_type(t) == BSET_RW_AUX_TREE; -+} -+ -+static inline void bch2_bset_set_no_aux_tree(struct btree *b, -+ struct bset_tree *t) -+{ -+ BUG_ON(t < b->set); -+ -+ for (; t < b->set + ARRAY_SIZE(b->set); t++) { -+ t->size = 0; -+ t->extra = BSET_NO_AUX_TREE_VAL; -+ t->aux_data_offset = U16_MAX; -+ } -+} -+ -+static inline void btree_node_set_format(struct btree *b, -+ struct bkey_format f) -+{ -+ int len; -+ -+ b->format = f; -+ b->nr_key_bits = bkey_format_key_bits(&f); -+ -+ len = bch2_compile_bkey_format(&b->format, b->aux_data); -+ BUG_ON(len < 0 || len > U8_MAX); -+ -+ b->unpack_fn_len = len; -+ -+ bch2_bset_set_no_aux_tree(b, b->set); -+} -+ -+static inline struct bset *bset_next_set(struct btree *b, -+ unsigned block_bytes) -+{ -+ struct bset *i = btree_bset_last(b); -+ -+ EBUG_ON(!is_power_of_2(block_bytes)); -+ -+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes); -+} -+ -+void bch2_btree_keys_init(struct btree *); -+ -+void bch2_bset_init_first(struct btree *, struct bset *); -+void bch2_bset_init_next(struct bch_fs *, struct btree *, -+ struct btree_node_entry *); -+void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); -+ -+void bch2_bset_insert(struct btree *, struct btree_node_iter *, -+ struct bkey_packed *, struct bkey_i *, unsigned); -+void bch2_bset_delete(struct btree *, struct bkey_packed *, unsigned); -+ -+/* Bkey utility code */ -+ -+/* packed or unpacked */ -+static inline int bkey_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ EBUG_ON(r_packed && !bkey_packed(r_packed)); -+ -+ if (unlikely(!bkey_packed(l))) -+ return bpos_cmp(packed_to_bkey_c(l)->p, *r); -+ -+ if (likely(r_packed)) -+ return __bch2_bkey_cmp_packed_format_checked(l, r_packed, b); -+ -+ return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); -+} -+ -+static inline struct bset_tree * -+bch2_bkey_to_bset_inlined(struct btree *b, struct bkey_packed *k) -+{ -+ unsigned offset = __btree_node_key_to_offset(b, k); -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ if (offset <= t->end_offset) { -+ EBUG_ON(offset < btree_bkey_first_offset(t)); -+ return t; -+ } -+ -+ BUG(); -+} -+ -+struct bset_tree *bch2_bkey_to_bset(struct btree *, struct bkey_packed *); -+ -+struct bkey_packed *bch2_bkey_prev_filter(struct btree *, struct bset_tree *, -+ struct bkey_packed *, unsigned); -+ -+static inline struct bkey_packed * -+bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) -+{ -+ return bch2_bkey_prev_filter(b, t, k, 0); -+} -+ -+static inline struct bkey_packed * -+bch2_bkey_prev(struct btree *b, 
struct bset_tree *t, struct bkey_packed *k) -+{ -+ return bch2_bkey_prev_filter(b, t, k, 1); -+} -+ -+/* Btree key iteration */ -+ -+void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, -+ const struct bkey_packed *, -+ const struct bkey_packed *); -+void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, -+ struct bpos *); -+void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, -+ struct btree *, -+ struct bset_tree *); -+ -+void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *); -+void bch2_btree_node_iter_set_drop(struct btree_node_iter *, -+ struct btree_node_iter_set *); -+void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *); -+ -+#define btree_node_iter_for_each(_iter, _set) \ -+ for (_set = (_iter)->data; \ -+ _set < (_iter)->data + ARRAY_SIZE((_iter)->data) && \ -+ (_set)->k != (_set)->end; \ -+ _set++) -+ -+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter, -+ unsigned i) -+{ -+ return iter->data[i].k == iter->data[i].end; -+} -+ -+static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter) -+{ -+ return __btree_node_iter_set_end(iter, 0); -+} -+ -+/* -+ * When keys compare equal, deleted keys compare first: -+ * -+ * XXX: only need to compare pointers for keys that are both within a -+ * btree_node_iterator - we need to break ties for prev() to work correctly -+ */ -+static inline int bkey_iter_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r) -+{ -+ return bch2_bkey_cmp_packed(b, l, r) -+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) -+ ?: cmp_int(l, r); -+} -+ -+static inline int btree_node_iter_cmp(const struct btree *b, -+ struct btree_node_iter_set l, -+ struct btree_node_iter_set r) -+{ -+ return bkey_iter_cmp(b, -+ __btree_node_offset_to_key(b, l.k), -+ __btree_node_offset_to_key(b, r.k)); -+} -+ -+/* These assume r (the search key) is not a deleted key: */ -+static inline int bkey_iter_pos_cmp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bpos *r) -+{ -+ return bkey_cmp_left_packed(b, l, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline int bkey_iter_cmp_p_or_unp(const struct btree *b, -+ const struct bkey_packed *l, -+ const struct bkey_packed *r_packed, -+ const struct bpos *r) -+{ -+ return bkey_cmp_p_or_unp(b, l, r_packed, r) -+ ?: -((int) bkey_deleted(l)); -+} -+ -+static inline struct bkey_packed * -+__bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, -+ struct btree *b) -+{ -+ return __btree_node_offset_to_key(b, iter->data->k); -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, struct btree *b) -+{ -+ return !bch2_btree_node_iter_end(iter) -+ ? 
__btree_node_offset_to_key(b, iter->data->k) -+ : NULL; -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) -+{ -+ struct bkey_packed *k; -+ -+ while ((k = bch2_btree_node_iter_peek_all(iter, b)) && -+ bkey_deleted(k)) -+ bch2_btree_node_iter_advance(iter, b); -+ -+ return k; -+} -+ -+static inline struct bkey_packed * -+bch2_btree_node_iter_next_all(struct btree_node_iter *iter, struct btree *b) -+{ -+ struct bkey_packed *ret = bch2_btree_node_iter_peek_all(iter, b); -+ -+ if (ret) -+ bch2_btree_node_iter_advance(iter, b); -+ -+ return ret; -+} -+ -+struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *, -+ struct btree *); -+struct bkey_packed *bch2_btree_node_iter_prev(struct btree_node_iter *, -+ struct btree *); -+ -+struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, -+ struct btree *, -+ struct bkey *); -+ -+#define for_each_btree_node_key(b, k, iter) \ -+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \ -+ (k = bch2_btree_node_iter_peek((iter), (b))); \ -+ bch2_btree_node_iter_advance(iter, b)) -+ -+#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \ -+ for (bch2_btree_node_iter_init_from_start((iter), (b)); \ -+ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ -+ bch2_btree_node_iter_advance(iter, b)) -+ -+/* Accounting: */ -+ -+static inline void btree_keys_account_key(struct btree_nr_keys *n, -+ unsigned bset, -+ struct bkey_packed *k, -+ int sign) -+{ -+ n->live_u64s += k->u64s * sign; -+ n->bset_u64s[bset] += k->u64s * sign; -+ -+ if (bkey_packed(k)) -+ n->packed_keys += sign; -+ else -+ n->unpacked_keys += sign; -+} -+ -+static inline void btree_keys_account_val_delta(struct btree *b, -+ struct bkey_packed *k, -+ int delta) -+{ -+ struct bset_tree *t = bch2_bkey_to_bset(b, k); -+ -+ b->nr.live_u64s += delta; -+ b->nr.bset_u64s[t - b->set] += delta; -+} -+ -+#define btree_keys_account_key_add(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, 1) -+#define btree_keys_account_key_drop(_nr, _bset_idx, _k) \ -+ btree_keys_account_key(_nr, _bset_idx, _k, -1) -+ -+#define btree_account_key_add(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, 1) -+#define btree_account_key_drop(_b, _k) \ -+ btree_keys_account_key(&(_b)->nr, \ -+ bch2_bkey_to_bset(_b, _k) - (_b)->set, _k, -1) -+ -+struct bset_stats { -+ struct { -+ size_t nr, bytes; -+ } sets[BSET_TREE_NR_TYPES]; -+ -+ size_t floats; -+ size_t failed; -+}; -+ -+void bch2_btree_keys_stats(const struct btree *, struct bset_stats *); -+void bch2_bfloat_to_text(struct printbuf *, struct btree *, -+ struct bkey_packed *); -+ -+/* Debug stuff */ -+ -+void bch2_dump_bset(struct bch_fs *, struct btree *, struct bset *, unsigned); -+void bch2_dump_btree_node(struct bch_fs *, struct btree *); -+void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void __bch2_verify_btree_nr_keys(struct btree *); -+void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *); -+void bch2_verify_insert_pos(struct btree *, struct bkey_packed *, -+ struct bkey_packed *, unsigned); -+ -+#else -+ -+static inline void __bch2_verify_btree_nr_keys(struct btree *b) {} -+static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter, -+ struct btree *b) {} -+static inline void bch2_verify_insert_pos(struct btree *b, -+ struct bkey_packed *where, -+ struct bkey_packed *insert, -+ 
unsigned clobber_u64s) {} -+#endif -+ -+static inline void bch2_verify_btree_nr_keys(struct btree *b) -+{ -+ if (bch2_debug_check_btree_accounting) -+ __bch2_verify_btree_nr_keys(b); -+} -+ -+#endif /* _BCACHEFS_BSET_H */ -diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c -new file mode 100644 -index 000000000000..0b084fbc478a ---- /dev/null -+++ b/fs/bcachefs/btree_cache.c -@@ -0,0 +1,1215 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_buf.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "debug.h" -+#include "errcode.h" -+#include "error.h" -+#include "trace.h" -+ -+#include <linux/prefetch.h> -+#include <linux/sched/mm.h> -+ -+const char * const bch2_btree_node_flags[] = { -+#define x(f) #f, -+ BTREE_FLAGS() -+#undef x -+ NULL -+}; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *c) -+{ -+ unsigned i, reserve = 16; -+ -+ if (!c->btree_roots_known[0].b) -+ reserve += 8; -+ -+ for (i = 0; i < btree_id_nr_alive(c); i++) { -+ struct btree_root *r = bch2_btree_id_root(c, i); -+ -+ if (r->b) -+ reserve += min_t(unsigned, 1, r->b->c.level) * 8; -+ } -+ -+ c->btree_cache.reserve = reserve; -+} -+ -+static inline unsigned btree_cache_can_free(struct btree_cache *bc) -+{ -+ return max_t(int, 0, bc->used - bc->reserve); -+} -+ -+static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) -+{ -+ if (b->c.lock.readers) -+ list_move(&b->list, &bc->freed_pcpu); -+ else -+ list_move(&b->list, &bc->freed_nonpcpu); -+} -+ -+static void btree_node_data_free(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ EBUG_ON(btree_node_write_in_flight(b)); -+ -+ clear_btree_node_just_written(b); -+ -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+#ifdef __KERNEL__ -+ kvfree(b->aux_data); -+#else -+ munmap(b->aux_data, btree_aux_data_bytes(b)); -+#endif -+ b->aux_data = NULL; -+ -+ bc->used--; -+ -+ btree_node_to_freedlist(bc, b); -+} -+ -+static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct btree *b = obj; -+ const u64 *v = arg->key; -+ -+ return b->hash_val == *v ?
0 : 1; -+} -+ -+static const struct rhashtable_params bch_btree_cache_params = { -+ .head_offset = offsetof(struct btree, hash), -+ .key_offset = offsetof(struct btree, hash_val), -+ .key_len = sizeof(u64), -+ .obj_cmpfn = bch2_btree_cache_cmp_fn, -+}; -+ -+static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) -+{ -+ BUG_ON(b->data || b->aux_data); -+ -+ b->data = kvpmalloc(btree_bytes(c), gfp); -+ if (!b->data) -+ return -BCH_ERR_ENOMEM_btree_node_mem_alloc; -+#ifdef __KERNEL__ -+ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); -+#else -+ b->aux_data = mmap(NULL, btree_aux_data_bytes(b), -+ PROT_READ|PROT_WRITE|PROT_EXEC, -+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); -+ if (b->aux_data == MAP_FAILED) -+ b->aux_data = NULL; -+#endif -+ if (!b->aux_data) { -+ kvpfree(b->data, btree_bytes(c)); -+ b->data = NULL; -+ return -BCH_ERR_ENOMEM_btree_node_mem_alloc; -+ } -+ -+ return 0; -+} -+ -+static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) -+{ -+ struct btree *b; -+ -+ b = kzalloc(sizeof(struct btree), gfp); -+ if (!b) -+ return NULL; -+ -+ bkey_btree_ptr_init(&b->key); -+ INIT_LIST_HEAD(&b->list); -+ INIT_LIST_HEAD(&b->write_blocked); -+ b->byte_order = ilog2(btree_bytes(c)); -+ return b; -+} -+ -+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ b = __btree_node_mem_alloc(c, GFP_KERNEL); -+ if (!b) -+ return NULL; -+ -+ if (btree_node_data_alloc(c, b, GFP_KERNEL)) { -+ kfree(b); -+ return NULL; -+ } -+ -+ bch2_btree_lock_init(&b->c, 0); -+ -+ bc->used++; -+ list_add(&b->list, &bc->freeable); -+ return b; -+} -+ -+/* Btree in memory cache - hash table */ -+ -+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) -+{ -+ int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); -+ -+ BUG_ON(ret); -+ -+ /* Cause future lookups for this node to fail: */ -+ b->hash_val = 0; -+} -+ -+int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) -+{ -+ BUG_ON(b->hash_val); -+ b->hash_val = btree_ptr_hash_val(&b->key); -+ -+ return rhashtable_lookup_insert_fast(&bc->table, &b->hash, -+ bch_btree_cache_params); -+} -+ -+int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, -+ unsigned level, enum btree_id id) -+{ -+ int ret; -+ -+ b->c.level = level; -+ b->c.btree_id = id; -+ -+ mutex_lock(&bc->lock); -+ ret = __bch2_btree_node_hash_insert(bc, b); -+ if (!ret) -+ list_add_tail(&b->list, &bc->live); -+ mutex_unlock(&bc->lock); -+ -+ return ret; -+} -+ -+__flatten -+static inline struct btree *btree_cache_find(struct btree_cache *bc, -+ const struct bkey_i *k) -+{ -+ u64 v = btree_ptr_hash_val(k); -+ -+ return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); -+} -+ -+/* -+ * this version is for btree nodes that have already been freed (we're not -+ * reaping a real btree node) -+ */ -+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ int ret = 0; -+ -+ lockdep_assert_held(&bc->lock); -+wait_on_io: -+ if (b->flags & ((1U << BTREE_NODE_dirty)| -+ (1U << BTREE_NODE_read_in_flight)| -+ (1U << BTREE_NODE_write_in_flight))) { -+ if (!flush) -+ return -BCH_ERR_ENOMEM_btree_node_reclaim; -+ -+ /* XXX: waiting on IO with btree cache lock held */ -+ bch2_btree_node_wait_on_read(b); -+ bch2_btree_node_wait_on_write(b); -+ } -+ -+ if (!six_trylock_intent(&b->c.lock)) -+ return -BCH_ERR_ENOMEM_btree_node_reclaim; -+ -+ if 
(!six_trylock_write(&b->c.lock)) -+ goto out_unlock_intent; -+ -+ /* recheck under lock */ -+ if (b->flags & ((1U << BTREE_NODE_read_in_flight)| -+ (1U << BTREE_NODE_write_in_flight))) { -+ if (!flush) -+ goto out_unlock; -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ goto wait_on_io; -+ } -+ -+ if (btree_node_noevict(b) || -+ btree_node_write_blocked(b) || -+ btree_node_will_make_reachable(b)) -+ goto out_unlock; -+ -+ if (btree_node_dirty(b)) { -+ if (!flush) -+ goto out_unlock; -+ /* -+ * Using the underscore version because we don't want to compact -+ * bsets after the write, since this node is about to be evicted -+ * - unless btree verify mode is enabled, since it runs out of -+ * the post write cleanup: -+ */ -+ if (bch2_verify_btree_ondisk) -+ bch2_btree_node_write(c, b, SIX_LOCK_intent, -+ BTREE_WRITE_cache_reclaim); -+ else -+ __bch2_btree_node_write(c, b, -+ BTREE_WRITE_cache_reclaim); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ goto wait_on_io; -+ } -+out: -+ if (b->hash_val && !ret) -+ trace_and_count(c, btree_cache_reap, c, b); -+ return ret; -+out_unlock: -+ six_unlock_write(&b->c.lock); -+out_unlock_intent: -+ six_unlock_intent(&b->c.lock); -+ ret = -BCH_ERR_ENOMEM_btree_node_reclaim; -+ goto out; -+} -+ -+static int btree_node_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, false); -+} -+ -+static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) -+{ -+ return __btree_node_reclaim(c, b, true); -+} -+ -+static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b, *t; -+ unsigned long nr = sc->nr_to_scan; -+ unsigned long can_free = 0; -+ unsigned long freed = 0; -+ unsigned long touched = 0; -+ unsigned i, flags; -+ unsigned long ret = SHRINK_STOP; -+ bool trigger_writes = atomic_read(&bc->dirty) + nr >= -+ bc->used * 3 / 4; -+ -+ if (bch2_btree_shrinker_disabled) -+ return SHRINK_STOP; -+ -+ mutex_lock(&bc->lock); -+ flags = memalloc_nofs_save(); -+ -+ /* -+ * It's _really_ critical that we don't free too many btree nodes - we -+ * have to always leave ourselves a reserve. 
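/*
 * (Sketch of the reserve arithmetic referenced here, mirroring
 * btree_cache_can_free() above in plain C: the shrinker only ever sees
 * nodes above the reserve line, and its scan target is clamped to that.
 * Names are illustrative.)
 */
static unsigned long model_can_free(unsigned long used, unsigned long reserve)
{
	return used > reserve ? used - reserve : 0;
}

static unsigned long model_scan_target(unsigned long nr_to_scan,
				       unsigned long used, unsigned long reserve)
{
	unsigned long can_free = model_can_free(used, reserve);

	return nr_to_scan < can_free ? nr_to_scan : can_free;
}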
The reserve is how we -+ * guarantee that allocating memory for a new btree node can always -+ * succeed, so that inserting keys into the btree can always succeed and -+ * IO can always make forward progress: -+ */ -+ can_free = btree_cache_can_free(bc); -+ nr = min_t(unsigned long, nr, can_free); -+ -+ i = 0; -+ list_for_each_entry_safe(b, t, &bc->freeable, list) { -+ /* -+ * Leave a few nodes on the freeable list, so that a btree split -+ * won't have to hit the system allocator: -+ */ -+ if (++i <= 3) -+ continue; -+ -+ touched++; -+ -+ if (touched >= nr) -+ goto out; -+ -+ if (!btree_node_reclaim(c, b)) { -+ btree_node_data_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ freed++; -+ } -+ } -+restart: -+ list_for_each_entry_safe(b, t, &bc->live, list) { -+ touched++; -+ -+ if (btree_node_accessed(b)) { -+ clear_btree_node_accessed(b); -+ } else if (!btree_node_reclaim(c, b)) { -+ freed++; -+ btree_node_data_free(c, b); -+ -+ bch2_btree_node_hash_remove(bc, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ if (freed == nr) -+ goto out_rotate; -+ } else if (trigger_writes && -+ btree_node_dirty(b) && -+ !btree_node_will_make_reachable(b) && -+ !btree_node_write_blocked(b) && -+ six_trylock_read(&b->c.lock)) { -+ list_move(&bc->live, &b->list); -+ mutex_unlock(&bc->lock); -+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); -+ six_unlock_read(&b->c.lock); -+ if (touched >= nr) -+ goto out_nounlock; -+ mutex_lock(&bc->lock); -+ goto restart; -+ } -+ -+ if (touched >= nr) -+ break; -+ } -+out_rotate: -+ if (&t->list != &bc->live) -+ list_move_tail(&bc->live, &t->list); -+out: -+ mutex_unlock(&bc->lock); -+out_nounlock: -+ ret = freed; -+ memalloc_nofs_restore(flags); -+ trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret); -+ return ret; -+} -+ -+static unsigned long bch2_btree_cache_count(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_cache.shrink); -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (bch2_btree_shrinker_disabled) -+ return 0; -+ -+ return btree_cache_can_free(bc); -+} -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ unsigned i, flags; -+ -+ unregister_shrinker(&bc->shrink); -+ -+ /* vfree() can allocate memory: */ -+ flags = memalloc_nofs_save(); -+ mutex_lock(&bc->lock); -+ -+ if (c->verify_data) -+ list_move(&c->verify_data->list, &bc->live); -+ -+ kvpfree(c->verify_ondisk, btree_bytes(c)); -+ -+ for (i = 0; i < btree_id_nr_alive(c); i++) { -+ struct btree_root *r = bch2_btree_id_root(c, i); -+ -+ if (r->b) -+ list_add(&r->b->list, &bc->live); -+ } -+ -+ list_splice(&bc->freeable, &bc->live); -+ -+ while (!list_empty(&bc->live)) { -+ b = list_first_entry(&bc->live, struct btree, list); -+ -+ BUG_ON(btree_node_read_in_flight(b) || -+ btree_node_write_in_flight(b)); -+ -+ if (btree_node_dirty(b)) -+ bch2_btree_complete_write(c, b, btree_current_write(b)); -+ clear_btree_node_dirty_acct(c, b); -+ -+ btree_node_data_free(c, b); -+ } -+ -+ BUG_ON(atomic_read(&c->btree_cache.dirty)); -+ -+ list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); -+ -+ while (!list_empty(&bc->freed_nonpcpu)) { -+ b = list_first_entry(&bc->freed_nonpcpu, struct btree, list); -+ list_del(&b->list); -+ six_lock_exit(&b->c.lock); -+ kfree(b); -+ } -+ -+ mutex_unlock(&bc->lock); -+ memalloc_nofs_restore(flags); -+ -+ if (bc->table_init_done) -+ 
rhashtable_destroy(&bc->table); -+} -+ -+int bch2_fs_btree_cache_init(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ unsigned i; -+ int ret = 0; -+ -+ ret = rhashtable_init(&bc->table, &bch_btree_cache_params); -+ if (ret) -+ goto err; -+ -+ bc->table_init_done = true; -+ -+ bch2_recalc_btree_reserve(c); -+ -+ for (i = 0; i < bc->reserve; i++) -+ if (!__bch2_btree_node_mem_alloc(c)) -+ goto err; -+ -+ list_splice_init(&bc->live, &bc->freeable); -+ -+ mutex_init(&c->verify_lock); -+ -+ bc->shrink.count_objects = bch2_btree_cache_count; -+ bc->shrink.scan_objects = bch2_btree_cache_scan; -+ bc->shrink.seeks = 4; -+ ret = register_shrinker(&bc->shrink, "%s-btree_cache", c->name); -+ if (ret) -+ goto err; -+ -+ return 0; -+err: -+ return -BCH_ERR_ENOMEM_fs_btree_cache_init; -+} -+ -+void bch2_fs_btree_cache_init_early(struct btree_cache *bc) -+{ -+ mutex_init(&bc->lock); -+ INIT_LIST_HEAD(&bc->live); -+ INIT_LIST_HEAD(&bc->freeable); -+ INIT_LIST_HEAD(&bc->freed_pcpu); -+ INIT_LIST_HEAD(&bc->freed_nonpcpu); -+} -+ -+/* -+ * We can only have one thread cannibalizing other cached btree nodes at a time, -+ * or we'll deadlock. We use an open coded mutex to ensure that, which a -+ * cannibalize_bucket() will take. This means every time we unlock the root of -+ * the btree, we need to release this lock if we have it held. -+ */ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ -+ if (bc->alloc_lock == current) { -+ trace_and_count(c, btree_cache_cannibalize_unlock, c); -+ bc->alloc_lock = NULL; -+ closure_wake_up(&bc->alloc_wait); -+ } -+} -+ -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct task_struct *old; -+ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) -+ goto success; -+ -+ if (!cl) { -+ trace_and_count(c, btree_cache_cannibalize_lock_fail, c); -+ return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; -+ } -+ -+ closure_wait(&bc->alloc_wait, cl); -+ -+ /* Try again, after adding ourselves to waitlist */ -+ old = cmpxchg(&bc->alloc_lock, NULL, current); -+ if (old == NULL || old == current) { -+ /* We raced */ -+ closure_wake_up(&bc->alloc_wait); -+ goto success; -+ } -+ -+ trace_and_count(c, btree_cache_cannibalize_lock_fail, c); -+ return -BCH_ERR_btree_cache_cannibalize_lock_blocked; -+ -+success: -+ trace_and_count(c, btree_cache_cannibalize_lock, c); -+ return 0; -+} -+ -+static struct btree *btree_node_cannibalize(struct bch_fs *c) -+{ -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_reclaim(c, b)) -+ return b; -+ -+ while (1) { -+ list_for_each_entry_reverse(b, &bc->live, list) -+ if (!btree_node_write_and_reclaim(c, b)) -+ return b; -+ -+ /* -+ * Rare case: all nodes were intent-locked. -+ * Just busy-wait. -+ */ -+ WARN_ONCE(1, "btree cache cannibalize failed\n"); -+ cond_resched(); -+ } -+} -+ -+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_cache *bc = &c->btree_cache; -+ struct list_head *freed = pcpu_read_locks -+ ? &bc->freed_pcpu -+ : &bc->freed_nonpcpu; -+ struct btree *b, *b2; -+ u64 start_time = local_clock(); -+ unsigned flags; -+ -+ flags = memalloc_nofs_save(); -+ mutex_lock(&bc->lock); -+ -+ /* -+ * We never free struct btree itself, just the memory that holds the on -+ * disk node. 
Check the freed list before allocating a new one: -+ */ -+ list_for_each_entry(b, freed, list) -+ if (!btree_node_reclaim(c, b)) { -+ list_del_init(&b->list); -+ goto got_node; -+ } -+ -+ b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); -+ if (!b) { -+ mutex_unlock(&bc->lock); -+ bch2_trans_unlock(trans); -+ b = __btree_node_mem_alloc(c, GFP_KERNEL); -+ if (!b) -+ goto err; -+ mutex_lock(&bc->lock); -+ } -+ -+ bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); -+ -+ BUG_ON(!six_trylock_intent(&b->c.lock)); -+ BUG_ON(!six_trylock_write(&b->c.lock)); -+got_node: -+ -+ /* -+ * btree_free() doesn't free memory; it sticks the node on the end of -+ * the list. Check if there's any freed nodes there: -+ */ -+ list_for_each_entry(b2, &bc->freeable, list) -+ if (!btree_node_reclaim(c, b2)) { -+ swap(b->data, b2->data); -+ swap(b->aux_data, b2->aux_data); -+ btree_node_to_freedlist(bc, b2); -+ six_unlock_write(&b2->c.lock); -+ six_unlock_intent(&b2->c.lock); -+ goto got_mem; -+ } -+ -+ mutex_unlock(&bc->lock); -+ -+ if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) { -+ bch2_trans_unlock(trans); -+ if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) -+ goto err; -+ } -+ -+ mutex_lock(&bc->lock); -+ bc->used++; -+got_mem: -+ mutex_unlock(&bc->lock); -+ -+ BUG_ON(btree_node_hashed(b)); -+ BUG_ON(btree_node_dirty(b)); -+ BUG_ON(btree_node_write_in_flight(b)); -+out: -+ b->flags = 0; -+ b->written = 0; -+ b->nsets = 0; -+ b->sib_u64s[0] = 0; -+ b->sib_u64s[1] = 0; -+ b->whiteout_u64s = 0; -+ bch2_btree_keys_init(b); -+ set_btree_node_accessed(b); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], -+ start_time); -+ -+ memalloc_nofs_restore(flags); -+ return b; -+err: -+ mutex_lock(&bc->lock); -+ -+ /* Try to cannibalize another cached btree node: */ -+ if (bc->alloc_lock == current) { -+ b2 = btree_node_cannibalize(c); -+ clear_btree_node_just_written(b2); -+ bch2_btree_node_hash_remove(bc, b2); -+ -+ if (b) { -+ swap(b->data, b2->data); -+ swap(b->aux_data, b2->aux_data); -+ btree_node_to_freedlist(bc, b2); -+ six_unlock_write(&b2->c.lock); -+ six_unlock_intent(&b2->c.lock); -+ } else { -+ b = b2; -+ list_del_init(&b->list); -+ } -+ -+ mutex_unlock(&bc->lock); -+ -+ trace_and_count(c, btree_cache_cannibalize, c); -+ goto out; -+ } -+ -+ mutex_unlock(&bc->lock); -+ memalloc_nofs_restore(flags); -+ return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc); -+} -+ -+/* Slowpath, don't want it inlined into btree_iter_traverse() */ -+static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, -+ struct btree_path *path, -+ const struct bkey_i *k, -+ enum btree_id btree_id, -+ unsigned level, -+ enum six_lock_type lock_type, -+ bool sync) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ u32 seq; -+ -+ BUG_ON(level + 1 >= BTREE_MAX_DEPTH); -+ /* -+ * Parent node must be locked, else we could read in a btree node that's -+ * been freed: -+ */ -+ if (path && !bch2_btree_node_relock(trans, path, level + 1)) { -+ trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path); -+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); -+ } -+ -+ b = bch2_btree_node_mem_alloc(trans, level != 0); -+ -+ if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) { -+ trans->memory_allocation_failure = true; -+ trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path); -+ return ERR_PTR(btree_trans_restart(trans, 
BCH_ERR_transaction_restart_fill_mem_alloc_fail)); -+ } -+ -+ if (IS_ERR(b)) -+ return b; -+ -+ /* -+ * Btree nodes read in from disk should not have the accessed bit set -+ * initially, so that linear scans don't thrash the cache: -+ */ -+ clear_btree_node_accessed(b); -+ -+ bkey_copy(&b->key, k); -+ if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { -+ /* raced with another fill: */ -+ -+ /* mark as unhashed... */ -+ b->hash_val = 0; -+ -+ mutex_lock(&bc->lock); -+ list_add(&b->list, &bc->freeable); -+ mutex_unlock(&bc->lock); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ return NULL; -+ } -+ -+ set_btree_node_read_in_flight(b); -+ -+ six_unlock_write(&b->c.lock); -+ seq = six_lock_seq(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ /* Unlock before doing IO: */ -+ if (path && sync) -+ bch2_trans_unlock_noassert(trans); -+ -+ bch2_btree_node_read(c, b, sync); -+ -+ if (!sync) -+ return NULL; -+ -+ if (path) { -+ int ret = bch2_trans_relock(trans) ?: -+ bch2_btree_path_relock_intent(trans, path); -+ if (ret) { -+ BUG_ON(!trans->restarted); -+ return ERR_PTR(ret); -+ } -+ } -+ -+ if (!six_relock_type(&b->c.lock, lock_type, seq)) { -+ if (path) -+ trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path); -+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); -+ } -+ -+ return b; -+} -+ -+static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) -+{ -+ struct printbuf buf = PRINTBUF; -+ -+ if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) -+ return; -+ -+ prt_printf(&buf, -+ "btree node header doesn't match ptr\n" -+ "btree %s level %u\n" -+ "ptr: ", -+ bch2_btree_id_str(b->c.btree_id), b->c.level); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ -+ prt_printf(&buf, "\nheader: btree %s level %llu\n" -+ "min ", -+ bch2_btree_id_str(BTREE_NODE_ID(b->data)), -+ BTREE_NODE_LEVEL(b->data)); -+ bch2_bpos_to_text(&buf, b->data->min_key); -+ -+ prt_printf(&buf, "\nmax "); -+ bch2_bpos_to_text(&buf, b->data->max_key); -+ -+ bch2_fs_inconsistent(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+} -+ -+static inline void btree_check_header(struct bch_fs *c, struct btree *b) -+{ -+ if (b->c.btree_id != BTREE_NODE_ID(b->data) || -+ b->c.level != BTREE_NODE_LEVEL(b->data) || -+ !bpos_eq(b->data->max_key, b->key.k.p) || -+ (b->key.k.type == KEY_TYPE_btree_ptr_v2 && -+ !bpos_eq(b->data->min_key, -+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key))) -+ btree_bad_header(c, b); -+} -+ -+static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, -+ const struct bkey_i *k, unsigned level, -+ enum six_lock_type lock_type, -+ unsigned long trace_ip) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ struct bset_tree *t; -+ bool need_relock = false; -+ int ret; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+retry: -+ b = btree_cache_find(bc, k); -+ if (unlikely(!b)) { -+ /* -+ * We must have the parent locked to call bch2_btree_node_fill(), -+ * else we could read in a btree node from disk that's been -+ * freed: -+ */ -+ b = bch2_btree_node_fill(trans, path, k, path->btree_id, -+ level, lock_type, true); -+ need_relock = true; -+ -+ /* We raced and found the btree node in the cache */ -+ if (!b) -+ goto retry; -+ -+ if (IS_ERR(b)) -+ return b; -+ } else { -+ if (btree_node_read_locked(path, level + 1)) -+ btree_node_unlock(trans, path, level + 1); -+ -+ ret = btree_node_lock(trans, path, &b->c, 
level, lock_type, trace_ip);
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			return ERR_PTR(ret);
-+
-+		BUG_ON(ret);
-+
-+		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
-+			     b->c.level != level ||
-+			     race_fault())) {
-+			six_unlock_type(&b->c.lock, lock_type);
-+			if (bch2_btree_node_relock(trans, path, level + 1))
-+				goto retry;
-+
-+			trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
-+			return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
-+		}
-+
-+		/* avoid atomic set bit if it's not needed: */
-+		if (!btree_node_accessed(b))
-+			set_btree_node_accessed(b);
-+	}
-+
-+	if (unlikely(btree_node_read_in_flight(b))) {
-+		u32 seq = six_lock_seq(&b->c.lock);
-+
-+		six_unlock_type(&b->c.lock, lock_type);
-+		bch2_trans_unlock(trans);
-+		need_relock = true;
-+
-+		bch2_btree_node_wait_on_read(b);
-+
-+		/*
-+		 * should_be_locked is not set on this path yet, so we need to
-+		 * relock it specifically:
-+		 */
-+		if (!six_relock_type(&b->c.lock, lock_type, seq))
-+			goto retry;
-+	}
-+
-+	if (unlikely(need_relock)) {
-+		ret = bch2_trans_relock(trans) ?:
-+			bch2_btree_path_relock_intent(trans, path);
-+		if (ret) {
-+			six_unlock_type(&b->c.lock, lock_type);
-+			return ERR_PTR(ret);
-+		}
-+	}
-+
-+	prefetch(b->aux_data);
-+
-+	for_each_bset(b, t) {
-+		void *p = (u64 *) b->aux_data + t->aux_data_offset;
-+
-+		prefetch(p + L1_CACHE_BYTES * 0);
-+		prefetch(p + L1_CACHE_BYTES * 1);
-+		prefetch(p + L1_CACHE_BYTES * 2);
-+	}
-+
-+	if (unlikely(btree_node_read_error(b))) {
-+		six_unlock_type(&b->c.lock, lock_type);
-+		return ERR_PTR(-EIO);
-+	}
-+
-+	EBUG_ON(b->c.btree_id != path->btree_id);
-+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
-+	btree_check_header(c, b);
-+
-+	return b;
-+}
-+
-+/**
-+ * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
-+ * in from disk if necessary.
-+ *
-+ * @trans: btree transaction object
-+ * @path: btree_path being traversed
-+ * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
-+ * @level: level of btree node being looked up (0 == leaf node)
-+ * @lock_type: SIX_LOCK_read or SIX_LOCK_intent
-+ * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
-+ *
-+ * The btree node will have either a read or an intent lock held, depending
-+ * on @lock_type.
-+ *
-+ * Returns: btree node or ERR_PTR()
-+ */
-+struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
-+				  const struct bkey_i *k, unsigned level,
-+				  enum six_lock_type lock_type,
-+				  unsigned long trace_ip)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree *b;
-+	struct bset_tree *t;
-+	int ret;
-+
-+	EBUG_ON(level >= BTREE_MAX_DEPTH);
-+
-+	b = btree_node_mem_ptr(k);
-+
-+	/*
-+	 * Check b->hash_val _before_ calling btree_node_lock() - this might not
-+	 * be the node we want anymore, and trying to lock the wrong node could
-+	 * cause an unnecessary transaction restart:
-+	 */
-+	if (unlikely(!c->opts.btree_node_mem_ptr_optimization ||
-+		     !b ||
-+		     b->hash_val != btree_ptr_hash_val(k)))
-+		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
-+
-+	if (btree_node_read_locked(path, level + 1))
-+		btree_node_unlock(trans, path, level + 1);
-+
-+	ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		return ERR_PTR(ret);
-+
-+	BUG_ON(ret);
-+
-+	if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
-+		     b->c.level != level ||
-+		     race_fault())) {
-+		six_unlock_type(&b->c.lock, lock_type);
-+		if (bch2_btree_node_relock(trans, path, level + 1))
-+			return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
-+
-+		trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
-+		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
-+	}
-+
-+	if (unlikely(btree_node_read_in_flight(b))) {
-+		six_unlock_type(&b->c.lock, lock_type);
-+		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
-+	}
-+
-+	prefetch(b->aux_data);
-+
-+	for_each_bset(b, t) {
-+		void *p = (u64 *) b->aux_data + t->aux_data_offset;
-+
-+		prefetch(p + L1_CACHE_BYTES * 0);
-+		prefetch(p + L1_CACHE_BYTES * 1);
-+		prefetch(p + L1_CACHE_BYTES * 2);
-+	}
-+
-+	/* avoid atomic set bit if it's not needed: */
-+	if (!btree_node_accessed(b))
-+		set_btree_node_accessed(b);
-+
-+	if (unlikely(btree_node_read_error(b))) {
-+		six_unlock_type(&b->c.lock, lock_type);
-+		return ERR_PTR(-EIO);
-+	}
-+
-+	EBUG_ON(b->c.btree_id != path->btree_id);
-+	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
-+	btree_check_header(c, b);
-+
-+	return b;
-+}
-+
-+struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
-+					 const struct bkey_i *k,
-+					 enum btree_id btree_id,
-+					 unsigned level,
-+					 bool nofill)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_cache *bc = &c->btree_cache;
-+	struct btree *b;
-+	struct bset_tree *t;
-+	int ret;
-+
-+	EBUG_ON(level >= BTREE_MAX_DEPTH);
-+
-+	if (c->opts.btree_node_mem_ptr_optimization) {
-+		b = btree_node_mem_ptr(k);
-+		if (b)
-+			goto lock_node;
-+	}
-+retry:
-+	b = btree_cache_find(bc, k);
-+	if (unlikely(!b)) {
-+		if (nofill)
-+			goto out;
-+
-+		b = bch2_btree_node_fill(trans, NULL, k, btree_id,
-+					 level, SIX_LOCK_read, true);
-+
-+		/* We raced and found the btree node in the cache */
-+		if (!b)
-+			goto retry;
-+
-+		if (IS_ERR(b) &&
-+		    !bch2_btree_cache_cannibalize_lock(c, NULL))
-+			goto retry;
-+
-+		if (IS_ERR(b))
-+			goto out;
-+	} else {
-+lock_node:
-+		ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			return ERR_PTR(ret);
-+
-+		BUG_ON(ret);
-+
-+		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
-+			     b->c.btree_id != btree_id ||
-+			     b->c.level != level)) {
-+			six_unlock_read(&b->c.lock);
-+			goto retry;
-+		}
-+	}
-+
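-+	/*
-+	 * Illustrative sketch of the race the recheck above guards against
-+	 * (the timeline is an example for exposition, not code from this
-+	 * file): between btree_cache_find() and taking the lock, the struct
-+	 * btree can be freed and reused for a different node:
-+	 *
-+	 *	thread A				thread B
-+	 *	btree_cache_find(bc, k) -> b
-+	 *						reclaims b, reuses the
-+	 *						struct for another node
-+	 *						(new hash_val)
-+	 *	btree_node_lock_nopath(&b->c)
-+	 *	b->hash_val != btree_ptr_hash_val(k)
-+	 *	-> six_unlock_read(), goto retry
-+	 *
-+	 * So hash_val/btree_id/level are only trusted once the lock is held.
-+	 */
-+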
-+ /* XXX: waiting on IO with btree locks held: */ -+ __bch2_btree_node_wait_on_read(b); -+ -+ prefetch(b->aux_data); -+ -+ for_each_bset(b, t) { -+ void *p = (u64 *) b->aux_data + t->aux_data_offset; -+ -+ prefetch(p + L1_CACHE_BYTES * 0); -+ prefetch(p + L1_CACHE_BYTES * 1); -+ prefetch(p + L1_CACHE_BYTES * 2); -+ } -+ -+ /* avoid atomic set bit if it's not needed: */ -+ if (!btree_node_accessed(b)) -+ set_btree_node_accessed(b); -+ -+ if (unlikely(btree_node_read_error(b))) { -+ six_unlock_read(&b->c.lock); -+ b = ERR_PTR(-EIO); -+ goto out; -+ } -+ -+ EBUG_ON(b->c.btree_id != btree_id); -+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); -+ btree_check_header(c, b); -+out: -+ bch2_btree_cache_cannibalize_unlock(c); -+ return b; -+} -+ -+int bch2_btree_node_prefetch(struct btree_trans *trans, -+ struct btree_path *path, -+ const struct bkey_i *k, -+ enum btree_id btree_id, unsigned level) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ BUG_ON(trans && !btree_node_locked(path, level + 1)); -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ b = btree_cache_find(bc, k); -+ if (b) -+ return 0; -+ -+ b = bch2_btree_node_fill(trans, path, k, btree_id, -+ level, SIX_LOCK_read, false); -+ return PTR_ERR_OR_ZERO(b); -+} -+ -+void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_cache *bc = &c->btree_cache; -+ struct btree *b; -+ -+ b = btree_cache_find(bc, k); -+ if (!b) -+ return; -+wait_on_io: -+ /* not allowed to wait on io with btree locks held: */ -+ -+ /* XXX we're called from btree_gc which will be holding other btree -+ * nodes locked -+ */ -+ __bch2_btree_node_wait_on_read(b); -+ __bch2_btree_node_wait_on_write(b); -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); -+ -+ if (btree_node_dirty(b)) { -+ __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ goto wait_on_io; -+ } -+ -+ BUG_ON(btree_node_dirty(b)); -+ -+ mutex_lock(&bc->lock); -+ btree_node_data_free(c, b); -+ bch2_btree_node_hash_remove(bc, b); -+ mutex_unlock(&bc->lock); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+} -+ -+const char *bch2_btree_id_str(enum btree_id btree) -+{ -+ return btree < BTREE_ID_NR ? 
__bch2_btree_ids[btree] : "(unknown)"; -+} -+ -+void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) -+{ -+ prt_printf(out, "%s level %u/%u\n ", -+ bch2_btree_id_str(b->c.btree_id), -+ b->c.level, -+ bch2_btree_id_root(c, b->c.btree_id)->level); -+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+} -+ -+void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) -+{ -+ struct bset_stats stats; -+ -+ memset(&stats, 0, sizeof(stats)); -+ -+ bch2_btree_keys_stats(b, &stats); -+ -+ prt_printf(out, "l %u ", b->c.level); -+ bch2_bpos_to_text(out, b->data->min_key); -+ prt_printf(out, " - "); -+ bch2_bpos_to_text(out, b->data->max_key); -+ prt_printf(out, ":\n" -+ " ptrs: "); -+ bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ prt_newline(out); -+ -+ prt_printf(out, -+ " format: "); -+ bch2_bkey_format_to_text(out, &b->format); -+ -+ prt_printf(out, -+ " unpack fn len: %u\n" -+ " bytes used %zu/%zu (%zu%% full)\n" -+ " sib u64s: %u, %u (merge threshold %u)\n" -+ " nr packed keys %u\n" -+ " nr unpacked keys %u\n" -+ " floats %zu\n" -+ " failed unpacked %zu\n", -+ b->unpack_fn_len, -+ b->nr.live_u64s * sizeof(u64), -+ btree_bytes(c) - sizeof(struct btree_node), -+ b->nr.live_u64s * 100 / btree_max_u64s(c), -+ b->sib_u64s[0], -+ b->sib_u64s[1], -+ c->btree_foreground_merge_threshold, -+ b->nr.packed_keys, -+ b->nr.unpacked_keys, -+ stats.floats, -+ stats.failed); -+} -+ -+void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c) -+{ -+ prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used); -+ prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty)); -+ prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock); -+} -diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h -new file mode 100644 -index 000000000000..cfb80b201d61 ---- /dev/null -+++ b/fs/bcachefs/btree_cache.h -@@ -0,0 +1,131 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_CACHE_H -+#define _BCACHEFS_BTREE_CACHE_H -+ -+#include "bcachefs.h" -+#include "btree_types.h" -+#include "bkey_methods.h" -+ -+extern const char * const bch2_btree_node_flags[]; -+ -+struct btree_iter; -+ -+void bch2_recalc_btree_reserve(struct bch_fs *); -+ -+void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); -+int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); -+int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, -+ unsigned, enum btree_id); -+ -+void bch2_btree_cache_cannibalize_unlock(struct bch_fs *); -+int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); -+ -+struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); -+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); -+ -+struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *, -+ const struct bkey_i *, unsigned, -+ enum six_lock_type, unsigned long); -+ -+struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *, -+ enum btree_id, unsigned, bool); -+ -+int bch2_btree_node_prefetch(struct btree_trans *, struct btree_path *, -+ const struct bkey_i *, enum btree_id, unsigned); -+ -+void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *); -+ -+void bch2_fs_btree_cache_exit(struct bch_fs *); -+int bch2_fs_btree_cache_init(struct bch_fs *); -+void bch2_fs_btree_cache_init_early(struct btree_cache *); -+ -+static inline u64 btree_ptr_hash_val(const struct bkey_i *k) -+{ -+ switch (k->k.type) 
{ -+ case KEY_TYPE_btree_ptr: -+ return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start); -+ case KEY_TYPE_btree_ptr_v2: -+ /* -+ * The cast/deref is only necessary to avoid sparse endianness -+ * warnings: -+ */ -+ return *((u64 *) &bkey_i_to_btree_ptr_v2_c(k)->v.seq); -+ default: -+ return 0; -+ } -+} -+ -+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k) -+{ -+ return k->k.type == KEY_TYPE_btree_ptr_v2 -+ ? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr -+ : NULL; -+} -+ -+/* is btree node in hash table? */ -+static inline bool btree_node_hashed(struct btree *b) -+{ -+ return b->hash_val != 0; -+} -+ -+#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ -+ for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \ -+ &(_c)->btree_cache.table), \ -+ _iter = 0; _iter < (_tbl)->size; _iter++) \ -+ rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) -+ -+static inline size_t btree_bytes(struct bch_fs *c) -+{ -+ return c->opts.btree_node_size; -+} -+ -+static inline size_t btree_max_u64s(struct bch_fs *c) -+{ -+ return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); -+} -+ -+static inline size_t btree_pages(struct bch_fs *c) -+{ -+ return btree_bytes(c) / PAGE_SIZE; -+} -+ -+static inline unsigned btree_blocks(struct bch_fs *c) -+{ -+ return btree_sectors(c) >> c->block_bits; -+} -+ -+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3) -+ -+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) -+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ -+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) >> 2)) -+ -+static inline unsigned btree_id_nr_alive(struct bch_fs *c) -+{ -+ return BTREE_ID_NR + c->btree_roots_extra.nr; -+} -+ -+static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned id) -+{ -+ if (likely(id < BTREE_ID_NR)) { -+ return &c->btree_roots_known[id]; -+ } else { -+ unsigned idx = id - BTREE_ID_NR; -+ -+ EBUG_ON(idx >= c->btree_roots_extra.nr); -+ return &c->btree_roots_extra.data[idx]; -+ } -+} -+ -+static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) -+{ -+ return bch2_btree_id_root(c, b->c.btree_id)->b; -+} -+ -+const char *bch2_btree_id_str(enum btree_id); -+void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); -+void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); -+void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_CACHE_H */ -diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c -new file mode 100644 -index 000000000000..0b5d09c8475d ---- /dev/null -+++ b/fs/bcachefs/btree_gc.c -@@ -0,0 +1,2145 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * Copyright (C) 2014 Datera Inc. 
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "bkey_methods.h"
-+#include "bkey_buf.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_locking.h"
-+#include "btree_update_interior.h"
-+#include "btree_io.h"
-+#include "btree_gc.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "debug.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "recovery.h"
-+#include "reflink.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+#include "trace.h"
-+
-+#include <linux/slab.h>
-+#include <linux/bitops.h>
-+#include <linux/freezer.h>
-+#include <linux/kthread.h>
-+#include <linux/preempt.h>
-+#include <linux/rcupdate.h>
-+#include <linux/sched/task.h>
-+
-+#define DROP_THIS_NODE 10
-+#define DROP_PREV_NODE 11
-+
-+static bool should_restart_for_topology_repair(struct bch_fs *c)
-+{
-+	return c->opts.fix_errors != FSCK_FIX_no &&
-+		!(c->recovery_passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology));
-+}
-+
-+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-+{
-+	preempt_disable();
-+	write_seqcount_begin(&c->gc_pos_lock);
-+	c->gc_pos = new_pos;
-+	write_seqcount_end(&c->gc_pos_lock);
-+	preempt_enable();
-+}
-+
-+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-+{
-+	BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
-+	__gc_pos_set(c, new_pos);
-+}
-+
-+/*
-+ * Missing: if an interior btree node is empty, we need to do something -
-+ * perhaps just kill it
-+ */
-+static int bch2_gc_check_topology(struct bch_fs *c,
-+				  struct btree *b,
-+				  struct bkey_buf *prev,
-+				  struct bkey_buf cur,
-+				  bool is_last)
-+{
-+	struct bpos node_start = b->data->min_key;
-+	struct bpos node_end = b->data->max_key;
-+	struct bpos expected_start = bkey_deleted(&prev->k->k)
-+		?
node_start -+ : bpos_successor(prev->k->k.p); -+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; -+ int ret = 0; -+ -+ if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k); -+ -+ if (!bpos_eq(expected_start, bp->v.min_key)) { -+ bch2_topology_error(c); -+ -+ if (bkey_deleted(&prev->k->k)) { -+ prt_printf(&buf1, "start of node: "); -+ bch2_bpos_to_text(&buf1, node_start); -+ } else { -+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k)); -+ } -+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); -+ -+ if (__fsck_err(c, -+ FSCK_CAN_FIX| -+ FSCK_CAN_IGNORE| -+ FSCK_NO_RATELIMIT, -+ btree_node_topology_bad_min_key, -+ "btree node with incorrect min_key at btree %s level %u:\n" -+ " prev %s\n" -+ " cur %s", -+ bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { -+ bch_info(c, "Halting mark and sweep to start topology repair pass"); -+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); -+ goto err; -+ } else { -+ set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); -+ } -+ } -+ } -+ -+ if (is_last && !bpos_eq(cur.k->k.p, node_end)) { -+ bch2_topology_error(c); -+ -+ printbuf_reset(&buf1); -+ printbuf_reset(&buf2); -+ -+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); -+ bch2_bpos_to_text(&buf2, node_end); -+ -+ if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT, -+ btree_node_topology_bad_max_key, -+ "btree node with incorrect max_key at btree %s level %u:\n" -+ " %s\n" -+ " expected %s", -+ bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf) && -+ should_restart_for_topology_repair(c)) { -+ bch_info(c, "Halting mark and sweep to start topology repair pass"); -+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); -+ goto err; -+ } else { -+ set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); -+ } -+ } -+ -+ bch2_bkey_buf_copy(prev, c, cur.k); -+err: -+fsck_err: -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ return ret; -+} -+ -+static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst) -+{ -+ switch (b->key.k.type) { -+ case KEY_TYPE_btree_ptr: { -+ struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key); -+ -+ dst->k.p = src->k.p; -+ dst->v.mem_ptr = 0; -+ dst->v.seq = b->data->keys.seq; -+ dst->v.sectors_written = 0; -+ dst->v.flags = 0; -+ dst->v.min_key = b->data->min_key; -+ set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k)); -+ memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k)); -+ break; -+ } -+ case KEY_TYPE_btree_ptr_v2: -+ bkey_copy(&dst->k_i, &b->key); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static void bch2_btree_node_update_key_early(struct btree_trans *trans, -+ enum btree_id btree, unsigned level, -+ struct bkey_s_c old, struct bkey_i *new) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b; -+ struct bkey_buf tmp; -+ int ret; -+ -+ bch2_bkey_buf_init(&tmp); -+ bch2_bkey_buf_reassemble(&tmp, c, old); -+ -+ b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); -+ if (!IS_ERR_OR_NULL(b)) { -+ mutex_lock(&c->btree_cache.lock); -+ -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ bkey_copy(&b->key, new); -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ -+ mutex_unlock(&c->btree_cache.lock); -+ six_unlock_read(&b->c.lock); -+ } -+ -+ bch2_bkey_buf_exit(&tmp, c); -+} -+ -+static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) -+{ -+ struct 
bkey_i_btree_ptr_v2 *new; -+ int ret; -+ -+ new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); -+ if (!new) -+ return -BCH_ERR_ENOMEM_gc_repair_key; -+ -+ btree_ptr_to_v2(b, new); -+ b->data->min_key = new_min; -+ new->v.min_key = new_min; -+ SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); -+ -+ ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i); -+ if (ret) { -+ kfree(new); -+ return ret; -+ } -+ -+ bch2_btree_node_drop_keys_outside_node(b); -+ bkey_copy(&b->key, &new->k_i); -+ return 0; -+} -+ -+static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) -+{ -+ struct bkey_i_btree_ptr_v2 *new; -+ int ret; -+ -+ ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); -+ if (ret) -+ return ret; -+ -+ new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); -+ if (!new) -+ return -BCH_ERR_ENOMEM_gc_repair_key; -+ -+ btree_ptr_to_v2(b, new); -+ b->data->max_key = new_max; -+ new->k.p = new_max; -+ SET_BTREE_PTR_RANGE_UPDATED(&new->v, true); -+ -+ ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i); -+ if (ret) { -+ kfree(new); -+ return ret; -+ } -+ -+ bch2_btree_node_drop_keys_outside_node(b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ bkey_copy(&b->key, &new->k_i); -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ mutex_unlock(&c->btree_cache.lock); -+ return 0; -+} -+ -+static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, -+ struct btree *prev, struct btree *cur) -+{ -+ struct bpos expected_start = !prev -+ ? b->data->min_key -+ : bpos_successor(prev->key.k.p); -+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; -+ int ret = 0; -+ -+ if (!prev) { -+ prt_printf(&buf1, "start of node: "); -+ bch2_bpos_to_text(&buf1, b->data->min_key); -+ } else { -+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key)); -+ } -+ -+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key)); -+ -+ if (prev && -+ bpos_gt(expected_start, cur->data->min_key) && -+ BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { -+ /* cur overwrites prev: */ -+ -+ if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key, -+ cur->data->min_key), c, -+ btree_node_topology_overwritten_by_next_node, -+ "btree node overwritten by next node at btree %s level %u:\n" -+ " node %s\n" -+ " next %s", -+ bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf)) { -+ ret = DROP_PREV_NODE; -+ goto out; -+ } -+ -+ if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p, -+ bpos_predecessor(cur->data->min_key)), c, -+ btree_node_topology_bad_max_key, -+ "btree node with incorrect max_key at btree %s level %u:\n" -+ " node %s\n" -+ " next %s", -+ bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf)) -+ ret = set_node_max(c, prev, -+ bpos_predecessor(cur->data->min_key)); -+ } else { -+ /* prev overwrites cur: */ -+ -+ if (mustfix_fsck_err_on(bpos_ge(expected_start, -+ cur->data->max_key), c, -+ btree_node_topology_overwritten_by_prev_node, -+ "btree node overwritten by prev node at btree %s level %u:\n" -+ " prev %s\n" -+ " node %s", -+ bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf)) { -+ ret = DROP_THIS_NODE; -+ goto out; -+ } -+ -+ if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c, -+ btree_node_topology_bad_min_key, -+ "btree node with incorrect min_key at btree %s level %u:\n" -+ " prev %s\n" -+ " node %s", -+ 
bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf)) -+ ret = set_node_min(c, cur, expected_start); -+ } -+out: -+fsck_err: -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ return ret; -+} -+ -+static int btree_repair_node_end(struct bch_fs *c, struct btree *b, -+ struct btree *child) -+{ -+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; -+ int ret = 0; -+ -+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key)); -+ bch2_bpos_to_text(&buf2, b->key.k.p); -+ -+ if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c, -+ btree_node_topology_bad_max_key, -+ "btree node with incorrect max_key at btree %s level %u:\n" -+ " %s\n" -+ " expected %s", -+ bch2_btree_id_str(b->c.btree_id), b->c.level, -+ buf1.buf, buf2.buf)) { -+ ret = set_node_max(c, child, b->key.k.p); -+ if (ret) -+ goto err; -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ return ret; -+} -+ -+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ struct bkey_buf prev_k, cur_k; -+ struct btree *prev = NULL, *cur = NULL; -+ bool have_child, dropped_children = false; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ if (!b->c.level) -+ return 0; -+again: -+ prev = NULL; -+ have_child = dropped_children = false; -+ bch2_bkey_buf_init(&prev_k); -+ bch2_bkey_buf_init(&cur_k); -+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ BUG_ON(bpos_lt(k.k->p, b->data->min_key)); -+ BUG_ON(bpos_gt(k.k->p, b->data->max_key)); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ bch2_bkey_buf_reassemble(&cur_k, c, k); -+ -+ cur = bch2_btree_node_get_noiter(trans, cur_k.k, -+ b->c.btree_id, b->c.level - 1, -+ false); -+ ret = PTR_ERR_OR_ZERO(cur); -+ -+ printbuf_reset(&buf); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); -+ -+ if (mustfix_fsck_err_on(ret == -EIO, c, -+ btree_node_unreadable, -+ "Topology repair: unreadable btree node at btree %s level %u:\n" -+ " %s", -+ bch2_btree_id_str(b->c.btree_id), -+ b->c.level - 1, -+ buf.buf)) { -+ bch2_btree_node_evict(trans, cur_k.k); -+ ret = bch2_journal_key_delete(c, b->c.btree_id, -+ b->c.level, cur_k.k->k.p); -+ cur = NULL; -+ if (ret) -+ break; -+ continue; -+ } -+ -+ if (ret) { -+ bch_err_msg(c, ret, "getting btree node"); -+ break; -+ } -+ -+ ret = btree_repair_node_boundaries(c, b, prev, cur); -+ -+ if (ret == DROP_THIS_NODE) { -+ six_unlock_read(&cur->c.lock); -+ bch2_btree_node_evict(trans, cur_k.k); -+ ret = bch2_journal_key_delete(c, b->c.btree_id, -+ b->c.level, cur_k.k->k.p); -+ cur = NULL; -+ if (ret) -+ break; -+ continue; -+ } -+ -+ if (prev) -+ six_unlock_read(&prev->c.lock); -+ prev = NULL; -+ -+ if (ret == DROP_PREV_NODE) { -+ bch2_btree_node_evict(trans, prev_k.k); -+ ret = bch2_journal_key_delete(c, b->c.btree_id, -+ b->c.level, prev_k.k->k.p); -+ if (ret) -+ break; -+ -+ bch2_btree_and_journal_iter_exit(&iter); -+ bch2_bkey_buf_exit(&prev_k, c); -+ bch2_bkey_buf_exit(&cur_k, c); -+ goto again; -+ } else if (ret) -+ break; -+ -+ prev = cur; -+ cur = NULL; -+ bch2_bkey_buf_copy(&prev_k, c, cur_k.k); -+ } -+ -+ if (!ret && !IS_ERR_OR_NULL(prev)) { -+ BUG_ON(cur); -+ ret = btree_repair_node_end(c, b, prev); -+ } -+ -+ if (!IS_ERR_OR_NULL(prev)) -+ six_unlock_read(&prev->c.lock); -+ prev = NULL; -+ if (!IS_ERR_OR_NULL(cur)) -+ six_unlock_read(&cur->c.lock); -+ cur = NULL; -+ -+ if (ret) -+ goto 
err; -+ -+ bch2_btree_and_journal_iter_exit(&iter); -+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ bch2_bkey_buf_reassemble(&cur_k, c, k); -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ cur = bch2_btree_node_get_noiter(trans, cur_k.k, -+ b->c.btree_id, b->c.level - 1, -+ false); -+ ret = PTR_ERR_OR_ZERO(cur); -+ -+ if (ret) { -+ bch_err_msg(c, ret, "getting btree node"); -+ goto err; -+ } -+ -+ ret = bch2_btree_repair_topology_recurse(trans, cur); -+ six_unlock_read(&cur->c.lock); -+ cur = NULL; -+ -+ if (ret == DROP_THIS_NODE) { -+ bch2_btree_node_evict(trans, cur_k.k); -+ ret = bch2_journal_key_delete(c, b->c.btree_id, -+ b->c.level, cur_k.k->k.p); -+ dropped_children = true; -+ } -+ -+ if (ret) -+ goto err; -+ -+ have_child = true; -+ } -+ -+ printbuf_reset(&buf); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ -+ if (mustfix_fsck_err_on(!have_child, c, -+ btree_node_topology_interior_node_empty, -+ "empty interior btree node at btree %s level %u\n" -+ " %s", -+ bch2_btree_id_str(b->c.btree_id), -+ b->c.level, buf.buf)) -+ ret = DROP_THIS_NODE; -+err: -+fsck_err: -+ if (!IS_ERR_OR_NULL(prev)) -+ six_unlock_read(&prev->c.lock); -+ if (!IS_ERR_OR_NULL(cur)) -+ six_unlock_read(&cur->c.lock); -+ -+ bch2_btree_and_journal_iter_exit(&iter); -+ bch2_bkey_buf_exit(&prev_k, c); -+ bch2_bkey_buf_exit(&cur_k, c); -+ -+ if (!ret && dropped_children) -+ goto again; -+ -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_check_topology(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree *b; -+ unsigned i; -+ int ret = 0; -+ -+ for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) { -+ struct btree_root *r = bch2_btree_id_root(c, i); -+ -+ if (!r->alive) -+ continue; -+ -+ b = r->b; -+ if (btree_node_fake(b)) -+ continue; -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); -+ ret = bch2_btree_repair_topology_recurse(trans, b); -+ six_unlock_read(&b->c.lock); -+ -+ if (ret == DROP_THIS_NODE) { -+ bch_err(c, "empty btree root - repair unimplemented"); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ } -+ } -+ -+ bch2_trans_put(trans); -+ -+ return ret; -+} -+ -+static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id, -+ unsigned level, bool is_root, -+ struct bkey_s_c *k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k); -+ const union bch_extent_entry *entry_c; -+ struct extent_ptr_decoded p = { 0 }; -+ bool do_update = false; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ /* -+ * XXX -+ * use check_bucket_ref here -+ */ -+ bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); -+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr); -+ -+ if (!g->gen_valid && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, ptr_to_missing_alloc_key, -+ "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), -+ bch2_data_types[ptr_data_type(k->k, &p.ptr)], -+ p.ptr.gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) { -+ if (!p.ptr.cached) { -+ g->gen_valid = true; -+ g->gen = p.ptr.gen; -+ } else { -+ do_update = true; -+ } -+ } -+ -+ if (gen_cmp(p.ptr.gen, g->gen) > 0 && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, ptr_gen_newer_than_bucket_gen, -+ "bucket %u:%zu data 
type %s ptr gen in the future: %u > %u\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), -+ bch2_data_types[ptr_data_type(k->k, &p.ptr)], -+ p.ptr.gen, g->gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) { -+ if (!p.ptr.cached) { -+ g->gen_valid = true; -+ g->gen = p.ptr.gen; -+ g->data_type = 0; -+ g->dirty_sectors = 0; -+ g->cached_sectors = 0; -+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); -+ } else { -+ do_update = true; -+ } -+ } -+ -+ if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, ptr_gen_newer_than_bucket_gen, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, -+ bch2_data_types[ptr_data_type(k->k, &p.ptr)], -+ p.ptr.gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) -+ do_update = true; -+ -+ if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 && -+ (c->opts.reconstruct_alloc || -+ fsck_err(c, stale_dirty_ptr, -+ "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), -+ bch2_data_types[ptr_data_type(k->k, &p.ptr)], -+ p.ptr.gen, g->gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) -+ do_update = true; -+ -+ if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) -+ continue; -+ -+ if (fsck_err_on(bucket_data_type(g->data_type) && -+ bucket_data_type(g->data_type) != data_type, c, -+ ptr_bucket_data_type_mismatch, -+ "bucket %u:%zu different types of data in same bucket: %s, %s\n" -+ "while marking %s", -+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), -+ bch2_data_types[g->data_type], -+ bch2_data_types[data_type], -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { -+ if (data_type == BCH_DATA_btree) { -+ g->data_type = data_type; -+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); -+ } else { -+ do_update = true; -+ } -+ } -+ -+ if (p.has_ec) { -+ struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); -+ -+ if (fsck_err_on(!m || !m->alive, c, -+ ptr_to_missing_stripe, -+ "pointer to nonexistent stripe %llu\n" -+ "while marking %s", -+ (u64) p.ec.idx, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) -+ do_update = true; -+ -+ if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, -+ ptr_to_incorrect_stripe, -+ "pointer does not match stripe %llu\n" -+ "while marking %s", -+ (u64) p.ec.idx, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) -+ do_update = true; -+ } -+ } -+ -+ if (do_update) { -+ struct bkey_ptrs ptrs; -+ union bch_extent_entry *entry; -+ struct bch_extent_ptr *ptr; -+ struct bkey_i *new; -+ -+ if (is_root) { -+ bch_err(c, "cannot update btree roots yet"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); -+ if (!new) { -+ bch_err_msg(c, ret, "allocating new key"); -+ ret = -BCH_ERR_ENOMEM_gc_repair_key; -+ goto err; -+ } -+ -+ bkey_reassemble(new, *k); -+ -+ if (level) { -+ /* -+ * We don't want to drop btree node pointers - if the -+ * btree node isn't there anymore, the read path will -+ * sort it out: -+ */ -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ struct bucket *g = PTR_GC_BUCKET(ca, ptr); -+ -+ ptr->gen = g->gen; -+ } -+ } else { -+ bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, 
ptr->dev); -+ struct bucket *g = PTR_GC_BUCKET(ca, ptr); -+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr); -+ -+ (ptr->cached && -+ (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) || -+ (!ptr->cached && -+ gen_cmp(ptr->gen, g->gen) < 0) || -+ gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX || -+ (g->data_type && -+ g->data_type != data_type); -+ })); -+again: -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) { -+ struct gc_stripe *m = genradix_ptr(&c->gc_stripes, -+ entry->stripe_ptr.idx); -+ union bch_extent_entry *next_ptr; -+ -+ bkey_extent_entry_for_each_from(ptrs, next_ptr, entry) -+ if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr) -+ goto found; -+ next_ptr = NULL; -+found: -+ if (!next_ptr) { -+ bch_err(c, "aieee, found stripe ptr with no data ptr"); -+ continue; -+ } -+ -+ if (!m || !m->alive || -+ !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block], -+ &next_ptr->ptr, -+ m->sectors)) { -+ bch2_bkey_extent_entry_drop(new, entry); -+ goto again; -+ } -+ } -+ } -+ } -+ -+ ret = bch2_journal_key_insert_take(c, btree_id, level, new); -+ if (ret) { -+ kfree(new); -+ goto err; -+ } -+ -+ if (level) -+ bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new); -+ -+ if (0) { -+ printbuf_reset(&buf); -+ bch2_bkey_val_to_text(&buf, c, *k); -+ bch_info(c, "updated %s", buf.buf); -+ -+ printbuf_reset(&buf); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); -+ bch_info(c, "new key %s", buf.buf); -+ } -+ -+ *k = bkey_i_to_s_c(new); -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+/* marking of btree keys/nodes: */ -+ -+static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, -+ unsigned level, bool is_root, -+ struct bkey_s_c *k, -+ bool initial) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey deleted = KEY(0, 0, 0); -+ struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; -+ unsigned flags = -+ BTREE_TRIGGER_GC| -+ (initial ? 
BTREE_TRIGGER_NOATOMIC : 0); -+ int ret = 0; -+ -+ deleted.p = k->k->p; -+ -+ if (initial) { -+ BUG_ON(bch2_journal_seq_verify && -+ k->k->version.lo > atomic64_read(&c->journal.seq)); -+ -+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k); -+ if (ret) -+ goto err; -+ -+ if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c, -+ bkey_version_in_future, -+ "key version number higher than recorded: %llu > %llu", -+ k->k->version.lo, -+ atomic64_read(&c->key_version))) -+ atomic64_set(&c->key_version, k->k->version.lo); -+ } -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_mark_key(trans, btree_id, level, old, *k, flags)); -+fsck_err: -+err: -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_node_iter iter; -+ struct bkey unpacked; -+ struct bkey_s_c k; -+ struct bkey_buf prev, cur; -+ int ret = 0; -+ -+ if (!btree_node_type_needs_gc(btree_node_type(b))) -+ return 0; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ bch2_bkey_buf_init(&prev); -+ bch2_bkey_buf_init(&cur); -+ bkey_init(&prev.k->k); -+ -+ while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { -+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false, -+ &k, initial); -+ if (ret) -+ break; -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (b->c.level) { -+ bch2_bkey_buf_reassemble(&cur, c, k); -+ -+ ret = bch2_gc_check_topology(c, b, &prev, cur, -+ bch2_btree_node_iter_end(&iter)); -+ if (ret) -+ break; -+ } -+ } -+ -+ bch2_bkey_buf_exit(&cur, c); -+ bch2_bkey_buf_exit(&prev, c); -+ return ret; -+} -+ -+static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id, -+ bool initial, bool metadata_only) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct btree *b; -+ unsigned depth = metadata_only ? 
1 : 0; -+ int ret = 0; -+ -+ gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); -+ -+ __for_each_btree_node(trans, iter, btree_id, POS_MIN, -+ 0, depth, BTREE_ITER_PREFETCH, b, ret) { -+ bch2_verify_btree_nr_keys(b); -+ -+ gc_pos_set(c, gc_pos_btree_node(b)); -+ -+ ret = btree_gc_mark_node(trans, b, initial); -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->btree_root_lock); -+ b = bch2_btree_id_root(c, btree_id)->b; -+ if (!btree_node_fake(b)) { -+ struct bkey_s_c k = bkey_i_to_s_c(&b->key); -+ -+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, -+ true, &k, initial); -+ } -+ gc_pos_set(c, gc_pos_btree_root(b->c.btree_id)); -+ mutex_unlock(&c->btree_root_lock); -+ -+ return ret; -+} -+ -+static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b, -+ unsigned target_depth) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_and_journal_iter iter; -+ struct bkey_s_c k; -+ struct bkey_buf cur, prev; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); -+ bch2_bkey_buf_init(&prev); -+ bch2_bkey_buf_init(&cur); -+ bkey_init(&prev.k->k); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ BUG_ON(bpos_lt(k.k->p, b->data->min_key)); -+ BUG_ON(bpos_gt(k.k->p, b->data->max_key)); -+ -+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, -+ false, &k, true); -+ if (ret) -+ goto fsck_err; -+ -+ if (b->c.level) { -+ bch2_bkey_buf_reassemble(&cur, c, k); -+ k = bkey_i_to_s_c(cur.k); -+ -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ ret = bch2_gc_check_topology(c, b, -+ &prev, cur, -+ !bch2_btree_and_journal_iter_peek(&iter).k); -+ if (ret) -+ goto fsck_err; -+ } else { -+ bch2_btree_and_journal_iter_advance(&iter); -+ } -+ } -+ -+ if (b->c.level > target_depth) { -+ bch2_btree_and_journal_iter_exit(&iter); -+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); -+ -+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { -+ struct btree *child; -+ -+ bch2_bkey_buf_reassemble(&cur, c, k); -+ bch2_btree_and_journal_iter_advance(&iter); -+ -+ child = bch2_btree_node_get_noiter(trans, cur.k, -+ b->c.btree_id, b->c.level - 1, -+ false); -+ ret = PTR_ERR_OR_ZERO(child); -+ -+ if (ret == -EIO) { -+ bch2_topology_error(c); -+ -+ if (__fsck_err(c, -+ FSCK_CAN_FIX| -+ FSCK_CAN_IGNORE| -+ FSCK_NO_RATELIMIT, -+ btree_node_read_error, -+ "Unreadable btree node at btree %s level %u:\n" -+ " %s", -+ bch2_btree_id_str(b->c.btree_id), -+ b->c.level - 1, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && -+ should_restart_for_topology_repair(c)) { -+ bch_info(c, "Halting mark and sweep to start topology repair pass"); -+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); -+ goto fsck_err; -+ } else { -+ /* Continue marking when opted to not -+ * fix the error: */ -+ ret = 0; -+ set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); -+ continue; -+ } -+ } else if (ret) { -+ bch_err_msg(c, ret, "getting btree node"); -+ break; -+ } -+ -+ ret = bch2_gc_btree_init_recurse(trans, child, -+ target_depth); -+ six_unlock_read(&child->c.lock); -+ -+ if (ret) -+ break; -+ } -+ } -+fsck_err: -+ bch2_bkey_buf_exit(&cur, c); -+ bch2_bkey_buf_exit(&prev, c); -+ bch2_btree_and_journal_iter_exit(&iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int bch2_gc_btree_init(struct btree_trans *trans, -+ enum btree_id btree_id, -+ bool metadata_only) -+{ -+ struct bch_fs *c = 
trans->c; -+ struct btree *b; -+ unsigned target_depth = metadata_only ? 1 : 0; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ b = bch2_btree_id_root(c, btree_id)->b; -+ -+ if (btree_node_fake(b)) -+ return 0; -+ -+ six_lock_read(&b->c.lock, NULL, NULL); -+ printbuf_reset(&buf); -+ bch2_bpos_to_text(&buf, b->data->min_key); -+ if (mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), c, -+ btree_root_bad_min_key, -+ "btree root with incorrect min_key: %s", buf.buf)) { -+ bch_err(c, "repair unimplemented"); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ goto fsck_err; -+ } -+ -+ printbuf_reset(&buf); -+ bch2_bpos_to_text(&buf, b->data->max_key); -+ if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), c, -+ btree_root_bad_max_key, -+ "btree root with incorrect max_key: %s", buf.buf)) { -+ bch_err(c, "repair unimplemented"); -+ ret = -BCH_ERR_fsck_repair_unimplemented; -+ goto fsck_err; -+ } -+ -+ if (b->c.level >= target_depth) -+ ret = bch2_gc_btree_init_recurse(trans, b, target_depth); -+ -+ if (!ret) { -+ struct bkey_s_c k = bkey_i_to_s_c(&b->key); -+ -+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, true, -+ &k, true); -+ } -+fsck_err: -+ six_unlock_read(&b->c.lock); -+ -+ if (ret < 0) -+ bch_err_fn(c, ret); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) -+{ -+ return (int) btree_id_to_gc_phase(l) - -+ (int) btree_id_to_gc_phase(r); -+} -+ -+static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ enum btree_id ids[BTREE_ID_NR]; -+ unsigned i; -+ int ret = 0; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ ids[i] = i; -+ bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); -+ -+ for (i = 0; i < BTREE_ID_NR && !ret; i++) -+ ret = initial -+ ? bch2_gc_btree_init(trans, ids[i], metadata_only) -+ : bch2_gc_btree(trans, ids[i], initial, metadata_only); -+ -+ for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) { -+ if (!bch2_btree_id_root(c, i)->alive) -+ continue; -+ -+ ret = initial -+ ? 
bch2_gc_btree_init(trans, i, metadata_only) -+ : bch2_gc_btree(trans, i, initial, metadata_only); -+ } -+ -+ if (ret < 0) -+ bch_err_fn(c, ret); -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca, -+ u64 start, u64 end, -+ enum bch_data_type type, -+ unsigned flags) -+{ -+ u64 b = sector_to_bucket(ca, start); -+ -+ do { -+ unsigned sectors = -+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start; -+ -+ bch2_mark_metadata_bucket(c, ca, b, type, sectors, -+ gc_phase(GC_PHASE_SB), flags); -+ b++; -+ start += sectors; -+ } while (start < end); -+} -+ -+static void bch2_mark_dev_superblock(struct bch_fs *c, struct bch_dev *ca, -+ unsigned flags) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ unsigned i; -+ u64 b; -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset == BCH_SB_SECTOR) -+ mark_metadata_sectors(c, ca, 0, BCH_SB_SECTOR, -+ BCH_DATA_sb, flags); -+ -+ mark_metadata_sectors(c, ca, offset, -+ offset + (1 << layout->sb_max_size_bits), -+ BCH_DATA_sb, flags); -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) { -+ b = ca->journal.buckets[i]; -+ bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal, -+ ca->mi.bucket_size, -+ gc_phase(GC_PHASE_SB), flags); -+ } -+} -+ -+static void bch2_mark_superblocks(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&c->sb_lock); -+ gc_pos_set(c, gc_phase(GC_PHASE_SB)); -+ -+ for_each_online_member(ca, c, i) -+ bch2_mark_dev_superblock(c, ca, BTREE_TRIGGER_GC); -+ mutex_unlock(&c->sb_lock); -+} -+ -+#if 0 -+/* Also see bch2_pending_btree_node_free_insert_done() */ -+static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) -+{ -+ struct btree_update *as; -+ struct pending_btree_node_free *d; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ gc_pos_set(c, gc_phase(GC_PHASE_PENDING_DELETE)); -+ -+ for_each_pending_btree_node_free(c, as, d) -+ if (d->index_update_done) -+ bch2_mark_key(c, bkey_i_to_s_c(&d->key), BTREE_TRIGGER_GC); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+#endif -+ -+static void bch2_gc_free(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ genradix_free(&c->reflink_gc_table); -+ genradix_free(&c->gc_stripes); -+ -+ for_each_member_device(ca, c, i) { -+ kvpfree(rcu_dereference_protected(ca->buckets_gc, 1), -+ sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket)); -+ ca->buckets_gc = NULL; -+ -+ free_percpu(ca->usage_gc); -+ ca->usage_gc = NULL; -+ } -+ -+ free_percpu(c->usage_gc); -+ c->usage_gc = NULL; -+} -+ -+static int bch2_gc_done(struct bch_fs *c, -+ bool initial, bool metadata_only) -+{ -+ struct bch_dev *ca = NULL; -+ struct printbuf buf = PRINTBUF; -+ bool verify = !metadata_only && -+ !c->opts.reconstruct_alloc && -+ (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info))); -+ unsigned i, dev; -+ int ret = 0; -+ -+ percpu_down_write(&c->mark_lock); -+ -+#define copy_field(_err, _f, _msg, ...) \ -+ if (dst->_f != src->_f && \ -+ (!verify || \ -+ fsck_err(c, _err, _msg ": got %llu, should be %llu" \ -+ , ##__VA_ARGS__, dst->_f, src->_f))) \ -+ dst->_f = src->_f -+#define copy_dev_field(_err, _f, _msg, ...) \ -+ copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) -+#define copy_fs_field(_err, _f, _msg, ...) 
\ -+ copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) -+ -+ for (i = 0; i < ARRAY_SIZE(c->usage); i++) -+ bch2_fs_usage_acc_to_base(c, i); -+ -+ for_each_member_device(ca, c, dev) { -+ struct bch_dev_usage *dst = ca->usage_base; -+ struct bch_dev_usage *src = (void *) -+ bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, -+ dev_usage_u64s()); -+ -+ for (i = 0; i < BCH_DATA_NR; i++) { -+ copy_dev_field(dev_usage_buckets_wrong, -+ d[i].buckets, "%s buckets", bch2_data_types[i]); -+ copy_dev_field(dev_usage_sectors_wrong, -+ d[i].sectors, "%s sectors", bch2_data_types[i]); -+ copy_dev_field(dev_usage_fragmented_wrong, -+ d[i].fragmented, "%s fragmented", bch2_data_types[i]); -+ } -+ -+ copy_dev_field(dev_usage_buckets_ec_wrong, -+ buckets_ec, "buckets_ec"); -+ } -+ -+ { -+ unsigned nr = fs_usage_u64s(c); -+ struct bch_fs_usage *dst = c->usage_base; -+ struct bch_fs_usage *src = (void *) -+ bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); -+ -+ copy_fs_field(fs_usage_hidden_wrong, -+ hidden, "hidden"); -+ copy_fs_field(fs_usage_btree_wrong, -+ btree, "btree"); -+ -+ if (!metadata_only) { -+ copy_fs_field(fs_usage_data_wrong, -+ data, "data"); -+ copy_fs_field(fs_usage_cached_wrong, -+ cached, "cached"); -+ copy_fs_field(fs_usage_reserved_wrong, -+ reserved, "reserved"); -+ copy_fs_field(fs_usage_nr_inodes_wrong, -+ nr_inodes,"nr_inodes"); -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ copy_fs_field(fs_usage_persistent_reserved_wrong, -+ persistent_reserved[i], -+ "persistent_reserved[%i]", i); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (metadata_only && -+ (e->data_type == BCH_DATA_user || -+ e->data_type == BCH_DATA_cached)) -+ continue; -+ -+ printbuf_reset(&buf); -+ bch2_replicas_entry_to_text(&buf, e); -+ -+ copy_fs_field(fs_usage_replicas_wrong, -+ replicas[i], "%s", buf.buf); -+ } -+ } -+ -+#undef copy_fs_field -+#undef copy_dev_field -+#undef copy_stripe_field -+#undef copy_field -+fsck_err: -+ if (ca) -+ percpu_ref_put(&ca->ref); -+ if (ret) -+ bch_err_fn(c, ret); -+ -+ percpu_up_write(&c->mark_lock); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int bch2_gc_start(struct bch_fs *c) -+{ -+ struct bch_dev *ca = NULL; -+ unsigned i; -+ -+ BUG_ON(c->usage_gc); -+ -+ c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64), -+ sizeof(u64), GFP_KERNEL); -+ if (!c->usage_gc) { -+ bch_err(c, "error allocating c->usage_gc"); -+ return -BCH_ERR_ENOMEM_gc_start; -+ } -+ -+ for_each_member_device(ca, c, i) { -+ BUG_ON(ca->usage_gc); -+ -+ ca->usage_gc = alloc_percpu(struct bch_dev_usage); -+ if (!ca->usage_gc) { -+ bch_err(c, "error allocating ca->usage_gc"); -+ percpu_ref_put(&ca->ref); -+ return -BCH_ERR_ENOMEM_gc_start; -+ } -+ -+ this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets, -+ ca->mi.nbuckets - ca->mi.first_bucket); -+ } -+ -+ return 0; -+} -+ -+static int bch2_gc_reset(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_member_device(ca, c, i) { -+ free_percpu(ca->usage_gc); -+ ca->usage_gc = NULL; -+ } -+ -+ free_percpu(c->usage_gc); -+ c->usage_gc = NULL; -+ -+ return bch2_gc_start(c); -+} -+ -+/* returns true if not equal */ -+static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l, -+ struct bch_alloc_v4 r) -+{ -+ return l.gen != r.gen || -+ l.oldest_gen != r.oldest_gen || -+ l.data_type != r.data_type || -+ l.dirty_sectors != r.dirty_sectors || -+ l.cached_sectors != r.cached_sectors || -+ l.stripe_redundancy != 
r.stripe_redundancy || -+ l.stripe != r.stripe; -+} -+ -+static int bch2_alloc_write_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ bool metadata_only) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); -+ struct bucket gc, *b; -+ struct bkey_i_alloc_v4 *a; -+ struct bch_alloc_v4 old_convert, new; -+ const struct bch_alloc_v4 *old; -+ enum bch_data_type type; -+ int ret; -+ -+ if (bkey_ge(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets))) -+ return 1; -+ -+ old = bch2_alloc_to_v4(k, &old_convert); -+ new = *old; -+ -+ percpu_down_read(&c->mark_lock); -+ b = gc_bucket(ca, iter->pos.offset); -+ -+ /* -+ * b->data_type doesn't yet include need_discard & need_gc_gen states - -+ * fix that here: -+ */ -+ type = __alloc_data_type(b->dirty_sectors, -+ b->cached_sectors, -+ b->stripe, -+ *old, -+ b->data_type); -+ if (b->data_type != type) { -+ struct bch_dev_usage *u; -+ -+ preempt_disable(); -+ u = this_cpu_ptr(ca->usage_gc); -+ u->d[b->data_type].buckets--; -+ b->data_type = type; -+ u->d[b->data_type].buckets++; -+ preempt_enable(); -+ } -+ -+ gc = *b; -+ percpu_up_read(&c->mark_lock); -+ -+ if (metadata_only && -+ gc.data_type != BCH_DATA_sb && -+ gc.data_type != BCH_DATA_journal && -+ gc.data_type != BCH_DATA_btree) -+ return 0; -+ -+ if (gen_after(old->gen, gc.gen)) -+ return 0; -+ -+ if (c->opts.reconstruct_alloc || -+ fsck_err_on(new.data_type != gc.data_type, c, -+ alloc_key_data_type_wrong, -+ "bucket %llu:%llu gen %u has wrong data_type" -+ ": got %s, should be %s", -+ iter->pos.inode, iter->pos.offset, -+ gc.gen, -+ bch2_data_types[new.data_type], -+ bch2_data_types[gc.data_type])) -+ new.data_type = gc.data_type; -+ -+#define copy_bucket_field(_errtype, _f) \ -+ if (c->opts.reconstruct_alloc || \ -+ fsck_err_on(new._f != gc._f, c, _errtype, \ -+ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ -+ ": got %u, should be %u", \ -+ iter->pos.inode, iter->pos.offset, \ -+ gc.gen, \ -+ bch2_data_types[gc.data_type], \ -+ new._f, gc._f)) \ -+ new._f = gc._f; \ -+ -+ copy_bucket_field(alloc_key_gen_wrong, -+ gen); -+ copy_bucket_field(alloc_key_dirty_sectors_wrong, -+ dirty_sectors); -+ copy_bucket_field(alloc_key_cached_sectors_wrong, -+ cached_sectors); -+ copy_bucket_field(alloc_key_stripe_wrong, -+ stripe); -+ copy_bucket_field(alloc_key_stripe_redundancy_wrong, -+ stripe_redundancy); -+#undef copy_bucket_field -+ -+ if (!bch2_alloc_v4_cmp(*old, new)) -+ return 0; -+ -+ a = bch2_alloc_to_v4_mut(trans, k); -+ ret = PTR_ERR_OR_ZERO(a); -+ if (ret) -+ return ret; -+ -+ a->v = new; -+ -+ /* -+ * The trigger normally makes sure this is set, but we're not running -+ * triggers: -+ */ -+ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ]) -+ a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); -+ -+ ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_NORUN); -+fsck_err: -+ return ret; -+} -+ -+static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ for_each_member_device(ca, c, i) { -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, -+ POS(ca->dev_idx, ca->mi.first_bucket), -+ BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW, -+ bch2_alloc_write_key(trans, &iter, k, metadata_only)); -+ -+ if (ret < 0) { -+ bch_err_fn(c, ret); -+ percpu_ref_put(&ca->ref); 
-+ break; -+ } -+ } -+ -+ bch2_trans_put(trans); -+ return ret < 0 ? ret : 0; -+} -+ -+static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bucket *g; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ unsigned i; -+ int ret; -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) + -+ ca->mi.nbuckets * sizeof(struct bucket), -+ GFP_KERNEL|__GFP_ZERO); -+ if (!buckets) { -+ percpu_ref_put(&ca->ref); -+ bch_err(c, "error allocating ca->buckets[gc]"); -+ ret = -BCH_ERR_ENOMEM_gc_alloc_start; -+ goto err; -+ } -+ -+ buckets->first_bucket = ca->mi.first_bucket; -+ buckets->nbuckets = ca->mi.nbuckets; -+ rcu_assign_pointer(ca->buckets_gc, buckets); -+ } -+ -+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ ca = bch_dev_bkey_exists(c, k.k->p.inode); -+ g = gc_bucket(ca, k.k->p.offset); -+ -+ a = bch2_alloc_to_v4(k, &a_convert); -+ -+ g->gen_valid = 1; -+ g->gen = a->gen; -+ -+ if (metadata_only && -+ (a->data_type == BCH_DATA_user || -+ a->data_type == BCH_DATA_cached || -+ a->data_type == BCH_DATA_parity)) { -+ g->data_type = a->data_type; -+ g->dirty_sectors = a->dirty_sectors; -+ g->cached_sectors = a->cached_sectors; -+ g->stripe = a->stripe; -+ g->stripe_redundancy = a->stripe_redundancy; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ bch2_trans_put(trans); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_array *buckets = gc_bucket_array(ca); -+ struct bucket *g; -+ -+ for_each_bucket(g, buckets) { -+ if (metadata_only && -+ (g->data_type == BCH_DATA_user || -+ g->data_type == BCH_DATA_cached || -+ g->data_type == BCH_DATA_parity)) -+ continue; -+ g->data_type = 0; -+ g->dirty_sectors = 0; -+ g->cached_sectors = 0; -+ } -+ } -+} -+ -+static int bch2_gc_write_reflink_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ size_t *idx) -+{ -+ struct bch_fs *c = trans->c; -+ const __le64 *refcount = bkey_refcount_c(k); -+ struct printbuf buf = PRINTBUF; -+ struct reflink_gc *r; -+ int ret = 0; -+ -+ if (!refcount) -+ return 0; -+ -+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) && -+ r->offset < k.k->p.offset) -+ ++*idx; -+ -+ if (!r || -+ r->offset != k.k->p.offset || -+ r->size != k.k->size) { -+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish"); -+ return -EINVAL; -+ } -+ -+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c, -+ reflink_v_refcount_wrong, -+ "reflink key has wrong refcount:\n" -+ " %s\n" -+ " should be %u", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf), -+ r->refcount)) { -+ struct bkey_i *new = bch2_bkey_make_mut(trans, iter, &k, 0); -+ -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ if (!r->refcount) -+ new->k.type = KEY_TYPE_deleted; -+ else -+ *bkey_refcount(new) = cpu_to_le64(r->refcount); -+ } -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ size_t idx = 0; -+ int ret = 0; -+ -+ if (metadata_only) -+ return 0; -+ -+ trans = bch2_trans_get(c); -+ -+ ret = 
for_each_btree_key_commit(trans, iter, -+ BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL, -+ bch2_gc_write_reflink_key(trans, &iter, k, &idx)); -+ -+ c->reflink_gc_nr = 0; -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int bch2_gc_reflink_start(struct bch_fs *c, -+ bool metadata_only) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct reflink_gc *r; -+ int ret = 0; -+ -+ if (metadata_only) -+ return 0; -+ -+ trans = bch2_trans_get(c); -+ c->reflink_gc_nr = 0; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ const __le64 *refcount = bkey_refcount_c(k); -+ -+ if (!refcount) -+ continue; -+ -+ r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, -+ GFP_KERNEL); -+ if (!r) { -+ ret = -BCH_ERR_ENOMEM_gc_reflink_start; -+ break; -+ } -+ -+ r->offset = k.k->p.offset; -+ r->size = k.k->size; -+ r->refcount = 0; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only) -+{ -+ struct genradix_iter iter; -+ struct reflink_gc *r; -+ -+ genradix_for_each(&c->reflink_gc_table, iter, r) -+ r->refcount = 0; -+} -+ -+static int bch2_gc_write_stripes_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ const struct bch_stripe *s; -+ struct gc_stripe *m; -+ bool bad = false; -+ unsigned i; -+ int ret = 0; -+ -+ if (k.k->type != KEY_TYPE_stripe) -+ return 0; -+ -+ s = bkey_s_c_to_stripe(k).v; -+ m = genradix_ptr(&c->gc_stripes, k.k->p.offset); -+ -+ for (i = 0; i < s->nr_blocks; i++) { -+ u32 old = stripe_blockcount_get(s, i); -+ u32 new = (m ? m->block_sectors[i] : 0); -+ -+ if (old != new) { -+ prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n", -+ i, old, new); -+ bad = true; -+ } -+ } -+ -+ if (bad) -+ bch2_bkey_val_to_text(&buf, c, k); -+ -+ if (fsck_err_on(bad, c, stripe_sector_count_wrong, -+ "%s", buf.buf)) { -+ struct bkey_i_stripe *new; -+ -+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(&new->k_i, k); -+ -+ for (i = 0; i < new->v.nr_blocks; i++) -+ stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); -+ -+ ret = bch2_trans_update(trans, iter, &new->k_i, 0); -+ } -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ if (metadata_only) -+ return 0; -+ -+ trans = bch2_trans_get(c); -+ -+ ret = for_each_btree_key_commit(trans, iter, -+ BTREE_ID_stripes, POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL, -+ bch2_gc_write_stripes_key(trans, &iter, k)); -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only) -+{ -+ genradix_free(&c->gc_stripes); -+} -+ -+/** -+ * bch2_gc - walk _all_ references to buckets, and recompute them: -+ * -+ * @c: filesystem object -+ * @initial: are we in recovery? -+ * @metadata_only: are we just checking metadata references, or everything? 
-+ * -+ * Returns: 0 on success, or standard errcode on failure -+ * -+ * Order matters here: -+ * - Concurrent GC relies on the fact that we have a total ordering for -+ * everything that GC walks - see gc_will_visit_node(), -+ * gc_will_visit_root() -+ * -+ * - also, references move around in the course of index updates and -+ * various other crap: everything needs to agree on the ordering -+ * references are allowed to move around in - e.g., we're allowed to -+ * start with a reference owned by an open_bucket (the allocator) and -+ * move it to the btree, but not the reverse. -+ * -+ * This is necessary to ensure that gc doesn't miss references that -+ * move around - if references move backwards in the ordering GC -+ * uses, GC could skip past them -+ */ -+int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) -+{ -+ unsigned iter = 0; -+ int ret; -+ -+ lockdep_assert_held(&c->state_lock); -+ -+ down_write(&c->gc_lock); -+ -+ bch2_btree_interior_updates_flush(c); -+ -+ ret = bch2_gc_start(c) ?: -+ bch2_gc_alloc_start(c, metadata_only) ?: -+ bch2_gc_reflink_start(c, metadata_only); -+ if (ret) -+ goto out; -+again: -+ gc_pos_set(c, gc_phase(GC_PHASE_START)); -+ -+ bch2_mark_superblocks(c); -+ -+ ret = bch2_gc_btrees(c, initial, metadata_only); -+ -+ if (ret) -+ goto out; -+ -+#if 0 -+ bch2_mark_pending_btree_node_frees(c); -+#endif -+ c->gc_count++; -+ -+ if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) || -+ (!iter && bch2_test_restart_gc)) { -+ if (iter++ > 2) { -+ bch_info(c, "Unable to fix bucket gens, looping"); -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ /* -+ * XXX: make sure gens we fixed got saved -+ */ -+ bch_info(c, "Second GC pass needed, restarting:"); -+ clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ bch2_gc_stripes_reset(c, metadata_only); -+ bch2_gc_alloc_reset(c, metadata_only); -+ bch2_gc_reflink_reset(c, metadata_only); -+ ret = bch2_gc_reset(c); -+ if (ret) -+ goto out; -+ -+ /* flush fsck errors, reset counters */ -+ bch2_flush_fsck_errs(c); -+ goto again; -+ } -+out: -+ if (!ret) { -+ bch2_journal_block(&c->journal); -+ -+ ret = bch2_gc_stripes_done(c, metadata_only) ?: -+ bch2_gc_reflink_done(c, metadata_only) ?: -+ bch2_gc_alloc_done(c, metadata_only) ?: -+ bch2_gc_done(c, initial, metadata_only); -+ -+ bch2_journal_unblock(&c->journal); -+ } -+ -+ percpu_down_write(&c->mark_lock); -+ /* Indicates that gc is no longer in progress: */ -+ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); -+ -+ bch2_gc_free(c); -+ percpu_up_write(&c->mark_lock); -+ -+ up_write(&c->gc_lock); -+ -+ /* -+ * At startup, allocations can happen directly instead of via the -+ * allocator thread - issue wakeup in case they blocked on gc_lock: -+ */ -+ closure_wake_up(&c->freelist_wait); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int gc_btree_gens_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ struct bkey_i *u; -+ int ret; -+ -+ percpu_down_read(&c->mark_lock); -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (ptr_stale(ca, ptr) > 16) { -+ percpu_up_read(&c->mark_lock); -+ goto update; -+ } -+ } -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)]; -+ -+ if (gen_after(*gen, ptr->gen)) -+ *gen = ptr->gen; 
-+ } -+ percpu_up_read(&c->mark_lock); -+ return 0; -+update: -+ u = bch2_bkey_make_mut(trans, iter, &k, 0); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ bch2_extent_normalize(c, bkey_i_to_s(u)); -+ return 0; -+} -+ -+static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode); -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); -+ struct bkey_i_alloc_v4 *a_mut; -+ int ret; -+ -+ if (a->oldest_gen == ca->oldest_gen[iter->pos.offset]) -+ return 0; -+ -+ a_mut = bch2_alloc_to_v4_mut(trans, k); -+ ret = PTR_ERR_OR_ZERO(a_mut); -+ if (ret) -+ return ret; -+ -+ a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset]; -+ a_mut->v.data_type = alloc_data_type(a_mut->v, a_mut->v.data_type); -+ -+ return bch2_trans_update(trans, iter, &a_mut->k_i, 0); -+} -+ -+int bch2_gc_gens(struct bch_fs *c) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_dev *ca; -+ u64 b, start_time = local_clock(); -+ unsigned i; -+ int ret; -+ -+ /* -+ * Ideally we would be using state_lock and not gc_lock here, but that -+ * introduces a deadlock in the RO path - we currently take the state -+ * lock at the start of going RO, thus the gc thread may get stuck: -+ */ -+ if (!mutex_trylock(&c->gc_gens_lock)) -+ return 0; -+ -+ trace_and_count(c, gc_gens_start, c); -+ down_read(&c->gc_lock); -+ trans = bch2_trans_get(c); -+ -+ for_each_member_device(ca, c, i) { -+ struct bucket_gens *gens = bucket_gens(ca); -+ -+ BUG_ON(ca->oldest_gen); -+ -+ ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL); -+ if (!ca->oldest_gen) { -+ percpu_ref_put(&ca->ref); -+ ret = -BCH_ERR_ENOMEM_gc_gens; -+ goto err; -+ } -+ -+ for (b = gens->first_bucket; -+ b < gens->nbuckets; b++) -+ ca->oldest_gen[b] = gens->b[b]; -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ if (btree_type_has_ptrs(i)) { -+ c->gc_gens_btree = i; -+ c->gc_gens_pos = POS_MIN; -+ -+ ret = for_each_btree_key_commit(trans, iter, i, -+ POS_MIN, -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, -+ k, -+ NULL, NULL, -+ BTREE_INSERT_NOFAIL, -+ gc_btree_gens_key(trans, &iter, k)); -+ if (ret && !bch2_err_matches(ret, EROFS)) -+ bch_err_fn(c, ret); -+ if (ret) -+ goto err; -+ } -+ -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, -+ POS_MIN, -+ BTREE_ITER_PREFETCH, -+ k, -+ NULL, NULL, -+ BTREE_INSERT_NOFAIL, -+ bch2_alloc_write_oldest_gen(trans, &iter, k)); -+ if (ret && !bch2_err_matches(ret, EROFS)) -+ bch_err_fn(c, ret); -+ if (ret) -+ goto err; -+ -+ c->gc_gens_btree = 0; -+ c->gc_gens_pos = POS_MIN; -+ -+ c->gc_count++; -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); -+ trace_and_count(c, gc_gens_end, c); -+err: -+ for_each_member_device(ca, c, i) { -+ kvfree(ca->oldest_gen); -+ ca->oldest_gen = NULL; -+ } -+ -+ bch2_trans_put(trans); -+ up_read(&c->gc_lock); -+ mutex_unlock(&c->gc_gens_lock); -+ return ret; -+} -+ -+static int bch2_gc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ unsigned long last = atomic64_read(&clock->now); -+ unsigned last_kick = atomic_read(&c->kick_gc); -+ int ret; -+ -+ set_freezable(); -+ -+ while (1) { -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ if (kthread_should_stop()) { -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ if (atomic_read(&c->kick_gc) != last_kick) -+ break; -+ -+ if 
(c->btree_gc_periodic) { -+ unsigned long next = last + c->capacity / 16; -+ -+ if (atomic64_read(&clock->now) >= next) -+ break; -+ -+ bch2_io_clock_schedule_timeout(clock, next); -+ } else { -+ schedule(); -+ } -+ -+ try_to_freeze(); -+ } -+ __set_current_state(TASK_RUNNING); -+ -+ last = atomic64_read(&clock->now); -+ last_kick = atomic_read(&c->kick_gc); -+ -+ /* -+ * Full gc is currently incompatible with btree key cache: -+ */ -+#if 0 -+ ret = bch2_gc(c, false, false); -+#else -+ ret = bch2_gc_gens(c); -+#endif -+ if (ret < 0) -+ bch_err_fn(c, ret); -+ -+ debug_check_no_locks_held(); -+ } -+ -+ return 0; -+} -+ -+void bch2_gc_thread_stop(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ p = c->gc_thread; -+ c->gc_thread = NULL; -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_gc_thread_start(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ if (c->gc_thread) -+ return 0; -+ -+ p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name); -+ if (IS_ERR(p)) { -+ bch_err_fn(c, PTR_ERR(p)); -+ return PTR_ERR(p); -+ } -+ -+ get_task_struct(p); -+ c->gc_thread = p; -+ wake_up_process(p); -+ return 0; -+} -diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h -new file mode 100644 -index 000000000000..607575f83a00 ---- /dev/null -+++ b/fs/bcachefs/btree_gc.h -@@ -0,0 +1,114 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_GC_H -+#define _BCACHEFS_BTREE_GC_H -+ -+#include "bkey.h" -+#include "btree_types.h" -+ -+int bch2_check_topology(struct bch_fs *); -+int bch2_gc(struct bch_fs *, bool, bool); -+int bch2_gc_gens(struct bch_fs *); -+void bch2_gc_thread_stop(struct bch_fs *); -+int bch2_gc_thread_start(struct bch_fs *); -+ -+/* -+ * For concurrent mark and sweep (with other index updates), we define a total -+ * ordering of _all_ references GC walks: -+ * -+ * Note that some references will have the same GC position as others - e.g. -+ * everything within the same btree node; in those cases we're relying on -+ * whatever locking exists for where those references live, i.e. the write lock -+ * on a btree node. -+ * -+ * That locking is also required to ensure GC doesn't pass the updater in -+ * between the updater adding/removing the reference and updating the GC marks; -+ * without that, we would at best double count sometimes. -+ * -+ * That part is important - whenever calling bch2_mark_pointers(), a lock _must_ -+ * be held that prevents GC from passing the position the updater is at. -+ * -+ * (What about the start of gc, when we're clearing all the marks? GC clears the -+ * mark with the gc pos seqlock held, and bch_mark_bucket checks against the gc -+ * position inside its cmpxchg loop, so crap magically works). -+ */ -+ -+/* Position of (the start of) a gc phase: */ -+static inline struct gc_pos gc_phase(enum gc_phase phase) -+{ -+ return (struct gc_pos) { -+ .phase = phase, -+ .pos = POS_MIN, -+ .level = 0, -+ }; -+} -+ -+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) -+{ -+ return cmp_int(l.phase, r.phase) ?: -+ bpos_cmp(l.pos, r.pos) ?: -+ cmp_int(l.level, r.level); -+} -+ -+static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) -+{ -+ switch (id) { -+#define x(name, v, ...) 
case BTREE_ID_##name: return GC_PHASE_BTREE_##name; -+ BCH_BTREE_IDS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline struct gc_pos gc_pos_btree(enum btree_id id, -+ struct bpos pos, unsigned level) -+{ -+ return (struct gc_pos) { -+ .phase = btree_id_to_gc_phase(id), -+ .pos = pos, -+ .level = level, -+ }; -+} -+ -+/* -+ * GC position of the pointers within a btree node: note, _not_ for &b->key -+ * itself, that lives in the parent node: -+ */ -+static inline struct gc_pos gc_pos_btree_node(struct btree *b) -+{ -+ return gc_pos_btree(b->c.btree_id, b->key.k.p, b->c.level); -+} -+ -+/* -+ * GC position of the pointer to a btree root: we don't use -+ * gc_pos_pointer_to_btree_node() here to avoid a potential race with -+ * btree_split() increasing the tree depth - the new root will have level > the -+ * old root and thus have a greater gc position than the old root, but that -+ * would be incorrect since once gc has marked the root it's not coming back. -+ */ -+static inline struct gc_pos gc_pos_btree_root(enum btree_id id) -+{ -+ return gc_pos_btree(id, SPOS_MAX, BTREE_MAX_DEPTH); -+} -+ -+static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) -+{ -+ unsigned seq; -+ bool ret; -+ -+ do { -+ seq = read_seqcount_begin(&c->gc_pos_lock); -+ ret = gc_pos_cmp(pos, c->gc_pos) <= 0; -+ } while (read_seqcount_retry(&c->gc_pos_lock, seq)); -+ -+ return ret; -+} -+ -+static inline void bch2_do_gc_gens(struct bch_fs *c) -+{ -+ atomic_inc(&c->kick_gc); -+ if (c->gc_thread) -+ wake_up_process(c->gc_thread); -+} -+ -+#endif /* _BCACHEFS_BTREE_GC_H */ -diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c -new file mode 100644 -index 000000000000..37d896edb06e ---- /dev/null -+++ b/fs/bcachefs/btree_io.c -@@ -0,0 +1,2298 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "io_write.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "recovery.h" -+#include "super-io.h" -+#include "trace.h" -+ -+#include -+ -+void bch2_btree_node_io_unlock(struct btree *b) -+{ -+ EBUG_ON(!btree_node_write_in_flight(b)); -+ -+ clear_btree_node_write_in_flight_inner(b); -+ clear_btree_node_write_in_flight(b); -+ wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); -+} -+ -+void bch2_btree_node_io_lock(struct btree *b) -+{ -+ bch2_assert_btree_nodes_not_locked(); -+ -+ wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+void __bch2_btree_node_wait_on_read(struct btree *b) -+{ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+void __bch2_btree_node_wait_on_write(struct btree *b) -+{ -+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+void bch2_btree_node_wait_on_read(struct btree *b) -+{ -+ bch2_assert_btree_nodes_not_locked(); -+ -+ wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+void bch2_btree_node_wait_on_write(struct btree *b) -+{ -+ bch2_assert_btree_nodes_not_locked(); -+ -+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static void verify_no_dups(struct btree *b, -+ struct bkey_packed *start, -+ 
struct bkey_packed *end) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bkey_packed *k, *p; -+ -+ if (start == end) -+ return; -+ -+ for (p = start, k = bkey_p_next(start); -+ k != end; -+ p = k, k = bkey_p_next(k)) { -+ struct bkey l = bkey_unpack_key(b, p); -+ struct bkey r = bkey_unpack_key(b, k); -+ -+ BUG_ON(bpos_ge(l.p, bkey_start_pos(&r))); -+ } -+#endif -+} -+ -+static void set_needs_whiteout(struct bset *i, int v) -+{ -+ struct bkey_packed *k; -+ -+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) -+ k->needs_whiteout = v; -+} -+ -+static void btree_bounce_free(struct bch_fs *c, size_t size, -+ bool used_mempool, void *p) -+{ -+ if (used_mempool) -+ mempool_free(p, &c->btree_bounce_pool); -+ else -+ vpfree(p, size); -+} -+ -+static void *btree_bounce_alloc(struct bch_fs *c, size_t size, -+ bool *used_mempool) -+{ -+ unsigned flags = memalloc_nofs_save(); -+ void *p; -+ -+ BUG_ON(size > btree_bytes(c)); -+ -+ *used_mempool = false; -+ p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); -+ if (!p) { -+ *used_mempool = true; -+ p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); -+ } -+ memalloc_nofs_restore(flags); -+ return p; -+} -+ -+static void sort_bkey_ptrs(const struct btree *bt, -+ struct bkey_packed **ptrs, unsigned nr) -+{ -+ unsigned n = nr, a = nr / 2, b, c, d; -+ -+ if (!a) -+ return; -+ -+ /* Heap sort: see lib/sort.c: */ -+ while (1) { -+ if (a) -+ a--; -+ else if (--n) -+ swap(ptrs[0], ptrs[n]); -+ else -+ break; -+ -+ for (b = a; c = 2 * b + 1, (d = c + 1) < n;) -+ b = bch2_bkey_cmp_packed(bt, -+ ptrs[c], -+ ptrs[d]) >= 0 ? c : d; -+ if (d == n) -+ b = c; -+ -+ while (b != a && -+ bch2_bkey_cmp_packed(bt, -+ ptrs[a], -+ ptrs[b]) >= 0) -+ b = (b - 1) / 2; -+ c = b; -+ while (b != a) { -+ b = (b - 1) / 2; -+ swap(ptrs[b], ptrs[c]); -+ } -+ } -+} -+ -+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) -+{ -+ struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k; -+ bool used_mempool = false; -+ size_t bytes = b->whiteout_u64s * sizeof(u64); -+ -+ if (!b->whiteout_u64s) -+ return; -+ -+ new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ ptrs = ptrs_end = ((void *) new_whiteouts + bytes); -+ -+ for (k = unwritten_whiteouts_start(c, b); -+ k != unwritten_whiteouts_end(c, b); -+ k = bkey_p_next(k)) -+ *--ptrs = k; -+ -+ sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); -+ -+ k = new_whiteouts; -+ -+ while (ptrs != ptrs_end) { -+ bkey_p_copy(k, *ptrs); -+ k = bkey_p_next(k); -+ ptrs++; -+ } -+ -+ verify_no_dups(b, new_whiteouts, -+ (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); -+ -+ memcpy_u64s(unwritten_whiteouts_start(c, b), -+ new_whiteouts, b->whiteout_u64s); -+ -+ btree_bounce_free(c, bytes, used_mempool, new_whiteouts); -+} -+ -+static bool should_compact_bset(struct btree *b, struct bset_tree *t, -+ bool compacting, enum compact_mode mode) -+{ -+ if (!bset_dead_u64s(b, t)) -+ return false; -+ -+ switch (mode) { -+ case COMPACT_LAZY: -+ return should_compact_bset_lazy(b, t) || -+ (compacting && !bset_written(b, bset(b, t))); -+ case COMPACT_ALL: -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) -+{ -+ struct bset_tree *t; -+ bool ret = false; -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k, *n, *out, *start, *end; -+ struct btree_node_entry *src = NULL, *dst = NULL; -+ -+ if (t != b->set && !bset_written(b, i)) { -+ src = container_of(i, struct btree_node_entry, keys); -+ dst = max(write_block(b), -+ (void *) 
btree_bkey_last(b, t - 1)); -+ } -+ -+ if (src != dst) -+ ret = true; -+ -+ if (!should_compact_bset(b, t, ret, mode)) { -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src) + -+ le16_to_cpu(src->keys.u64s) * -+ sizeof(u64)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ continue; -+ } -+ -+ start = btree_bkey_first(b, t); -+ end = btree_bkey_last(b, t); -+ -+ if (src != dst) { -+ memmove(dst, src, sizeof(*src)); -+ i = &dst->keys; -+ set_btree_bset(b, t, i); -+ } -+ -+ out = i->start; -+ -+ for (k = start; k != end; k = n) { -+ n = bkey_p_next(k); -+ -+ if (!bkey_deleted(k)) { -+ bkey_p_copy(out, k); -+ out = bkey_p_next(out); -+ } else { -+ BUG_ON(k->needs_whiteout); -+ } -+ } -+ -+ i->u64s = cpu_to_le16((u64 *) out - i->_data); -+ set_btree_bset_end(b, t); -+ bch2_bset_set_no_aux_tree(b, t); -+ ret = true; -+ } -+ -+ bch2_verify_btree_nr_keys(b); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ return ret; -+} -+ -+bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, -+ enum compact_mode mode) -+{ -+ return bch2_drop_whiteouts(b, mode); -+} -+ -+static void btree_node_sort(struct bch_fs *c, struct btree *b, -+ unsigned start_idx, -+ unsigned end_idx, -+ bool filter_whiteouts) -+{ -+ struct btree_node *out; -+ struct sort_iter_stack sort_iter; -+ struct bset_tree *t; -+ struct bset *start_bset = bset(b, &b->set[start_idx]); -+ bool used_mempool = false; -+ u64 start_time, seq = 0; -+ unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; -+ bool sorting_entire_node = start_idx == 0 && -+ end_idx == b->nsets; -+ -+ sort_iter_stack_init(&sort_iter, b); -+ -+ for (t = b->set + start_idx; -+ t < b->set + end_idx; -+ t++) { -+ u64s += le16_to_cpu(bset(b, t)->u64s); -+ sort_iter_add(&sort_iter.iter, -+ btree_bkey_first(b, t), -+ btree_bkey_last(b, t)); -+ } -+ -+ bytes = sorting_entire_node -+ ? 
btree_bytes(c) -+ : __vstruct_bytes(struct btree_node, u64s); -+ -+ out = btree_bounce_alloc(c, bytes, &used_mempool); -+ -+ start_time = local_clock(); -+ -+ u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts); -+ -+ out->keys.u64s = cpu_to_le16(u64s); -+ -+ BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes); -+ -+ if (sorting_entire_node) -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ /* Make sure we preserve bset journal_seq: */ -+ for (t = b->set + start_idx; t < b->set + end_idx; t++) -+ seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq)); -+ start_bset->journal_seq = cpu_to_le64(seq); -+ -+ if (sorting_entire_node) { -+ u64s = le16_to_cpu(out->keys.u64s); -+ -+ BUG_ON(bytes != btree_bytes(c)); -+ -+ /* -+ * Our temporary buffer is the same size as the btree node's -+ * buffer, we can just swap buffers instead of doing a big -+ * memcpy() -+ */ -+ *out = *b->data; -+ out->keys.u64s = cpu_to_le16(u64s); -+ swap(out, b->data); -+ set_btree_bset(b, b->set, &b->data->keys); -+ } else { -+ start_bset->u64s = out->keys.u64s; -+ memcpy_u64s(start_bset->start, -+ out->keys.start, -+ le16_to_cpu(out->keys.u64s)); -+ } -+ -+ for (i = start_idx + 1; i < end_idx; i++) -+ b->nr.bset_u64s[start_idx] += -+ b->nr.bset_u64s[i]; -+ -+ b->nsets -= shift; -+ -+ for (i = start_idx + 1; i < b->nsets; i++) { -+ b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift]; -+ b->set[i] = b->set[i + shift]; -+ } -+ -+ for (i = b->nsets; i < MAX_BSETS; i++) -+ b->nr.bset_u64s[i] = 0; -+ -+ set_btree_bset_end(b, &b->set[start_idx]); -+ bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); -+ -+ btree_bounce_free(c, bytes, used_mempool, out); -+ -+ bch2_verify_btree_nr_keys(b); -+} -+ -+void bch2_btree_sort_into(struct bch_fs *c, -+ struct btree *dst, -+ struct btree *src) -+{ -+ struct btree_nr_keys nr; -+ struct btree_node_iter src_iter; -+ u64 start_time = local_clock(); -+ -+ BUG_ON(dst->nsets != 1); -+ -+ bch2_bset_set_no_aux_tree(dst, dst->set); -+ -+ bch2_btree_node_iter_init_from_start(&src_iter, src); -+ -+ nr = bch2_sort_repack(btree_bset_first(dst), -+ src, &src_iter, -+ &dst->format, -+ true); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], -+ start_time); -+ -+ set_btree_bset_end(dst, dst->set); -+ -+ dst->nr.live_u64s += nr.live_u64s; -+ dst->nr.bset_u64s[0] += nr.bset_u64s[0]; -+ dst->nr.packed_keys += nr.packed_keys; -+ dst->nr.unpacked_keys += nr.unpacked_keys; -+ -+ bch2_verify_btree_nr_keys(dst); -+} -+ -+/* -+ * We're about to add another bset to the btree node, so if there's currently -+ * too many bsets - sort some of them together: -+ */ -+static bool btree_node_compact(struct bch_fs *c, struct btree *b) -+{ -+ unsigned unwritten_idx; -+ bool ret = false; -+ -+ for (unwritten_idx = 0; -+ unwritten_idx < b->nsets; -+ unwritten_idx++) -+ if (!bset_written(b, bset(b, &b->set[unwritten_idx]))) -+ break; -+ -+ if (b->nsets - unwritten_idx > 1) { -+ btree_node_sort(c, b, unwritten_idx, -+ b->nsets, false); -+ ret = true; -+ } -+ -+ if (unwritten_idx > 1) { -+ btree_node_sort(c, b, 0, unwritten_idx, false); -+ ret = true; -+ } -+ -+ return ret; -+} -+ -+void bch2_btree_build_aux_trees(struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) -+ bch2_bset_build_aux_tree(b, t, -+ !bset_written(b, bset(b, t)) && -+ t == bset_tree_last(b)); -+} -+ -+/* -+ * If we have MAX_BSETS (3) bsets, should we sort them all down to just one? 
-+ * -+ * The first bset is going to be of similar order to the size of the node, the -+ * last bset is bounded by btree_write_set_buffer(), which is set to keep the -+ * memmove on insert from being too expensive: the middle bset should, ideally, -+ * be the geometric mean of the first and the last. -+ * -+ * Returns true if the middle bset is greater than that geometric mean: -+ */ -+static inline bool should_compact_all(struct bch_fs *c, struct btree *b) -+{ -+ unsigned mid_u64s_bits = -+ (ilog2(btree_max_u64s(c)) + BTREE_WRITE_SET_U64s_BITS) / 2; -+ -+ return bset_u64s(&b->set[1]) > 1U << mid_u64s_bits; -+} -+ -+/* -+ * @bch_btree_init_next - initialize a new (unwritten) bset that can then be -+ * inserted into -+ * -+ * Safe to call if there already is an unwritten bset - will only add a new bset -+ * if @b doesn't already have one. -+ * -+ * Returns true if we sorted (i.e. invalidated iterators -+ */ -+void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_node_entry *bne; -+ bool reinit_iter = false; -+ -+ EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]); -+ BUG_ON(bset_written(b, bset(b, &b->set[1]))); -+ BUG_ON(btree_node_just_written(b)); -+ -+ if (b->nsets == MAX_BSETS && -+ !btree_node_write_in_flight(b) && -+ should_compact_all(c, b)) { -+ bch2_btree_node_write(c, b, SIX_LOCK_write, -+ BTREE_WRITE_init_next_bset); -+ reinit_iter = true; -+ } -+ -+ if (b->nsets == MAX_BSETS && -+ btree_node_compact(c, b)) -+ reinit_iter = true; -+ -+ BUG_ON(b->nsets >= MAX_BSETS); -+ -+ bne = want_new_bset(c, b); -+ if (bne) -+ bch2_bset_init_next(c, b, bne); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ if (reinit_iter) -+ bch2_trans_node_reinit_iter(trans, b); -+} -+ -+static void btree_err_msg(struct printbuf *out, struct bch_fs *c, -+ struct bch_dev *ca, -+ struct btree *b, struct bset *i, -+ unsigned offset, int write) -+{ -+ prt_printf(out, bch2_log_msg(c, "%s"), -+ write == READ -+ ? "error validating btree node " -+ : "corrupt btree node before write "); -+ if (ca) -+ prt_printf(out, "on %s ", ca->name); -+ prt_printf(out, "at btree "); -+ bch2_btree_pos_to_text(out, c, b); -+ -+ prt_printf(out, "\n node offset %u", b->written); -+ if (i) -+ prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); -+ prt_str(out, ": "); -+} -+ -+__printf(9, 10) -+static int __btree_err(int ret, -+ struct bch_fs *c, -+ struct bch_dev *ca, -+ struct btree *b, -+ struct bset *i, -+ int write, -+ bool have_retry, -+ enum bch_sb_error_id err_type, -+ const char *fmt, ...) -+{ -+ struct printbuf out = PRINTBUF; -+ va_list args; -+ -+ btree_err_msg(&out, c, ca, b, i, b->written, write); -+ -+ va_start(args, fmt); -+ prt_vprintf(&out, fmt, args); -+ va_end(args); -+ -+ if (write == WRITE) { -+ bch2_print_string_as_lines(KERN_ERR, out.buf); -+ ret = c->opts.errors == BCH_ON_ERROR_continue -+ ? 
0 -+ : -BCH_ERR_fsck_errors_not_fixed; -+ goto out; -+ } -+ -+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) -+ ret = -BCH_ERR_btree_node_read_err_fixable; -+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) -+ ret = -BCH_ERR_btree_node_read_err_bad_node; -+ -+ if (ret != -BCH_ERR_btree_node_read_err_fixable) -+ bch2_sb_error_count(c, err_type); -+ -+ switch (ret) { -+ case -BCH_ERR_btree_node_read_err_fixable: -+ ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf); -+ if (ret != -BCH_ERR_fsck_fix && -+ ret != -BCH_ERR_fsck_ignore) -+ goto fsck_err; -+ ret = -BCH_ERR_fsck_fix; -+ break; -+ case -BCH_ERR_btree_node_read_err_want_retry: -+ case -BCH_ERR_btree_node_read_err_must_retry: -+ bch2_print_string_as_lines(KERN_ERR, out.buf); -+ break; -+ case -BCH_ERR_btree_node_read_err_bad_node: -+ bch2_print_string_as_lines(KERN_ERR, out.buf); -+ bch2_topology_error(c); -+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO; -+ break; -+ case -BCH_ERR_btree_node_read_err_incompatible: -+ bch2_print_string_as_lines(KERN_ERR, out.buf); -+ ret = -BCH_ERR_fsck_errors_not_fixed; -+ break; -+ default: -+ BUG(); -+ } -+out: -+fsck_err: -+ printbuf_exit(&out); -+ return ret; -+} -+ -+#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \ -+({ \ -+ int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \ -+ BCH_FSCK_ERR_##_err_type, \ -+ msg, ##__VA_ARGS__); \ -+ \ -+ if (_ret != -BCH_ERR_fsck_fix) { \ -+ ret = _ret; \ -+ goto fsck_err; \ -+ } \ -+ \ -+ *saw_error = true; \ -+}) -+ -+#define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) -+ -+/* -+ * When btree topology repair changes the start or end of a node, that might -+ * mean we have to drop keys that are no longer inside the node: -+ */ -+__cold -+void bch2_btree_node_drop_keys_outside_node(struct btree *b) -+{ -+ struct bset_tree *t; -+ -+ for_each_bset(b, t) { -+ struct bset *i = bset(b, t); -+ struct bkey_packed *k; -+ -+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) -+ if (bkey_cmp_left_packed(b, k, &b->data->min_key) >= 0) -+ break; -+ -+ if (k != i->start) { -+ unsigned shift = (u64 *) k - (u64 *) i->start; -+ -+ memmove_u64s_down(i->start, k, -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); -+ set_btree_bset_end(b, t); -+ } -+ -+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) -+ if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0) -+ break; -+ -+ if (k != vstruct_last(i)) { -+ i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); -+ set_btree_bset_end(b, t); -+ } -+ } -+ -+ /* -+ * Always rebuild search trees: eytzinger search tree nodes directly -+ * depend on the values of min/max key: -+ */ -+ bch2_bset_set_no_aux_tree(b, b->set); -+ bch2_btree_build_aux_trees(b); -+ -+ struct bkey_s_c k; -+ struct bkey unpacked; -+ struct btree_node_iter iter; -+ for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { -+ BUG_ON(bpos_lt(k.k->p, b->data->min_key)); -+ BUG_ON(bpos_gt(k.k->p, b->data->max_key)); -+ } -+} -+ -+static int validate_bset(struct bch_fs *c, struct bch_dev *ca, -+ struct btree *b, struct bset *i, -+ unsigned offset, unsigned sectors, -+ int write, bool have_retry, bool *saw_error) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ struct printbuf buf1 = PRINTBUF; -+ struct printbuf buf2 = PRINTBUF; -+ int ret = 0; -+ -+ btree_err_on(!bch2_version_compatible(version), -+ -BCH_ERR_btree_node_read_err_incompatible, -+ c, ca, b, i, -+ 
btree_node_unsupported_version, -+ "unsupported bset version %u.%u", -+ BCH_VERSION_MAJOR(version), -+ BCH_VERSION_MINOR(version)); -+ -+ if (btree_err_on(version < c->sb.version_min, -+ -BCH_ERR_btree_node_read_err_fixable, -+ c, NULL, b, i, -+ btree_node_bset_older_than_sb_min, -+ "bset version %u older than superblock version_min %u", -+ version, c->sb.version_min)) { -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->version_min = cpu_to_le16(version); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (btree_err_on(BCH_VERSION_MAJOR(version) > -+ BCH_VERSION_MAJOR(c->sb.version), -+ -BCH_ERR_btree_node_read_err_fixable, -+ c, NULL, b, i, -+ btree_node_bset_newer_than_sb, -+ "bset version %u newer than superblock version %u", -+ version, c->sb.version)) { -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->version = cpu_to_le16(version); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ btree_err_on(BSET_SEPARATE_WHITEOUTS(i), -+ -BCH_ERR_btree_node_read_err_incompatible, -+ c, ca, b, i, -+ btree_node_unsupported_version, -+ "BSET_SEPARATE_WHITEOUTS no longer supported"); -+ -+ if (btree_err_on(offset + sectors > btree_sectors(c), -+ -BCH_ERR_btree_node_read_err_fixable, -+ c, ca, b, i, -+ bset_past_end_of_btree_node, -+ "bset past end of btree node")) { -+ i->u64s = 0; -+ ret = 0; -+ goto out; -+ } -+ -+ btree_err_on(offset && !i->u64s, -+ -BCH_ERR_btree_node_read_err_fixable, -+ c, ca, b, i, -+ bset_empty, -+ "empty bset"); -+ -+ btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, -+ -BCH_ERR_btree_node_read_err_want_retry, -+ c, ca, b, i, -+ bset_wrong_sector_offset, -+ "bset at wrong sector offset"); -+ -+ if (!offset) { -+ struct btree_node *bn = -+ container_of(i, struct btree_node, keys); -+ /* These indicate that we read the wrong btree node: */ -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ /* XXX endianness */ -+ btree_err_on(bp->seq != bn->keys.seq, -+ -BCH_ERR_btree_node_read_err_must_retry, -+ c, ca, b, NULL, -+ bset_bad_seq, -+ "incorrect sequence number (wrong btree node)"); -+ } -+ -+ btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -+ -BCH_ERR_btree_node_read_err_must_retry, -+ c, ca, b, i, -+ btree_node_bad_btree, -+ "incorrect btree id"); -+ -+ btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -+ -BCH_ERR_btree_node_read_err_must_retry, -+ c, ca, b, i, -+ btree_node_bad_level, -+ "incorrect level"); -+ -+ if (!write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bch_btree_ptr_v2 *bp = -+ &bkey_i_to_btree_ptr_v2(&b->key)->v; -+ -+ if (BTREE_PTR_RANGE_UPDATED(bp)) { -+ b->data->min_key = bp->min_key; -+ b->data->max_key = b->key.k.p; -+ } -+ -+ btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), -+ -BCH_ERR_btree_node_read_err_must_retry, -+ c, ca, b, NULL, -+ btree_node_bad_min_key, -+ "incorrect min_key: got %s should be %s", -+ (printbuf_reset(&buf1), -+ bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), -+ (printbuf_reset(&buf2), -+ bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf)); -+ } -+ -+ btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), -+ -BCH_ERR_btree_node_read_err_must_retry, -+ c, ca, b, i, -+ btree_node_bad_max_key, -+ "incorrect max key %s", -+ (printbuf_reset(&buf1), -+ bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); -+ -+ if (write) -+ compat_btree_node(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, bn); -+ -+ 
btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), -+ -BCH_ERR_btree_node_read_err_bad_node, -+ c, ca, b, i, -+ btree_node_bad_format, -+ "invalid bkey format: %s\n %s", buf1.buf, -+ (printbuf_reset(&buf2), -+ bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); -+ printbuf_reset(&buf1); -+ -+ compat_bformat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &bn->format); -+ } -+out: -+fsck_err: -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ return ret; -+} -+ -+static int bset_key_invalid(struct bch_fs *c, struct btree *b, -+ struct bkey_s_c k, -+ bool updated_range, int rw, -+ struct printbuf *err) -+{ -+ return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: -+ (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: -+ (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); -+} -+ -+static int validate_bset_keys(struct bch_fs *c, struct btree *b, -+ struct bset *i, int write, -+ bool have_retry, bool *saw_error) -+{ -+ unsigned version = le16_to_cpu(i->version); -+ struct bkey_packed *k, *prev = NULL; -+ struct printbuf buf = PRINTBUF; -+ bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && -+ BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); -+ int ret = 0; -+ -+ for (k = i->start; -+ k != vstruct_last(i);) { -+ struct bkey_s u; -+ struct bkey tmp; -+ -+ if (btree_err_on(bkey_p_next(k) > vstruct_last(i), -+ -BCH_ERR_btree_node_read_err_fixable, -+ c, NULL, b, i, -+ btree_node_bkey_past_bset_end, -+ "key extends past end of bset")) { -+ i->u64s = cpu_to_le16((u64 *) k - i->_data); -+ break; -+ } -+ -+ if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -+ -BCH_ERR_btree_node_read_err_fixable, -+ c, NULL, b, i, -+ btree_node_bkey_bad_format, -+ "invalid bkey format %u", k->format)) { -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_p_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ /* XXX: validate k->u64s */ -+ if (!write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ u = __bkey_disassemble(b, k, &tmp); -+ -+ printbuf_reset(&buf); -+ if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { -+ printbuf_reset(&buf); -+ bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); -+ prt_printf(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, u.s_c); -+ -+ btree_err(-BCH_ERR_btree_node_read_err_fixable, -+ c, NULL, b, i, -+ btree_node_bad_bkey, -+ "invalid bkey: %s", buf.buf); -+ -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_p_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ -+ if (write) -+ bch2_bkey_compat(b->c.level, b->c.btree_id, version, -+ BSET_BIG_ENDIAN(i), write, -+ &b->format, k); -+ -+ if (prev && bkey_iter_cmp(b, prev, k) > 0) { -+ struct bkey up = bkey_unpack_key(b, prev); -+ -+ printbuf_reset(&buf); -+ prt_printf(&buf, "keys out of order: "); -+ bch2_bkey_to_text(&buf, &up); -+ prt_printf(&buf, " > "); -+ bch2_bkey_to_text(&buf, u.k); -+ -+ bch2_dump_bset(c, b, i, 0); -+ -+ if (btree_err(-BCH_ERR_btree_node_read_err_fixable, -+ c, NULL, b, i, -+ btree_node_bkey_out_of_order, -+ "%s", buf.buf)) { -+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); -+ memmove_u64s_down(k, bkey_p_next(k), -+ (u64 *) vstruct_end(i) - (u64 *) k); -+ continue; -+ } -+ } -+ -+ prev = k; -+ k = bkey_p_next(k); -+ } -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_btree_node_read_done(struct 
bch_fs *c, struct bch_dev *ca,
-+			      struct btree *b, bool have_retry, bool *saw_error)
-+{
-+	struct btree_node_entry *bne;
-+	struct sort_iter *iter;
-+	struct btree_node *sorted;
-+	struct bkey_packed *k;
-+	struct bch_extent_ptr *ptr;
-+	struct bset *i;
-+	bool used_mempool, blacklisted;
-+	bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
-+		BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
-+	unsigned u64s;
-+	unsigned ptr_written = btree_ptr_sectors_written(&b->key);
-+	struct printbuf buf = PRINTBUF;
-+	int ret = 0, retry_read = 0, write = READ;
-+
-+	b->version_ondisk = U16_MAX;
-+	/* We might get called multiple times on read retry: */
-+	b->written = 0;
-+
-+	iter = mempool_alloc(&c->fill_iter, GFP_NOFS);
-+	sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2);
-+
-+	if (bch2_meta_read_fault("btree"))
-+		btree_err(-BCH_ERR_btree_node_read_err_must_retry,
-+			  c, ca, b, NULL,
-+			  btree_node_fault_injected,
-+			  "dynamic fault");
-+
-+	btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
-+		     -BCH_ERR_btree_node_read_err_must_retry,
-+		     c, ca, b, NULL,
-+		     btree_node_bad_magic,
-+		     "bad magic: want %llx, got %llx",
-+		     bset_magic(c), le64_to_cpu(b->data->magic));
-+
-+	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
-+		struct bch_btree_ptr_v2 *bp =
-+			&bkey_i_to_btree_ptr_v2(&b->key)->v;
-+
-+		btree_err_on(b->data->keys.seq != bp->seq,
-+			     -BCH_ERR_btree_node_read_err_must_retry,
-+			     c, ca, b, NULL,
-+			     btree_node_bad_seq,
-+			     "got wrong btree node (seq %llx want %llx)",
-+			     b->data->keys.seq, bp->seq);
-+	} else {
-+		btree_err_on(!b->data->keys.seq,
-+			     -BCH_ERR_btree_node_read_err_must_retry,
-+			     c, ca, b, NULL,
-+			     btree_node_bad_seq,
-+			     "bad btree header: seq 0");
-+	}
-+
-+	while (b->written < (ptr_written ?: btree_sectors(c))) {
-+		unsigned sectors;
-+		struct nonce nonce;
-+		bool first = !b->written;
-+		bool csum_bad;
-+
-+		if (!b->written) {
-+			i = &b->data->keys;
-+
-+			btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-+				     -BCH_ERR_btree_node_read_err_want_retry,
-+				     c, ca, b, i,
-+				     bset_unknown_csum,
-+				     "unknown checksum type %llu", BSET_CSUM_TYPE(i));
-+
-+			nonce = btree_nonce(i, b->written << 9);
-+
-+			csum_bad = bch2_crc_cmp(b->data->csum,
-+				csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
-+			if (csum_bad)
-+				bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+
-+			btree_err_on(csum_bad,
-+				     -BCH_ERR_btree_node_read_err_want_retry,
-+				     c, ca, b, i,
-+				     bset_bad_csum,
-+				     "invalid checksum");
-+
-+			ret = bset_encrypt(c, i, b->written << 9);
-+			if (bch2_fs_fatal_err_on(ret, c,
-+					"error decrypting btree node: %i", ret))
-+				goto fsck_err;
-+
-+			btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
-+				     !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
-+				     -BCH_ERR_btree_node_read_err_incompatible,
-+				     c, NULL, b, NULL,
-+				     btree_node_unsupported_version,
-+				     "btree node does not have NEW_EXTENT_OVERWRITE set");
-+
-+			sectors = vstruct_sectors(b->data, c->block_bits);
-+		} else {
-+			bne = write_block(b);
-+			i = &bne->keys;
-+
-+			if (i->seq != b->data->keys.seq)
-+				break;
-+
-+			btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-+				     -BCH_ERR_btree_node_read_err_want_retry,
-+				     c, ca, b, i,
-+				     bset_unknown_csum,
-+				     "unknown checksum type %llu", BSET_CSUM_TYPE(i));
-+
-+			nonce = btree_nonce(i, b->written << 9);
-+			csum_bad = bch2_crc_cmp(bne->csum,
-+				csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
-+			if (csum_bad)
-+				bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+
-+			btree_err_on(csum_bad,
-+				     -BCH_ERR_btree_node_read_err_want_retry,
-+				     c, ca, b, i,
-+				     bset_bad_csum,
-+				     "invalid checksum");
-+
-+			ret = bset_encrypt(c, i, b->written << 9);
-+			if (bch2_fs_fatal_err_on(ret, c,
-+					"error decrypting btree node: %i\n", ret))
-+				goto fsck_err;
-+
-+			sectors = vstruct_sectors(bne, c->block_bits);
-+		}
-+
-+		b->version_ondisk = min(b->version_ondisk,
-+					le16_to_cpu(i->version));
-+
-+		ret = validate_bset(c, ca, b, i, b->written, sectors,
-+				    READ, have_retry, saw_error);
-+		if (ret)
-+			goto fsck_err;
-+
-+		if (!b->written)
-+			btree_node_set_format(b, b->data->format);
-+
-+		ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error);
-+		if (ret)
-+			goto fsck_err;
-+
-+		SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-+
-+		blacklisted = bch2_journal_seq_is_blacklisted(c,
-+				le64_to_cpu(i->journal_seq),
-+				true);
-+
-+		btree_err_on(blacklisted && first,
-+			     -BCH_ERR_btree_node_read_err_fixable,
-+			     c, ca, b, i,
-+			     bset_blacklisted_journal_seq,
-+			     "first btree node bset has blacklisted journal seq (%llu)",
-+			     le64_to_cpu(i->journal_seq));
-+
-+		btree_err_on(blacklisted && ptr_written,
-+			     -BCH_ERR_btree_node_read_err_fixable,
-+			     c, ca, b, i,
-+			     first_bset_blacklisted_journal_seq,
-+			     "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
-+			     le64_to_cpu(i->journal_seq),
-+			     b->written, b->written + sectors, ptr_written);
-+
-+		b->written += sectors;
-+
-+		if (blacklisted && !first)
-+			continue;
-+
-+		sort_iter_add(iter,
-+			      vstruct_idx(i, 0),
-+			      vstruct_last(i));
-+	}
-+
-+	if (ptr_written) {
-+		btree_err_on(b->written < ptr_written,
-+			     -BCH_ERR_btree_node_read_err_want_retry,
-+			     c, ca, b, NULL,
-+			     btree_node_data_missing,
-+			     "btree node data missing: expected %u sectors, found %u",
-+			     ptr_written, b->written);
-+	} else {
-+		for (bne = write_block(b);
-+		     bset_byte_offset(b, bne) < btree_bytes(c);
-+		     bne = (void *) bne + block_bytes(c))
-+			btree_err_on(bne->keys.seq == b->data->keys.seq &&
-+				     !bch2_journal_seq_is_blacklisted(c,
-+						le64_to_cpu(bne->keys.journal_seq),
-+						true),
-+				     -BCH_ERR_btree_node_read_err_want_retry,
-+				     c, ca, b, NULL,
-+				     btree_node_bset_after_end,
-+				     "found bset signature after last bset");
-+	}
-+
-+	sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
-+	sorted->keys.u64s = 0;
-+
-+	set_btree_bset(b, b->set, &b->data->keys);
-+
-+	b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
-+
-+	u64s = le16_to_cpu(sorted->keys.u64s);
-+	*sorted = *b->data;
-+	sorted->keys.u64s = cpu_to_le16(u64s);
-+	swap(sorted, b->data);
-+	set_btree_bset(b, b->set, &b->data->keys);
-+	b->nsets = 1;
-+
-+	BUG_ON(b->nr.live_u64s != u64s);
-+
-+	btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
-+
-+	if (updated_range)
-+		bch2_btree_node_drop_keys_outside_node(b);
-+
-+	i = &b->data->keys;
-+	for (k = i->start; k != vstruct_last(i);) {
-+		struct bkey tmp;
-+		struct bkey_s u = __bkey_disassemble(b, k, &tmp);
-+
-+		printbuf_reset(&buf);
-+
-+		if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) ||
-+		    (bch2_inject_invalid_keys &&
-+		     !bversion_cmp(u.k->version, MAX_VERSION))) {
-+			printbuf_reset(&buf);
-+
-+			prt_printf(&buf, "invalid bkey: ");
-+			bch2_bkey_val_invalid(c, u.s_c, READ, &buf);
-+			prt_printf(&buf, "\n ");
-+			bch2_bkey_val_to_text(&buf, c, u.s_c);
-+
-+			btree_err(-BCH_ERR_btree_node_read_err_fixable,
-+				  c, NULL, b, i,
-+				  btree_node_bad_bkey,
-+				  "%s", buf.buf);
-+
-+			btree_keys_account_key_drop(&b->nr, 0, k);
-+
-+			i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
-+			memmove_u64s_down(k, bkey_p_next(k),
-+					  (u64 *) vstruct_end(i) - (u64 *) k);
-+			set_btree_bset_end(b, b->set);
-+			continue;
-+		}
-+
-+		if (u.k->type == KEY_TYPE_btree_ptr_v2) {
-+			struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
-+
-+			bp.v->mem_ptr = 0;
-+		}
-+
-+		k = bkey_p_next(k);
-+	}
-+
-+	bch2_bset_build_aux_tree(b, b->set, false);
-+
-+	set_needs_whiteout(btree_bset_first(b), true);
-+
-+	btree_node_reset_sib_u64s(b);
-+
-+	bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
-+		struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev);
-+
-+		if (ca2->mi.state != BCH_MEMBER_STATE_rw)
-+			set_btree_node_need_rewrite(b);
-+	}
-+
-+	if (!ptr_written)
-+		set_btree_node_need_rewrite(b);
-+out:
-+	mempool_free(iter, &c->fill_iter);
-+	printbuf_exit(&buf);
-+	return retry_read;
-+fsck_err:
-+	if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
-+	    ret == -BCH_ERR_btree_node_read_err_must_retry)
-+		retry_read = 1;
-+	else
-+		set_btree_node_read_error(b);
-+	goto out;
-+}
-+
-+static void btree_node_read_work(struct work_struct *work)
-+{
-+	struct btree_read_bio *rb =
-+		container_of(work, struct btree_read_bio, work);
-+	struct bch_fs *c = rb->c;
-+	struct btree *b = rb->b;
-+	struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
-+	struct bio *bio = &rb->bio;
-+	struct bch_io_failures failed = { .nr = 0 };
-+	struct printbuf buf = PRINTBUF;
-+	bool saw_error = false;
-+	bool retry = false;
-+	bool can_retry;
-+
-+	goto start;
-+	while (1) {
-+		retry = true;
-+		bch_info(c, "retrying read");
-+		ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
-+		rb->have_ioref = bch2_dev_get_ioref(ca, READ);
-+		bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
-+		bio->bi_iter.bi_sector = rb->pick.ptr.offset;
-+		bio->bi_iter.bi_size = btree_bytes(c);
-+
-+		if (rb->have_ioref) {
-+			bio_set_dev(bio, ca->disk_sb.bdev);
-+			submit_bio_wait(bio);
-+		} else {
-+			bio->bi_status = BLK_STS_REMOVED;
-+		}
-+start:
-+		printbuf_reset(&buf);
-+		bch2_btree_pos_to_text(&buf, c, b);
-+		bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
-+				   "btree read error %s for %s",
-+				   bch2_blk_status_to_str(bio->bi_status), buf.buf);
-+		if (rb->have_ioref)
-+			percpu_ref_put(&ca->io_ref);
-+		rb->have_ioref = false;
-+
-+		bch2_mark_io_failure(&failed, &rb->pick);
-+
-+		can_retry = bch2_bkey_pick_read_device(c,
-+				bkey_i_to_s_c(&b->key),
-+				&failed, &rb->pick) > 0;
-+
-+		if (!bio->bi_status &&
-+		    !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) {
-+			if (retry)
-+				bch_info(c, "retry success");
-+			break;
-+		}
-+
-+		saw_error = true;
-+
-+		if (!can_retry) {
-+			set_btree_node_read_error(b);
-+			break;
-+		}
-+	}
-+
-+	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
-+			       rb->start_time);
-+	bio_put(&rb->bio);
-+
-+	if (saw_error && !btree_node_read_error(b)) {
-+		printbuf_reset(&buf);
-+		bch2_bpos_to_text(&buf, b->key.k.p);
-+		bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
-+			 __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
-+
-+		bch2_btree_node_rewrite_async(c, b);
-+	}
-+
-+	printbuf_exit(&buf);
-+	clear_btree_node_read_in_flight(b);
-+	wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
-+}
-+
-+static void btree_node_read_endio(struct bio *bio)
-+{
-+	struct btree_read_bio *rb =
-+		container_of(bio, struct btree_read_bio, bio);
-+	struct bch_fs *c = rb->c;
-+
-+	if (rb->have_ioref) {
-+		struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
-+
-+		bch2_latency_acct(ca, rb->start_time, READ);
-+	}
-+
-+	queue_work(c->io_complete_wq, &rb->work);
-+}
-+
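btree_node_read_work() above enters its retry loop with `goto start;`, so the first attempt skips the loop preamble and only actual retries reset and resubmit the bio. A minimal standalone sketch of that control-flow idiom (plain C stand-ins, not the kernel types):

	#include <stdio.h>
	#include <stdbool.h>

	/* First attempt jumps straight to the check; only retries redo the setup. */
	static int read_with_retries(int max_retries)
	{
		int attempts = 0;
		bool ok;

		goto start;
		while (1) {
			printf("retrying read\n");	/* setup runs only on retry */
	start:
			ok = (++attempts > 2);		/* stand-in for submit + verify */
			if (ok)
				return 0;
			if (attempts > max_retries)
				return -1;
		}
	}

	int main(void)
	{
		return read_with_retries(5);	/* succeeds on the third attempt */
	}
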
-+struct btree_node_read_all {
-+	struct closure		cl;
-+	struct bch_fs		*c;
-+	struct btree		*b;
-+	unsigned		nr;
-+	void			*buf[BCH_REPLICAS_MAX];
-+	struct bio		*bio[BCH_REPLICAS_MAX];
-+	blk_status_t		err[BCH_REPLICAS_MAX];
-+};
-+
-+static unsigned btree_node_sectors_written(struct bch_fs *c, void *data)
-+{
-+	struct btree_node *bn = data;
-+	struct btree_node_entry *bne;
-+	unsigned offset = 0;
-+
-+	if (le64_to_cpu(bn->magic) != bset_magic(c))
-+		return 0;
-+
-+	while (offset < btree_sectors(c)) {
-+		if (!offset) {
-+			offset += vstruct_sectors(bn, c->block_bits);
-+		} else {
-+			bne = data + (offset << 9);
-+			if (bne->keys.seq != bn->keys.seq)
-+				break;
-+			offset += vstruct_sectors(bne, c->block_bits);
-+		}
-+	}
-+
-+	return offset;
-+}
-+
-+static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data)
-+{
-+	struct btree_node *bn = data;
-+	struct btree_node_entry *bne;
-+
-+	if (!offset)
-+		return false;
-+
-+	while (offset < btree_sectors(c)) {
-+		bne = data + (offset << 9);
-+		if (bne->keys.seq == bn->keys.seq)
-+			return true;
-+		offset++;
-+	}
-+
-+	return false;
-+}
-+
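btree_node_sectors_written() walks the node by repeatedly adding each bset's vstruct_sectors() until the on-disk sequence numbers stop matching. A userspace sketch of the same walk over length-prefixed records, with a size field standing in for vstruct_sectors() and a shared tag standing in for keys.seq (all names hypothetical):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct entry {			/* stand-in for btree_node_entry */
		uint16_t seq;		/* must match the first entry to count */
		uint16_t sectors;	/* length of this entry, in sectors */
	};

	static unsigned sectors_written(const uint8_t *buf, unsigned total_sectors)
	{
		const struct entry *first = (const void *) buf;
		unsigned offset = 0;

		while (offset < total_sectors) {
			const struct entry *e = (const void *) (buf + (offset << 9));

			if (offset && e->seq != first->seq)
				break;		/* next entry belongs to an old node */
			offset += e->sectors;
		}
		return offset;
	}

	int main(void)
	{
		static uint8_t buf[4 << 9];
		struct entry e = { .seq = 7, .sectors = 1 };

		memcpy(buf, &e, sizeof(e));		/* entry at sector 0 */
		memcpy(buf + (1 << 9), &e, sizeof(e));	/* entry at sector 1 */
		printf("%u\n", sectors_written(buf, 4));	/* prints 2 */
		return 0;
	}
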
-+static void btree_node_read_all_replicas_done(struct closure *cl)
-+{
-+	struct btree_node_read_all *ra =
-+		container_of(cl, struct btree_node_read_all, cl);
-+	struct bch_fs *c = ra->c;
-+	struct btree *b = ra->b;
-+	struct printbuf buf = PRINTBUF;
-+	bool dump_bset_maps = false;
-+	bool have_retry = false;
-+	int ret = 0, best = -1, write = READ;
-+	unsigned i, written = 0, written2 = 0;
-+	__le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2
-+		? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0;
-+	bool _saw_error = false, *saw_error = &_saw_error;
-+
-+	for (i = 0; i < ra->nr; i++) {
-+		struct btree_node *bn = ra->buf[i];
-+
-+		if (ra->err[i])
-+			continue;
-+
-+		if (le64_to_cpu(bn->magic) != bset_magic(c) ||
-+		    (seq && seq != bn->keys.seq))
-+			continue;
-+
-+		if (best < 0) {
-+			best = i;
-+			written = btree_node_sectors_written(c, bn);
-+			continue;
-+		}
-+
-+		written2 = btree_node_sectors_written(c, ra->buf[i]);
-+		if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable,
-+				 c, NULL, b, NULL,
-+				 btree_node_replicas_sectors_written_mismatch,
-+				 "btree node sectors written mismatch: %u != %u",
-+				 written, written2) ||
-+		    btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
-+				 -BCH_ERR_btree_node_read_err_fixable,
-+				 c, NULL, b, NULL,
-+				 btree_node_bset_after_end,
-+				 "found bset signature after last bset") ||
-+		    btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9),
-+				 -BCH_ERR_btree_node_read_err_fixable,
-+				 c, NULL, b, NULL,
-+				 btree_node_replicas_data_mismatch,
-+				 "btree node replicas content mismatch"))
-+			dump_bset_maps = true;
-+
-+		if (written2 > written) {
-+			written = written2;
-+			best = i;
-+		}
-+	}
-+fsck_err:
-+	if (dump_bset_maps) {
-+		for (i = 0; i < ra->nr; i++) {
-+			struct btree_node *bn = ra->buf[i];
-+			struct btree_node_entry *bne = NULL;
-+			unsigned offset = 0, sectors;
-+			bool gap = false;
-+
-+			if (ra->err[i])
-+				continue;
-+
-+			printbuf_reset(&buf);
-+
-+			while (offset < btree_sectors(c)) {
-+				if (!offset) {
-+					sectors = vstruct_sectors(bn, c->block_bits);
-+				} else {
-+					bne = ra->buf[i] + (offset << 9);
-+					if (bne->keys.seq != bn->keys.seq)
-+						break;
-+					sectors = vstruct_sectors(bne, c->block_bits);
-+				}
-+
-+				prt_printf(&buf, " %u-%u", offset, offset + sectors);
-+				if (bne && bch2_journal_seq_is_blacklisted(c,
-+						le64_to_cpu(bne->keys.journal_seq), false))
-+					prt_printf(&buf, "*");
-+				offset += sectors;
-+			}
-+
-+			while (offset < btree_sectors(c)) {
-+				bne = ra->buf[i] + (offset << 9);
-+				if (bne->keys.seq == bn->keys.seq) {
-+					if (!gap)
-+						prt_printf(&buf, " GAP");
-+					gap = true;
-+
-+					sectors = vstruct_sectors(bne, c->block_bits);
-+					prt_printf(&buf, " %u-%u", offset, offset + sectors);
-+					if (bch2_journal_seq_is_blacklisted(c,
-+							le64_to_cpu(bne->keys.journal_seq), false))
-+						prt_printf(&buf, "*");
-+				}
-+				offset++;
-+			}
-+
-+			bch_err(c, "replica %u:%s", i, buf.buf);
-+		}
-+	}
-+
-+	if (best >= 0) {
-+		memcpy(b->data, ra->buf[best], btree_bytes(c));
-+		ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
-+	} else {
-+		ret = -1;
-+	}
-+
-+	if (ret)
-+		set_btree_node_read_error(b);
-+	else if (*saw_error)
-+		bch2_btree_node_rewrite_async(c, b);
-+
-+	for (i = 0; i < ra->nr; i++) {
-+		mempool_free(ra->buf[i], &c->btree_bounce_pool);
-+		bio_put(ra->bio[i]);
-+	}
-+
-+	closure_debug_destroy(&ra->cl);
-+	kfree(ra);
-+	printbuf_exit(&buf);
-+
-+	clear_btree_node_read_in_flight(b);
-+	wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
-+}
-+
-+static void btree_node_read_all_replicas_endio(struct bio *bio)
-+{
-+	struct btree_read_bio *rb =
-+		container_of(bio, struct btree_read_bio, bio);
-+	struct bch_fs *c = rb->c;
-+	struct btree_node_read_all *ra = rb->ra;
-+
-+	if (rb->have_ioref) {
-+		struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
-+
-+		bch2_latency_acct(ca, rb->start_time, READ);
-+	}
-+
-+	ra->err[rb->idx] = bio->bi_status;
-+	closure_put(&ra->cl);
-+}
-+
-+/*
-+ * XXX This allocates multiple times from the same mempools, and can deadlock
-+ * under sufficient memory pressure (but is only a debug path)
-+ */
-+static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync)
-+{
-+	struct bkey_s_c k = bkey_i_to_s_c(&b->key);
-+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+	const union bch_extent_entry *entry;
-+	struct extent_ptr_decoded pick;
-+	struct btree_node_read_all *ra;
-+	unsigned i;
-+
-+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
-+	if (!ra)
-+		return -BCH_ERR_ENOMEM_btree_node_read_all_replicas;
-+
-+	closure_init(&ra->cl, NULL);
-+	ra->c	= c;
-+	ra->b	= b;
-+	ra->nr	= bch2_bkey_nr_ptrs(k);
-+
-+	for (i = 0; i < ra->nr; i++) {
-+		ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
-+		ra->bio[i] = bio_alloc_bioset(NULL,
-+					      buf_pages(ra->buf[i], btree_bytes(c)),
-+					      REQ_OP_READ|REQ_SYNC|REQ_META,
-+					      GFP_NOFS,
-+					      &c->btree_bio);
-+	}
-+
-+	i = 0;
-+	bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
-+		struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+		struct btree_read_bio *rb =
-+			container_of(ra->bio[i], struct btree_read_bio, bio);
-+		rb->c			= c;
-+		rb->b			= b;
-+		rb->ra			= ra;
-+		rb->start_time		= local_clock();
-+		rb->have_ioref		= bch2_dev_get_ioref(ca, READ);
-+		rb->idx			= i;
-+		rb->pick		= pick;
-+		rb->bio.bi_iter.bi_sector = pick.ptr.offset;
-+		rb->bio.bi_end_io	= btree_node_read_all_replicas_endio;
-+		bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
-+
-+		if (rb->have_ioref) {
-+			this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
-+				     bio_sectors(&rb->bio));
-+			bio_set_dev(&rb->bio, ca->disk_sb.bdev);
-+
-+			closure_get(&ra->cl);
-+			submit_bio(&rb->bio);
-+		} else {
-+			ra->err[i] = BLK_STS_REMOVED;
-+		}
-+
-+		i++;
-+	}
-+
-+	if (sync) {
-+		closure_sync(&ra->cl);
-+		btree_node_read_all_replicas_done(&ra->cl);
-+	} else {
-+		continue_at(&ra->cl, btree_node_read_all_replicas_done,
-+			    c->io_complete_wq);
-+	}
-+
-+	return 0;
-+}
-+
-+void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
-+			  bool sync)
-+{
-+	struct extent_ptr_decoded pick;
-+	struct btree_read_bio *rb;
-+	struct bch_dev *ca;
-+	struct bio *bio;
-+	int ret;
-+
-+	trace_and_count(c, btree_node_read, c, b);
-+
-+	if (bch2_verify_all_btree_replicas &&
-+	    !btree_node_read_all_replicas(c, b, sync))
-+		return;
-+
-+	ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key),
-+					 NULL, &pick);
-+
-+	if (ret <= 0) {
-+		struct printbuf buf = PRINTBUF;
-+
-+		prt_str(&buf, "btree node read error: no device to read from\n at ");
-+		bch2_btree_pos_to_text(&buf, c, b);
-+		bch_err(c, "%s", buf.buf);
-+
-+		if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
-+		    c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
-+			bch2_fatal_error(c);
-+
-+		set_btree_node_read_error(b);
-+		clear_btree_node_read_in_flight(b);
-+		wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
-+		printbuf_exit(&buf);
-+		return;
-+	}
-+
-+	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+
-+	bio = bio_alloc_bioset(NULL,
-+			       buf_pages(b->data, btree_bytes(c)),
-+			       REQ_OP_READ|REQ_SYNC|REQ_META,
-+			       GFP_NOFS,
-+			       &c->btree_bio);
-+	rb = container_of(bio, struct btree_read_bio, bio);
-+	rb->c			= c;
-+	rb->b			= b;
-+	rb->ra			= NULL;
-+	rb->start_time		= local_clock();
-+	rb->have_ioref		= bch2_dev_get_ioref(ca, READ);
-+	rb->pick		= pick;
-+	INIT_WORK(&rb->work, btree_node_read_work);
-+	bio->bi_iter.bi_sector	= pick.ptr.offset;
-+	bio->bi_end_io		= btree_node_read_endio;
-+	bch2_bio_map(bio, b->data, btree_bytes(c));
-+
-+	if (rb->have_ioref) {
-+		this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
-+			     bio_sectors(bio));
-+		bio_set_dev(bio, ca->disk_sb.bdev);
-+
-+		if (sync) {
-+			submit_bio_wait(bio);
-+
-+			btree_node_read_work(&rb->work);
-+		} else {
-+			submit_bio(bio);
-+		}
-+	} else {
-+		bio->bi_status = BLK_STS_REMOVED;
-+
-+		if (sync)
-+			btree_node_read_work(&rb->work);
-+		else
-+			queue_work(c->io_complete_wq, &rb->work);
-+	}
-+}
-+
-+static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
-+				  const struct bkey_i *k, unsigned level)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct closure cl;
-+	struct btree *b;
-+	int ret;
-+
-+	closure_init_stack(&cl);
-+
-+	do {
-+		ret = bch2_btree_cache_cannibalize_lock(c, &cl);
-+		closure_sync(&cl);
-+	} while (ret);
-+
-+	b = bch2_btree_node_mem_alloc(trans, level != 0);
-+	bch2_btree_cache_cannibalize_unlock(c);
-+
-+	BUG_ON(IS_ERR(b));
-+
-+	bkey_copy(&b->key, k);
-+	BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id));
-+
-+	set_btree_node_read_in_flight(b);
-+
-+	bch2_btree_node_read(c, b, true);
-+
-+	if (btree_node_read_error(b)) {
-+		bch2_btree_node_hash_remove(&c->btree_cache, b);
-+
-+		mutex_lock(&c->btree_cache.lock);
-+		list_move(&b->list, &c->btree_cache.freeable);
-+		mutex_unlock(&c->btree_cache.lock);
-+
-+		ret = -EIO;
-+		goto err;
-+	}
-+
-+	bch2_btree_set_root_for_read(c, b);
-+err:
-+	six_unlock_write(&b->c.lock);
-+	six_unlock_intent(&b->c.lock);
-+
-+	return ret;
-+}
-+
-+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
-+			 const struct bkey_i *k, unsigned level)
-+{
-+	return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
-+}
-+
-+void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
-+			       struct btree_write *w)
-+{
-+	unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
-+
-+	do {
-+		old = new = v;
-+		if (!(old & 1))
-+			break;
-+
-+		new &= ~1UL;
-+	} while ((v = cmpxchg(&b->will_make_reachable, old, new)) != old);
-+
-+	if (old & 1)
-+		closure_put(&((struct btree_update *) new)->cl);
-+
-+	bch2_journal_pin_drop(&c->journal, &w->journal);
-+}
-+
-+static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
-+{
-+	struct btree_write *w = btree_prev_write(b);
-+	unsigned long old, new, v;
-+	unsigned type = 0;
-+
-+	bch2_btree_complete_write(c, b, w);
-+
-+	v = READ_ONCE(b->flags);
-+	do {
-+		old = new = v;
-+
-+		if ((old & (1U << BTREE_NODE_dirty)) &&
-+		    (old & (1U << BTREE_NODE_need_write)) &&
-+		    !(old & (1U << BTREE_NODE_never_write)) &&
-+		    !(old & (1U << BTREE_NODE_write_blocked)) &&
-+		    !(old & (1U << BTREE_NODE_will_make_reachable))) {
-+			new &= ~(1U << BTREE_NODE_dirty);
-+			new &= ~(1U << BTREE_NODE_need_write);
-+			new |= (1U << BTREE_NODE_write_in_flight);
-+			new |= (1U << BTREE_NODE_write_in_flight_inner);
-+			new |= (1U << BTREE_NODE_just_written);
-+			new ^= (1U << BTREE_NODE_write_idx);
-+
-+			type = new & BTREE_WRITE_TYPE_MASK;
-+			new &= ~BTREE_WRITE_TYPE_MASK;
-+		} else {
-+			new &= ~(1U << BTREE_NODE_write_in_flight);
-+			new &= ~(1U << BTREE_NODE_write_in_flight_inner);
-+		}
-+	} while ((v = cmpxchg(&b->flags, old, new)) != old);
-+
-+	if (new & (1U << BTREE_NODE_write_in_flight))
-+		__bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type);
-+	else
-+		wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
-+}
-+
-+static void btree_node_write_done(struct bch_fs *c, struct btree *b)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+
-+	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-+	__btree_node_write_done(c, b);
-+	six_unlock_read(&b->c.lock);
-+
-+	bch2_trans_put(trans);
-+}
-+
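__btree_node_write_done() above decides whether to kick off the next write entirely inside one compare-and-swap loop on b->flags, so concurrent writers and redirtiers always observe a single consistent transition. A self-contained C11 sketch of that pattern, with hypothetical flag bits rather than the real BTREE_NODE_* set:

	#include <stdatomic.h>
	#include <stdio.h>

	enum { F_DIRTY = 1 << 0, F_NEED_WRITE = 1 << 1, F_WRITE_IN_FLIGHT = 1 << 2 };

	/* Claim the next write if the node is dirty again, else clear in-flight. */
	static unsigned write_done_transition(_Atomic unsigned *flags)
	{
		unsigned old = atomic_load(flags), new;

		do {
			if ((old & F_DIRTY) && (old & F_NEED_WRITE))
				new = (old & ~(F_DIRTY|F_NEED_WRITE)) | F_WRITE_IN_FLIGHT;
			else
				new = old & ~F_WRITE_IN_FLIGHT;
		} while (!atomic_compare_exchange_weak(flags, &old, new));

		return new;
	}

	int main(void)
	{
		_Atomic unsigned flags = F_DIRTY | F_NEED_WRITE | F_WRITE_IN_FLIGHT;

		printf("%#x\n", write_done_transition(&flags));	/* 0x4: next write claimed */
		printf("%#x\n", write_done_transition(&flags));	/* 0x0: nothing left to do */
		return 0;
	}
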
-+static void btree_node_write_work(struct work_struct *work)
-+{
-+	struct btree_write_bio *wbio =
-+		container_of(work, struct btree_write_bio, work);
-+	struct bch_fs *c = wbio->wbio.c;
-+	struct btree *b = wbio->wbio.bio.bi_private;
-+	struct bch_extent_ptr *ptr;
-+	int ret = 0;
-+
-+	btree_bounce_free(c,
-+		wbio->data_bytes,
-+		wbio->wbio.used_mempool,
-+		wbio->data);
-+
-+	bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr,
-+		bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
-+
-+	if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key)))
-+		goto err;
-+
-+	if (wbio->wbio.first_btree_write) {
-+		if (wbio->wbio.failed.nr) {
-+
-+		}
-+	} else {
-+		ret = bch2_trans_do(c, NULL, NULL, 0,
-+			bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
-+					BCH_WATERMARK_reclaim|
-+					BTREE_INSERT_JOURNAL_RECLAIM|
-+					BTREE_INSERT_NOFAIL|
-+					BTREE_INSERT_NOCHECK_RW,
-+					!wbio->wbio.failed.nr));
-+		if (ret)
-+			goto err;
-+	}
-+out:
-+	bio_put(&wbio->wbio.bio);
-+	btree_node_write_done(c, b);
-+	return;
-+err:
-+	set_btree_node_noevict(b);
-+	if (!bch2_err_matches(ret, EROFS))
-+		bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
-+	goto out;
-+}
-+
-+static void btree_node_write_endio(struct bio *bio)
-+{
-+	struct bch_write_bio *wbio	= to_wbio(bio);
-+	struct bch_write_bio *parent	= wbio->split ? wbio->parent : NULL;
-+	struct bch_write_bio *orig	= parent ?: wbio;
-+	struct btree_write_bio *wb	= container_of(orig, struct btree_write_bio, wbio);
-+	struct bch_fs *c		= wbio->c;
-+	struct btree *b			= wbio->bio.bi_private;
-+	struct bch_dev *ca		= bch_dev_bkey_exists(c, wbio->dev);
-+	unsigned long flags;
-+
-+	if (wbio->have_ioref)
-+		bch2_latency_acct(ca, wbio->submit_time, WRITE);
-+
-+	if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
-+			       "btree write error: %s",
-+			       bch2_blk_status_to_str(bio->bi_status)) ||
-+	    bch2_meta_write_fault("btree")) {
-+		spin_lock_irqsave(&c->btree_write_error_lock, flags);
-+		bch2_dev_list_add_dev(&orig->failed, wbio->dev);
-+		spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
-+	}
-+
-+	if (wbio->have_ioref)
-+		percpu_ref_put(&ca->io_ref);
-+
-+	if (parent) {
-+		bio_put(bio);
-+		bio_endio(&parent->bio);
-+		return;
-+	}
-+
-+	clear_btree_node_write_in_flight_inner(b);
-+	wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
-+	INIT_WORK(&wb->work, btree_node_write_work);
-+	queue_work(c->btree_io_complete_wq, &wb->work);
-+}
-+
-+static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
-+				   struct bset *i, unsigned sectors)
-+{
-+	struct printbuf buf = PRINTBUF;
-+	bool saw_error;
-+	int ret;
-+
-+	ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key),
-+				BKEY_TYPE_btree, WRITE, &buf);
-+
-+	if (ret)
-+		bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf);
-+	printbuf_exit(&buf);
-+	if (ret)
-+		return ret;
-+
-+	ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?:
-+		validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error);
-+	if (ret) {
-+		bch2_inconsistent_error(c);
-+		dump_stack();
-+	}
-+
-+	return ret;
-+}
-+
-+static void btree_write_submit(struct work_struct *work)
-+{
-+	struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work);
-+	struct bch_extent_ptr *ptr;
-+	BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
-+
-+	bkey_copy(&tmp.k, &wbio->key);
-+
-+	bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&tmp.k)), ptr)
-+		ptr->offset += wbio->sector_offset;
-+
-+	bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree,
-+				  &tmp.k, false);
-+}
-+
-+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
-+{
-+	struct btree_write_bio *wbio;
-+	struct bset_tree *t;
-+	struct bset *i;
-+	struct btree_node *bn = NULL;
-+	struct btree_node_entry *bne = NULL;
-+	struct sort_iter_stack sort_iter;
-+	struct nonce nonce;
-+	unsigned bytes_to_write, sectors_to_write, bytes, u64s;
-+	u64 seq = 0;
-+	bool used_mempool;
-+	unsigned long old, new;
-+	bool validate_before_checksum = false;
-+	enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK;
-+	void *data;
-+	int ret;
-+
-+	if (flags & BTREE_WRITE_ALREADY_STARTED)
-+		goto do_write;
-+
-+	/*
-+	 * We may only have a read lock on the btree node - the dirty bit is our
-+	 * "lock" against racing with other threads that may be trying to start
-+	 * a write, we do a write iff we clear the dirty bit. Since setting the
-+	 * dirty bit requires a write lock, we can't race with other threads
-+	 * redirtying it:
-+	 */
-+	do {
-+		old = new = READ_ONCE(b->flags);
-+
-+		if (!(old & (1 << BTREE_NODE_dirty)))
-+			return;
-+
-+		if ((flags & BTREE_WRITE_ONLY_IF_NEED) &&
-+		    !(old & (1 << BTREE_NODE_need_write)))
-+			return;
-+
-+		if (old &
-+		    ((1 << BTREE_NODE_never_write)|
-+		     (1 << BTREE_NODE_write_blocked)))
-+			return;
-+
-+		if (b->written &&
-+		    (old & (1 << BTREE_NODE_will_make_reachable)))
-+			return;
-+
-+		if (old & (1 << BTREE_NODE_write_in_flight))
-+			return;
-+
-+		if (flags & BTREE_WRITE_ONLY_IF_NEED)
-+			type = new & BTREE_WRITE_TYPE_MASK;
-+		new &= ~BTREE_WRITE_TYPE_MASK;
-+
-+		new &= ~(1 << BTREE_NODE_dirty);
-+		new &= ~(1 << BTREE_NODE_need_write);
-+		new |= (1 << BTREE_NODE_write_in_flight);
-+		new |= (1 << BTREE_NODE_write_in_flight_inner);
-+		new |= (1 << BTREE_NODE_just_written);
-+		new ^= (1 << BTREE_NODE_write_idx);
-+	} while (cmpxchg_acquire(&b->flags, old, new) != old);
-+
-+	if (new & (1U << BTREE_NODE_need_write))
-+		return;
-+do_write:
-+	BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0));
-+
-+	atomic_dec(&c->btree_cache.dirty);
-+
-+	BUG_ON(btree_node_fake(b));
-+	BUG_ON((b->will_make_reachable != 0) != !b->written);
-+
-+	BUG_ON(b->written >= btree_sectors(c));
-+	BUG_ON(b->written & (block_sectors(c) - 1));
-+	BUG_ON(bset_written(b, btree_bset_last(b)));
-+	BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
-+	BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
-+
-+	bch2_sort_whiteouts(c, b);
-+
-+	sort_iter_stack_init(&sort_iter, b);
-+
-+	bytes = !b->written
-+		? sizeof(struct btree_node)
-+		: sizeof(struct btree_node_entry);
-+
-+	bytes += b->whiteout_u64s * sizeof(u64);
-+
-+	for_each_bset(b, t) {
-+		i = bset(b, t);
-+
-+		if (bset_written(b, i))
-+			continue;
-+
-+		bytes += le16_to_cpu(i->u64s) * sizeof(u64);
-+		sort_iter_add(&sort_iter.iter,
-+			      btree_bkey_first(b, t),
-+			      btree_bkey_last(b, t));
-+		seq = max(seq, le64_to_cpu(i->journal_seq));
-+	}
-+
-+	BUG_ON(b->written && !seq);
-+
-+	/* bch2_varint_decode may read up to 7 bytes past the end of the buffer: */
-+	bytes += 8;
-+
-+	/* buffer must be a multiple of the block size */
-+	bytes = round_up(bytes, block_bytes(c));
-+
-+	data = btree_bounce_alloc(c, bytes, &used_mempool);
-+
-+	if (!b->written) {
-+		bn = data;
-+		*bn = *b->data;
-+		i = &bn->keys;
-+	} else {
-+		bne = data;
-+		bne->keys = b->data->keys;
-+		i = &bne->keys;
-+	}
-+
-+	i->journal_seq = cpu_to_le64(seq);
-+	i->u64s = 0;
-+
-+	sort_iter_add(&sort_iter.iter,
-+		      unwritten_whiteouts_start(c, b),
-+		      unwritten_whiteouts_end(c, b));
-+	SET_BSET_SEPARATE_WHITEOUTS(i, false);
-+
-+	b->whiteout_u64s = 0;
-+
-+	u64s = bch2_sort_keys(i->start, &sort_iter.iter, false);
-+	le16_add_cpu(&i->u64s, u64s);
-+
-+	BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
-+
-+	set_needs_whiteout(i, false);
-+
-+	/* do we have data to write? */
-+	if (b->written && !i->u64s)
-+		goto nowrite;
-+
-+	bytes_to_write = vstruct_end(i) - data;
-+	sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
-+
-+	if (!b->written &&
-+	    b->key.k.type == KEY_TYPE_btree_ptr_v2)
-+		BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
-+
-+	memset(data + bytes_to_write, 0,
-+	       (sectors_to_write << 9) - bytes_to_write);
-+
-+	BUG_ON(b->written + sectors_to_write > btree_sectors(c));
-+	BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
-+	BUG_ON(i->seq != b->data->keys.seq);
-+
-+	i->version = cpu_to_le16(c->sb.version);
-+	SET_BSET_OFFSET(i, b->written);
-+	SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c));
-+
-+	if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)))
-+		validate_before_checksum = true;
-+
-+	/* validate_bset will be modifying: */
-+	if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
-+		validate_before_checksum = true;
-+
-+	/* if we're going to be encrypting, check metadata validity first: */
-+	if (validate_before_checksum &&
-+	    validate_bset_for_write(c, b, i, sectors_to_write))
-+		goto err;
-+
-+	ret = bset_encrypt(c, i, b->written << 9);
-+	if (bch2_fs_fatal_err_on(ret, c,
-+			"error encrypting btree node: %i\n", ret))
-+		goto err;
-+
-+	nonce = btree_nonce(i, b->written << 9);
-+
-+	if (bn)
-+		bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
-+	else
-+		bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
-+
-+	/* if we're not encrypting, check metadata after checksumming: */
-+	if (!validate_before_checksum &&
-+	    validate_bset_for_write(c, b, i, sectors_to_write))
-+		goto err;
-+
-+	/*
-+	 * We handle btree write errors by immediately halting the journal -
-+	 * after we've done that, we can't issue any subsequent btree writes
-+	 * because they might have pointers to new nodes that failed to write.
-+	 *
-+	 * Furthermore, there's no point in doing any more btree writes because
-+	 * with the journal stopped, we're never going to update the journal to
-+	 * reflect that those writes were done and the data flushed from the
-+	 * journal:
-+	 *
-+	 * Also on journal error, the pending write may have updates that were
-+	 * never journalled (interior nodes, see btree_update_nodes_written()) -
-+	 * it's critical that we don't do the write in that case otherwise we
-+	 * will have updates visible that weren't in the journal:
-+	 *
-+	 * Make sure to update b->written so bch2_btree_init_next() doesn't
-+	 * break:
-+	 */
-+	if (bch2_journal_error(&c->journal) ||
-+	    c->opts.nochanges)
-+		goto err;
-+
-+	trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write);
-+
-+	wbio = container_of(bio_alloc_bioset(NULL,
-+				buf_pages(data, sectors_to_write << 9),
-+				REQ_OP_WRITE|REQ_META,
-+				GFP_NOFS,
-+				&c->btree_bio),
-+			    struct btree_write_bio, wbio.bio);
-+	wbio_init(&wbio->wbio.bio);
-+	wbio->data			= data;
-+	wbio->data_bytes		= bytes;
-+	wbio->sector_offset		= b->written;
-+	wbio->wbio.c			= c;
-+	wbio->wbio.used_mempool		= used_mempool;
-+	wbio->wbio.first_btree_write	= !b->written;
-+	wbio->wbio.bio.bi_end_io	= btree_node_write_endio;
-+	wbio->wbio.bio.bi_private	= b;
-+
-+	bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
-+
-+	bkey_copy(&wbio->key, &b->key);
-+
-+	b->written += sectors_to_write;
-+
-+	if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
-+		bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
-+			cpu_to_le16(b->written);
-+
-+	atomic64_inc(&c->btree_write_stats[type].nr);
-+	atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
-+
-+	INIT_WORK(&wbio->work, btree_write_submit);
-+	queue_work(c->io_complete_wq, &wbio->work);
-+	return;
-+err:
-+	set_btree_node_noevict(b);
-+	b->written += sectors_to_write;
-+nowrite:
-+	btree_bounce_free(c, bytes, used_mempool, data);
-+	__btree_node_write_done(c, b);
-+}
-+
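The comment in __bch2_btree_node_write() above describes using the dirty bit itself as the write "lock": whoever atomically clears it owns the write. A minimal C11 sketch of claiming work by clearing a bit with acquire semantics (names hypothetical, pthread-free userspace stand-in for cmpxchg_acquire()):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define DIRTY 1u

	/* Returns true iff this caller cleared DIRTY and therefore owns the write. */
	static bool claim_write(_Atomic unsigned *flags)
	{
		unsigned old = atomic_load(flags);

		do {
			if (!(old & DIRTY))
				return false;	/* someone else already claimed it */
		} while (!atomic_compare_exchange_weak_explicit(flags, &old, old & ~DIRTY,
								memory_order_acquire,
								memory_order_relaxed));
		return true;
	}

	int main(void)
	{
		_Atomic unsigned flags = DIRTY;

		printf("%d\n", claim_write(&flags));	/* 1: we own the write */
		printf("%d\n", claim_write(&flags));	/* 0: already claimed */
		return 0;
	}
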
-+/*
-+ * Work that must be done with write lock held:
-+ */
-+bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
-+{
-+	bool invalidated_iter = false;
-+	struct btree_node_entry *bne;
-+	struct bset_tree *t;
-+
-+	if (!btree_node_just_written(b))
-+		return false;
-+
-+	BUG_ON(b->whiteout_u64s);
-+
-+	clear_btree_node_just_written(b);
-+
-+	/*
-+	 * Note: immediately after write, bset_written() doesn't work - the
-+	 * amount of data we had to write after compaction might have been
-+	 * smaller than the offset of the last bset.
-+	 *
-+	 * However, we know that all bsets have been written here, as long as
-+	 * we're still holding the write lock:
-+	 */
-+
-+	/*
-+	 * XXX: decide if we really want to unconditionally sort down to a
-+	 * single bset:
-+	 */
-+	if (b->nsets > 1) {
-+		btree_node_sort(c, b, 0, b->nsets, true);
-+		invalidated_iter = true;
-+	} else {
-+		invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
-+	}
-+
-+	for_each_bset(b, t)
-+		set_needs_whiteout(bset(b, t), true);
-+
-+	bch2_btree_verify(c, b);
-+
-+	/*
-+	 * If later we don't unconditionally sort down to a single bset, we have
-+	 * to ensure this is still true:
-+	 */
-+	BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b));
-+
-+	bne = want_new_bset(c, b);
-+	if (bne)
-+		bch2_bset_init_next(c, b, bne);
-+
-+	bch2_btree_build_aux_trees(b);
-+
-+	return invalidated_iter;
-+}
-+
-+/*
-+ * Use this one if the node is intent locked:
-+ */
-+void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-+			   enum six_lock_type lock_type_held,
-+			   unsigned flags)
-+{
-+	if (lock_type_held == SIX_LOCK_intent ||
-+	    (lock_type_held == SIX_LOCK_read &&
-+	     six_lock_tryupgrade(&b->c.lock))) {
-+		__bch2_btree_node_write(c, b, flags);
-+
-+		/* don't cycle lock unnecessarily: */
-+		if (btree_node_just_written(b) &&
-+		    six_trylock_write(&b->c.lock)) {
-+			bch2_btree_post_write_cleanup(c, b);
-+			six_unlock_write(&b->c.lock);
-+		}
-+
-+		if (lock_type_held == SIX_LOCK_read)
-+			six_lock_downgrade(&b->c.lock);
-+	} else {
-+		__bch2_btree_node_write(c, b, flags);
-+		if (lock_type_held == SIX_LOCK_write &&
-+		    btree_node_just_written(b))
-+			bch2_btree_post_write_cleanup(c, b);
-+	}
-+}
-+
-+static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
-+{
-+	struct bucket_table *tbl;
-+	struct rhash_head *pos;
-+	struct btree *b;
-+	unsigned i;
-+	bool ret = false;
-+restart:
-+	rcu_read_lock();
-+	for_each_cached_btree(b, c, tbl, i, pos)
-+		if (test_bit(flag, &b->flags)) {
-+			rcu_read_unlock();
-+			wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
-+			ret = true;
-+			goto restart;
-+		}
-+	rcu_read_unlock();
-+
-+	return ret;
-+}
-+
-+bool bch2_btree_flush_all_reads(struct bch_fs *c)
-+{
-+	return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
-+}
-+
-+bool bch2_btree_flush_all_writes(struct bch_fs *c)
-+{
-+	return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
-+}
-+
-+static const char * const bch2_btree_write_types[] = {
-+#define x(t, n) [n] = #t,
-+	BCH_BTREE_WRITE_TYPES()
-+	NULL
-+};
-+
-+void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	printbuf_tabstop_push(out, 20);
-+	printbuf_tabstop_push(out, 10);
-+
-+	prt_tab(out);
-+	prt_str(out, "nr");
-+	prt_tab(out);
-+	prt_str(out, "size");
-+	prt_newline(out);
-+
-+	for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) {
-+		u64 nr		= atomic64_read(&c->btree_write_stats[i].nr);
-+		u64 bytes	= atomic64_read(&c->btree_write_stats[i].bytes);
-+
-+		prt_printf(out, "%s:", bch2_btree_write_types[i]);
-+		prt_tab(out);
-+		prt_u64(out, nr);
-+		prt_tab(out);
-+		prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0);
-+		prt_newline(out);
-+	}
-+}
-diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
-new file mode 100644
-index 000000000000..7e03dd76fb38
---- /dev/null
-+++ b/fs/bcachefs/btree_io.h
-@@ -0,0 +1,228 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_IO_H
-+#define _BCACHEFS_BTREE_IO_H
-+
-+#include "bkey_methods.h"
-+#include "bset.h"
-+#include "btree_locking.h"
-+#include "checksum.h"
-+#include "extents.h"
-+#include "io_write_types.h"
-+
-+struct bch_fs;
-+struct btree_write;
-+struct btree;
-+struct btree_iter;
-+struct btree_node_read_all;
-+
-+static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
-+{
-+	if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
-+		atomic_inc(&c->btree_cache.dirty);
-+}
-+
-+static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b)
-+{
-+	if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
-+		atomic_dec(&c->btree_cache.dirty);
-+}
-+
-+static inline unsigned btree_ptr_sectors_written(struct bkey_i *k)
-+{
-+	return k->k.type == KEY_TYPE_btree_ptr_v2
-+		? le16_to_cpu(bkey_i_to_btree_ptr_v2(k)->v.sectors_written)
-+		: 0;
-+}
-+
-+struct btree_read_bio {
-+	struct bch_fs		*c;
-+	struct btree		*b;
-+	struct btree_node_read_all *ra;
-+	u64			start_time;
-+	unsigned		have_ioref:1;
-+	unsigned		idx:7;
-+	struct extent_ptr_decoded	pick;
-+	struct work_struct	work;
-+	struct bio		bio;
-+};
-+
-+struct btree_write_bio {
-+	struct work_struct	work;
-+	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
-+	void			*data;
-+	unsigned		data_bytes;
-+	unsigned		sector_offset;
-+	struct bch_write_bio	wbio;
-+};
-+
-+void bch2_btree_node_io_unlock(struct btree *);
-+void bch2_btree_node_io_lock(struct btree *);
-+void __bch2_btree_node_wait_on_read(struct btree *);
-+void __bch2_btree_node_wait_on_write(struct btree *);
-+void bch2_btree_node_wait_on_read(struct btree *);
-+void bch2_btree_node_wait_on_write(struct btree *);
-+
-+enum compact_mode {
-+	COMPACT_LAZY,
-+	COMPACT_ALL,
-+};
-+
-+bool bch2_compact_whiteouts(struct bch_fs *, struct btree *,
-+			    enum compact_mode);
-+
-+static inline bool should_compact_bset_lazy(struct btree *b,
-+					    struct bset_tree *t)
-+{
-+	unsigned total_u64s = bset_u64s(t);
-+	unsigned dead_u64s = bset_dead_u64s(b, t);
-+
-+	return dead_u64s > 64 && dead_u64s * 3 > total_u64s;
-+}
-+
-+static inline bool bch2_maybe_compact_whiteouts(struct bch_fs *c, struct btree *b)
-+{
-+	struct bset_tree *t;
-+
-+	for_each_bset(b, t)
-+		if (should_compact_bset_lazy(b, t))
-+			return bch2_compact_whiteouts(c, b, COMPACT_LAZY);
-+
-+	return false;
-+}
-+
-+static inline struct nonce btree_nonce(struct bset *i, unsigned offset)
-+{
-+	return (struct nonce) {{
-+		[0] = cpu_to_le32(offset),
-+		[1] = ((__le32 *) &i->seq)[0],
-+		[2] = ((__le32 *) &i->seq)[1],
-+		[3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
-+	}};
-+}
-+
-+static inline int bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
-+{
-+	struct nonce nonce = btree_nonce(i, offset);
-+	int ret;
-+
-+	if (!offset) {
-+		struct btree_node *bn = container_of(i, struct btree_node, keys);
-+		unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
-+
-+		ret = bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
-+				   &bn->flags, bytes);
-+		if (ret)
-+			return ret;
-+
-+		nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
-+	}
-+
-+	return bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
-+			    vstruct_end(i) - (void *) i->_data);
-+}
-+
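btree_nonce() above derives a distinct encryption nonce for every bset from the bset's byte offset within the node plus the node's seq and journal_seq fields, so no two bsets are encrypted under the same (key, nonce) pair. A userspace sketch of that derivation, with plain structs standing in for struct bset and struct nonce and a hypothetical domain-separation constant:

	#include <stdint.h>
	#include <stdio.h>

	#define NONCE_BTREE 0x10000000u	/* hypothetical domain-separation tag */

	struct nonce { uint32_t d[4]; };

	static struct nonce make_nonce(uint64_t seq, uint64_t journal_seq, uint32_t offset)
	{
		return (struct nonce) {{
			offset,				/* differs per bset in the node */
			(uint32_t) seq,			/* differs per node */
			(uint32_t) (seq >> 32),
			(uint32_t) journal_seq ^ NONCE_BTREE,	/* domain separation */
		}};
	}

	int main(void)
	{
		struct nonce a = make_nonce(0x1234, 99, 0);
		struct nonce b = make_nonce(0x1234, 99, 4096);

		printf("%u %u\n", a.d[0], b.d[0]);	/* same node, distinct nonces */
		return 0;
	}
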
-+void bch2_btree_sort_into(struct bch_fs *, struct btree *, struct btree *);
-+
-+void bch2_btree_node_drop_keys_outside_node(struct btree *);
-+
-+void bch2_btree_build_aux_trees(struct btree *);
-+void bch2_btree_init_next(struct btree_trans *, struct btree *);
-+
-+int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *,
-+			      struct btree *, bool, bool *);
-+void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
-+int bch2_btree_root_read(struct bch_fs *, enum btree_id,
-+			 const struct bkey_i *, unsigned);
-+
-+void bch2_btree_complete_write(struct bch_fs *, struct btree *,
-+			       struct btree_write *);
-+
-+bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
-+
-+enum btree_write_flags {
-+	__BTREE_WRITE_ONLY_IF_NEED = BTREE_WRITE_TYPE_BITS,
-+	__BTREE_WRITE_ALREADY_STARTED,
-+};
-+#define BTREE_WRITE_ONLY_IF_NEED	BIT(__BTREE_WRITE_ONLY_IF_NEED)
-+#define BTREE_WRITE_ALREADY_STARTED	BIT(__BTREE_WRITE_ALREADY_STARTED)
-+
-+void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned);
-+void bch2_btree_node_write(struct bch_fs *, struct btree *,
-+			   enum six_lock_type, unsigned);
-+
-+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
-+					    enum six_lock_type lock_held)
-+{
-+	bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
-+}
-+
-+bool bch2_btree_flush_all_reads(struct bch_fs *);
-+bool bch2_btree_flush_all_writes(struct bch_fs *);
-+
-+static inline void compat_bformat(unsigned level, enum btree_id btree_id,
-+				  unsigned version, unsigned big_endian,
-+				  int write, struct bkey_format *f)
-+{
-+	if (version < bcachefs_metadata_version_inode_btree_change &&
-+	    btree_id == BTREE_ID_inodes) {
-+		swap(f->bits_per_field[BKEY_FIELD_INODE],
-+		     f->bits_per_field[BKEY_FIELD_OFFSET]);
-+		swap(f->field_offset[BKEY_FIELD_INODE],
-+		     f->field_offset[BKEY_FIELD_OFFSET]);
-+	}
-+
-+	if (version < bcachefs_metadata_version_snapshot &&
-+	    (level || btree_type_has_snapshots(btree_id))) {
-+		u64 max_packed =
-+			~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
-+
-+		f->field_offset[BKEY_FIELD_SNAPSHOT] = write
-+			? 0
-+			: cpu_to_le64(U32_MAX - max_packed);
-+	}
-+}
-+
-+static inline void compat_bpos(unsigned level, enum btree_id btree_id,
-+			       unsigned version, unsigned big_endian,
-+			       int write, struct bpos *p)
-+{
-+	if (big_endian != CPU_BIG_ENDIAN)
-+		bch2_bpos_swab(p);
-+
-+	if (version < bcachefs_metadata_version_inode_btree_change &&
-+	    btree_id == BTREE_ID_inodes)
-+		swap(p->inode, p->offset);
-+}
-+
-+static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
-+				     unsigned version, unsigned big_endian,
-+				     int write,
-+				     struct btree_node *bn)
-+{
-+	if (version < bcachefs_metadata_version_inode_btree_change &&
-+	    btree_id_is_extents(btree_id) &&
-+	    !bpos_eq(bn->min_key, POS_MIN) &&
-+	    write)
-+		bn->min_key = bpos_nosnap_predecessor(bn->min_key);
-+
-+	if (version < bcachefs_metadata_version_snapshot &&
-+	    write)
-+		bn->max_key.snapshot = 0;
-+
-+	compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
-+	compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
-+
-+	if (version < bcachefs_metadata_version_snapshot &&
-+	    !write)
-+		bn->max_key.snapshot = U32_MAX;
-+
-+	if (version < bcachefs_metadata_version_inode_btree_change &&
-+	    btree_id_is_extents(btree_id) &&
-+	    !bpos_eq(bn->min_key, POS_MIN) &&
-+	    !write)
-+		bn->min_key = bpos_nosnap_successor(bn->min_key);
-+}
-+
-+void bch2_btree_write_stats_to_text(struct printbuf *, struct bch_fs *);
-+
-+#endif /* _BCACHEFS_BTREE_IO_H */
-diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
-new file mode 100644
-index 000000000000..c2adf3fbb0b3
---- /dev/null
-+++ b/fs/bcachefs/btree_iter.c
-@@ -0,0 +1,3242 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "bkey_methods.h"
-+#include "bkey_buf.h"
-+#include "btree_cache.h"
-+#include "btree_iter.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_locking.h"
-+#include "btree_update.h"
-+#include "debug.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "journal.h"
-+#include "replicas.h"
-+#include "snapshot.h"
-+#include "trace.h"
-+
-+#include
-+#include
-+
-+static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
-+static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
-+				       struct btree_path *);
-+
-+static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
-+{
-+#ifdef TRACK_PATH_ALLOCATED
-+	return iter->ip_allocated;
-+#else
-+	return 0;
-+#endif
-+}
-+
-+static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *);
-+
-+static inline int __btree_path_cmp(const struct btree_path *l,
-+				   enum btree_id r_btree_id,
-+				   bool r_cached,
-+				   struct bpos r_pos,
-+				   unsigned r_level)
-+{
-+	/*
-+	 * Must match lock ordering as defined by __bch2_btree_node_lock:
-+	 */
-+	return cmp_int(l->btree_id, r_btree_id) ?:
-+		cmp_int((int) l->cached, (int) r_cached) ?:
-+		bpos_cmp(l->pos, r_pos) ?:
-+		-cmp_int(l->level, r_level);
-+}
-+
-+static inline int btree_path_cmp(const struct btree_path *l,
-+				 const struct btree_path *r)
-+{
-+	return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
-+}
-+
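__btree_path_cmp() above chains cmp_int()/bpos_cmp() results with GNU C's `a ?: b` shorthand so that paths sort in exactly the lock ordering the comment requires: by btree, then cached flag, then position, then descending level. A standalone sketch of that chained-comparator idiom (integer fields standing in for the real types):

	#include <stdio.h>
	#include <stdlib.h>

	#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))

	struct path { int btree_id, cached, pos, level; };

	static int path_cmp(const void *a, const void *b)
	{
		const struct path *l = a, *r = b;

		/* first nonzero comparison wins; level sorts descending */
		return  cmp_int(l->btree_id, r->btree_id) ?:
			cmp_int(l->cached, r->cached) ?:
			cmp_int(l->pos, r->pos) ?:
			-cmp_int(l->level, r->level);
	}

	int main(void)
	{
		struct path p[] = { {1, 0, 5, 0}, {0, 0, 9, 1}, {0, 0, 9, 2} };

		qsort(p, 3, sizeof(p[0]), path_cmp);
		for (int i = 0; i < 3; i++)	/* equal positions: deeper level first */
			printf("%d %d %d %d\n",
			       p[i].btree_id, p[i].cached, p[i].pos, p[i].level);
		return 0;
	}
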
-+static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
-+{
-+	/* Are we iterating over keys in all snapshots? */
-+	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
-+		p = bpos_successor(p);
-+	} else {
-+		p = bpos_nosnap_successor(p);
-+		p.snapshot = iter->snapshot;
-+	}
-+
-+	return p;
-+}
-+
-+static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
-+{
-+	/* Are we iterating over keys in all snapshots? */
-+	if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
-+		p = bpos_predecessor(p);
-+	} else {
-+		p = bpos_nosnap_predecessor(p);
-+		p.snapshot = iter->snapshot;
-+	}
-+
-+	return p;
-+}
-+
-+static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
-+{
-+	struct bpos pos = iter->pos;
-+
-+	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
-+	    !bkey_eq(pos, POS_MAX))
-+		pos = bkey_successor(iter, pos);
-+	return pos;
-+}
-+
-+static inline bool btree_path_pos_before_node(struct btree_path *path,
-+					      struct btree *b)
-+{
-+	return bpos_lt(path->pos, b->data->min_key);
-+}
-+
-+static inline bool btree_path_pos_after_node(struct btree_path *path,
-+					     struct btree *b)
-+{
-+	return bpos_gt(path->pos, b->key.k.p);
-+}
-+
-+static inline bool btree_path_pos_in_node(struct btree_path *path,
-+					  struct btree *b)
-+{
-+	return path->btree_id == b->c.btree_id &&
-+		!btree_path_pos_before_node(path, b) &&
-+		!btree_path_pos_after_node(path, b);
-+}
-+
-+/* Btree iterator: */
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+
-+static void bch2_btree_path_verify_cached(struct btree_trans *trans,
-+					  struct btree_path *path)
-+{
-+	struct bkey_cached *ck;
-+	bool locked = btree_node_locked(path, 0);
-+
-+	if (!bch2_btree_node_relock(trans, path, 0))
-+		return;
-+
-+	ck = (void *) path->l[0].b;
-+	BUG_ON(ck->key.btree_id != path->btree_id ||
-+	       !bkey_eq(ck->key.pos, path->pos));
-+
-+	if (!locked)
-+		btree_node_unlock(trans, path, 0);
-+}
-+
-+static void bch2_btree_path_verify_level(struct btree_trans *trans,
-+					 struct btree_path *path, unsigned level)
-+{
-+	struct btree_path_level *l;
-+	struct btree_node_iter tmp;
-+	bool locked;
-+	struct bkey_packed *p, *k;
-+	struct printbuf buf1 = PRINTBUF;
-+	struct printbuf buf2 = PRINTBUF;
-+	struct printbuf buf3 = PRINTBUF;
-+	const char *msg;
-+
-+	if (!bch2_debug_check_iterators)
-+		return;
-+
-+	l	= &path->l[level];
-+	tmp	= l->iter;
-+	locked	= btree_node_locked(path, level);
-+
-+	if (path->cached) {
-+		if (!level)
-+			bch2_btree_path_verify_cached(trans, path);
-+		return;
-+	}
-+
-+	if (!btree_path_node(path, level))
-+		return;
-+
-+	if (!bch2_btree_node_relock_notrace(trans, path, level))
-+		return;
-+
-+	BUG_ON(!btree_path_pos_in_node(path, l->b));
-+
-+	bch2_btree_node_iter_verify(&l->iter, l->b);
-+
-+	/*
-+	 * For interior nodes, the iterator will have skipped past deleted keys:
-+	 */
-+	p = level
-+		? bch2_btree_node_iter_prev(&tmp, l->b)
-+		: bch2_btree_node_iter_prev_all(&tmp, l->b);
-+	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+
-+	if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
-+		msg = "before";
-+		goto err;
-+	}
-+
-+	if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
-+		msg = "after";
-+		goto err;
-+	}
-+
-+	if (!locked)
-+		btree_node_unlock(trans, path, level);
-+	return;
-+err:
-+	bch2_bpos_to_text(&buf1, path->pos);
-+
-+	if (p) {
-+		struct bkey uk = bkey_unpack_key(l->b, p);
-+
-+		bch2_bkey_to_text(&buf2, &uk);
-+	} else {
-+		prt_printf(&buf2, "(none)");
-+	}
-+
-+	if (k) {
-+		struct bkey uk = bkey_unpack_key(l->b, k);
-+
-+		bch2_bkey_to_text(&buf3, &uk);
-+	} else {
-+		prt_printf(&buf3, "(none)");
-+	}
-+
-+	panic("path should be %s key at level %u:\n"
-+	      "path pos %s\n"
-+	      "prev key %s\n"
-+	      "cur key %s\n",
-+	      msg, level, buf1.buf, buf2.buf, buf3.buf);
-+}
-+
-+static void bch2_btree_path_verify(struct btree_trans *trans,
-+				   struct btree_path *path)
-+{
-+	struct bch_fs *c = trans->c;
-+	unsigned i;
-+
-+	EBUG_ON(path->btree_id >= BTREE_ID_NR);
-+
-+	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
-+		if (!path->l[i].b) {
-+			BUG_ON(!path->cached &&
-+			       bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
-+			break;
-+		}
-+
-+		bch2_btree_path_verify_level(trans, path, i);
-+	}
-+
-+	bch2_btree_path_verify_locks(path);
-+}
-+
-+void bch2_trans_verify_paths(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path)
-+		bch2_btree_path_verify(trans, path);
-+}
-+
-+static void bch2_btree_iter_verify(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+
-+	BUG_ON(iter->btree_id >= BTREE_ID_NR);
-+
-+	BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached);
-+
-+	BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
-+	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
-+
-+	BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) &&
-+	       (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
-+	       !btree_type_has_snapshot_field(iter->btree_id));
-+
-+	if (iter->update_path)
-+		bch2_btree_path_verify(trans, iter->update_path);
-+	bch2_btree_path_verify(trans, iter->path);
-+}
-+
-+static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
-+{
-+	BUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
-+	       !iter->pos.snapshot);
-+
-+	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
-+	       iter->pos.snapshot != iter->snapshot);
-+
-+	BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) ||
-+	       bkey_gt(iter->pos, iter->k.p));
-+}
-+
-+static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct btree_iter copy;
-+	struct bkey_s_c prev;
-+	int ret = 0;
-+
-+	if (!bch2_debug_check_iterators)
-+		return 0;
-+
-+	if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
-+		return 0;
-+
-+	if (bkey_err(k) || !k.k)
-+		return 0;
-+
-+	BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
-+					  iter->snapshot,
-+					  k.k->p.snapshot));
-+
-+	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
-+			     BTREE_ITER_NOPRESERVE|
-+			     BTREE_ITER_ALL_SNAPSHOTS);
-+	prev = bch2_btree_iter_prev(&copy);
-+	if (!prev.k)
-+		goto out;
-+
-+	ret = bkey_err(prev);
-+	if (ret)
-+		goto out;
-+
-+	if (bkey_eq(prev.k->p, k.k->p) &&
-+	    bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
-+				      prev.k->p.snapshot) > 0) {
-+		struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-+
-+		bch2_bkey_to_text(&buf1, k.k);
-+		bch2_bkey_to_text(&buf2, prev.k);
-+
-+		panic("iter snap %u\n"
-+		      "k %s\n"
-+		      "prev %s\n",
-+		      iter->snapshot,
-+		      buf1.buf, buf2.buf);
-+	}
-+out:
-+	bch2_trans_iter_exit(trans, &copy);
-+	return ret;
-+}
-+
-+void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
-+			    struct bpos pos, bool key_cache)
-+{
-+	struct btree_path *path;
-+	unsigned idx;
-+	struct printbuf buf = PRINTBUF;
-+
-+	btree_trans_sort_paths(trans);
-+
-+	trans_for_each_path_inorder(trans, path, idx) {
-+		int cmp = cmp_int(path->btree_id, id) ?:
-+			cmp_int(path->cached, key_cache);
-+
-+		if (cmp > 0)
-+			break;
-+		if (cmp < 0)
-+			continue;
-+
-+		if (!btree_node_locked(path, 0) ||
-+		    !path->should_be_locked)
-+			continue;
-+
-+		if (!key_cache) {
-+			if (bkey_ge(pos, path->l[0].b->data->min_key) &&
-+			    bkey_le(pos, path->l[0].b->key.k.p))
-+				return;
-+		} else {
-+			if (bkey_eq(pos, path->pos))
-+				return;
-+		}
-+	}
-+
-+	bch2_dump_trans_paths_updates(trans);
-+	bch2_bpos_to_text(&buf, pos);
-+
-+	panic("not locked: %s %s%s\n",
-+	      bch2_btree_id_str(id), buf.buf,
-+	      key_cache ? " cached" : "");
-+}
-+
-+#else
-+
-+static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
-+						struct btree_path *path, unsigned l) {}
-+static inline void bch2_btree_path_verify(struct btree_trans *trans,
-+					  struct btree_path *path) {}
-+static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
-+static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
-+static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
-+
-+#endif
-+
-+/* Btree path: fixups after btree updates */
-+
-+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
-+					struct btree *b,
-+					struct bset_tree *t,
-+					struct bkey_packed *k)
-+{
-+	struct btree_node_iter_set *set;
-+
-+	btree_node_iter_for_each(iter, set)
-+		if (set->end == t->end_offset) {
-+			set->k = __btree_node_key_to_offset(b, k);
-+			bch2_btree_node_iter_sort(iter, b);
-+			return;
-+		}
-+
-+	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
-+}
-+
-+static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
-+					       struct btree *b,
-+					       struct bkey_packed *where)
-+{
-+	struct btree_path_level *l = &path->l[b->c.level];
-+
-+	if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
-+		return;
-+
-+	if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
-+		bch2_btree_node_iter_advance(&l->iter, l->b);
-+}
-+
-+void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
-+				      struct btree *b,
-+				      struct bkey_packed *where)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path_with_node(trans, b, path) {
-+		__bch2_btree_path_fix_key_modified(path, b, where);
-+		bch2_btree_path_verify_level(trans, path, b->c.level);
-+	}
-+}
-+
-+static void __bch2_btree_node_iter_fix(struct btree_path *path,
-+				       struct btree *b,
-+				       struct btree_node_iter *node_iter,
-+				       struct bset_tree *t,
-+				       struct bkey_packed *where,
-+				       unsigned clobber_u64s,
-+				       unsigned new_u64s)
-+{
-+	const struct bkey_packed *end = btree_bkey_last(b, t);
-+	struct btree_node_iter_set *set;
-+	unsigned offset = __btree_node_key_to_offset(b, where);
-+	int shift = new_u64s - clobber_u64s;
-+	unsigned old_end = t->end_offset - shift;
-+	unsigned orig_iter_pos = node_iter->data[0].k;
-+	bool iter_current_key_modified =
-+		orig_iter_pos >= offset &&
-+		orig_iter_pos <= offset + clobber_u64s;
-+
-+	btree_node_iter_for_each(node_iter, set)
-+		if (set->end == old_end)
-+			goto found;
-+
-+	/* didn't find the bset in the iterator - might have to readd it: */
-+	if (new_u64s &&
-+	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
-+		bch2_btree_node_iter_push(node_iter, b, where, end);
-+		goto fixup_done;
-+	} else {
-+		/* Iterator is after key that changed */
-+		return;
-+	}
-+found:
-+	set->end = t->end_offset;
-+
-+	/* Iterator hasn't gotten to the key that changed yet: */
-+	if (set->k < offset)
-+		return;
-+
-+	if (new_u64s &&
-+	    bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
-+		set->k = offset;
-+	} else if (set->k < offset + clobber_u64s) {
-+		set->k = offset + new_u64s;
-+		if (set->k == set->end)
-+			bch2_btree_node_iter_set_drop(node_iter, set);
-+	} else {
-+		/* Iterator is after key that changed */
-+		set->k = (int) set->k + shift;
-+		return;
-+	}
-+
-+	bch2_btree_node_iter_sort(node_iter, b);
-+fixup_done:
-+	if (node_iter->data[0].k != orig_iter_pos)
-+		iter_current_key_modified = true;
-+
-+	/*
-+	 * When a new key is added, and the node iterator now points to that
-+	 * key, the iterator might have skipped past deleted keys that should
-+	 * come after the key the iterator now points to. We have to rewind to
-+	 * before those deleted keys - otherwise
-+	 * bch2_btree_node_iter_prev_all() breaks:
-+	 */
-+	if (!bch2_btree_node_iter_end(node_iter) &&
-+	    iter_current_key_modified &&
-+	    b->c.level) {
-+		struct bkey_packed *k, *k2, *p;
-+
-+		k = bch2_btree_node_iter_peek_all(node_iter, b);
-+
-+		for_each_bset(b, t) {
-+			bool set_pos = false;
-+
-+			if (node_iter->data[0].end == t->end_offset)
-+				continue;
-+
-+			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
-+
-+			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
-+			       bkey_iter_cmp(b, k, p) < 0) {
-+				k2 = p;
-+				set_pos = true;
-+			}
-+
-+			if (set_pos)
-+				btree_node_iter_set_set_pos(node_iter,
-+							    b, t, k2);
-+		}
-+	}
-+}
-+
-+void bch2_btree_node_iter_fix(struct btree_trans *trans,
-+			      struct btree_path *path,
-+			      struct btree *b,
-+			      struct btree_node_iter *node_iter,
-+			      struct bkey_packed *where,
-+			      unsigned clobber_u64s,
-+			      unsigned new_u64s)
-+{
-+	struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
-+	struct btree_path *linked;
-+
-+	if (node_iter != &path->l[b->c.level].iter) {
-+		__bch2_btree_node_iter_fix(path, b, node_iter, t,
-+					   where, clobber_u64s, new_u64s);
-+
-+		if (bch2_debug_check_iterators)
-+			bch2_btree_node_iter_verify(node_iter, b);
-+	}
-+
-+	trans_for_each_path_with_node(trans, b, linked) {
-+		__bch2_btree_node_iter_fix(linked, b,
-+					   &linked->l[b->c.level].iter, t,
-+					   where, clobber_u64s, new_u64s);
-+		bch2_btree_path_verify_level(trans, linked, b->c.level);
-+	}
-+}
-+
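__bch2_btree_node_iter_fix() above repairs saved iterator offsets after an insert or delete shifted the keys underneath them: offsets before the edit stay put, offsets inside the clobbered range snap to the new key, and offsets after it move by the net size change. The same bookkeeping applied to a plain array cursor (hypothetical names):

	#include <stdio.h>

	/* Fix a saved cursor after new_len units replaced old_len units at `at`. */
	static unsigned fix_cursor(unsigned cur, unsigned at,
				   unsigned old_len, unsigned new_len)
	{
		if (cur < at)
			return cur;			/* before the edit: unchanged */
		if (cur < at + old_len)
			return at + new_len;		/* inside it: snap past the new data */
		return cur + new_len - old_len;		/* after it: shift by the delta */
	}

	int main(void)
	{
		/* 3 units replaced by 5 at offset 10 */
		printf("%u %u %u\n",
		       fix_cursor(4, 10, 3, 5),		/* 4  */
		       fix_cursor(11, 10, 3, 5),	/* 15 */
		       fix_cursor(20, 10, 3, 5));	/* 22 */
		return 0;
	}
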
-+/* Btree path level: pointer to a particular btree node and node iter */
-+
-+static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
-+						  struct btree_path_level *l,
-+						  struct bkey *u,
-+						  struct bkey_packed *k)
-+{
-+	if (unlikely(!k)) {
-+		/*
-+		 * signal to bch2_btree_iter_peek_slot() that we're currently at
-+		 * a hole
-+		 */
-+		u->type = KEY_TYPE_deleted;
-+		return bkey_s_c_null;
-+	}
-+
-+	return bkey_disassemble(l->b, k, u);
-+}
-+
-+static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
-+							struct btree_path_level *l,
-+							struct bkey *u)
-+{
-+	return __btree_iter_unpack(c, l, u,
-+			bch2_btree_node_iter_peek_all(&l->iter, l->b));
-+}
-+
-+static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
-+						    struct btree_path *path,
-+						    struct btree_path_level *l,
-+						    struct bkey *u)
-+{
-+	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
-+			bch2_btree_node_iter_peek(&l->iter, l->b));
-+
-+	path->pos = k.k ? k.k->p : l->b->key.k.p;
-+	trans->paths_sorted = false;
-+	bch2_btree_path_verify_level(trans, path, l - path->l);
-+	return k;
-+}
-+
-+static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
-+						    struct btree_path *path,
-+						    struct btree_path_level *l,
-+						    struct bkey *u)
-+{
-+	struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
-+			bch2_btree_node_iter_prev(&l->iter, l->b));
-+
-+	path->pos = k.k ? k.k->p : l->b->data->min_key;
-+	trans->paths_sorted = false;
-+	bch2_btree_path_verify_level(trans, path, l - path->l);
-+	return k;
-+}
-+
-+static inline bool btree_path_advance_to_pos(struct btree_path *path,
-+					     struct btree_path_level *l,
-+					     int max_advance)
-+{
-+	struct bkey_packed *k;
-+	int nr_advanced = 0;
-+
-+	while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
-+	       bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
-+		if (max_advance > 0 && nr_advanced >= max_advance)
-+			return false;
-+
-+		bch2_btree_node_iter_advance(&l->iter, l->b);
-+		nr_advanced++;
-+	}
-+
-+	return true;
-+}
-+
-+static inline void __btree_path_level_init(struct btree_path *path,
-+					   unsigned level)
-+{
-+	struct btree_path_level *l = &path->l[level];
-+
-+	bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
-+
-+	/*
-+	 * Iterators to interior nodes should always be pointed at the first non
-+	 * whiteout:
-+	 */
-+	if (level)
-+		bch2_btree_node_iter_peek(&l->iter, l->b);
-+}
-+
-+void bch2_btree_path_level_init(struct btree_trans *trans,
-+				struct btree_path *path,
-+				struct btree *b)
-+{
-+	BUG_ON(path->cached);
-+
-+	EBUG_ON(!btree_path_pos_in_node(path, b));
-+
-+	path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
-+	path->l[b->c.level].b = b;
-+	__btree_path_level_init(path, b->c.level);
-+}
-+
-+/* Btree path: fixups after btree node updates: */
-+
-+static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_insert_entry *i;
-+
-+	trans_for_each_update(trans, i)
-+		if (!i->cached &&
-+		    i->level == b->c.level &&
-+		    i->btree_id == b->c.btree_id &&
-+		    bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
-+		    bpos_cmp(i->k->k.p, b->data->max_key) <= 0) {
-+			i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
-+
-+			if (unlikely(trans->journal_replay_not_finished)) {
-+				struct bkey_i *j_k =
-+					bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
-+								    i->k->k.p);
-+
-+				if (j_k) {
-+					i->old_k = j_k->k;
-+					i->old_v = &j_k->v;
-+				}
-+			}
-+		}
-+}
-+
-+/*
-+ * A btree node is being replaced - update the iterator to point to the new
-+ * node:
-+ */
-+void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path)
-+		if (path->uptodate == BTREE_ITER_UPTODATE &&
-+		    !path->cached &&
-+		    btree_path_pos_in_node(path, b)) {
-+			enum btree_node_locked_type t =
-+				btree_lock_want(path, b->c.level);
-+
-+			if (t != BTREE_NODE_UNLOCKED) {
-+				btree_node_unlock(trans, path, b->c.level);
-+				six_lock_increment(&b->c.lock, (enum six_lock_type) t);
-+				mark_btree_node_locked(trans, path, b->c.level, t);
-+			}
-+
-+			bch2_btree_path_level_init(trans, path, b);
-+		}
-+
-+	bch2_trans_revalidate_updates_in_node(trans, b);
-+}
-+
-+/*
-+ * A btree node has been modified in such a way as to invalidate iterators - fix
-+ * them:
-+ */
-+void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
-+{
-+	struct btree_path *path;
-+
-+	trans_for_each_path_with_node(trans, b, path)
-+		__btree_path_level_init(path, b->c.level);
-+
-+	bch2_trans_revalidate_updates_in_node(trans, b);
-+}
-+
-+/* Btree path: traverse, set_pos: */
-+
-+static inline int btree_path_lock_root(struct btree_trans *trans,
-+				       struct btree_path *path,
-+				       unsigned depth_want,
-+				       unsigned long trace_ip)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
-+	enum six_lock_type lock_type;
-+	unsigned i;
-+	int ret;
-+
-+	EBUG_ON(path->nodes_locked);
-+
-+	while (1) {
-+		b = READ_ONCE(*rootp);
-+		path->level = READ_ONCE(b->c.level);
-+
-+		if (unlikely(path->level < depth_want)) {
-+			/*
-+			 * the root is at a lower depth than the depth we want:
-+			 * got to the end of the btree, or we're walking nodes
-+			 * greater than some depth and there are no nodes >=
-+			 * that depth
-+			 */
-+			path->level = depth_want;
-+			for (i = path->level; i < BTREE_MAX_DEPTH; i++)
-+				path->l[i].b = NULL;
-+			return 1;
-+		}
-+
-+		lock_type = __btree_lock_want(path, path->level);
-+		ret = btree_node_lock(trans, path, &b->c,
-+				      path->level, lock_type, trace_ip);
-+		if (unlikely(ret)) {
-+			if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
-+				continue;
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+				return ret;
-+			BUG();
-+		}
-+
-+		if (likely(b == READ_ONCE(*rootp) &&
-+			   b->c.level == path->level &&
-+			   !race_fault())) {
-+			for (i = 0; i < path->level; i++)
-+				path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
-+			path->l[path->level].b = b;
-+			for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
-+				path->l[i].b = NULL;
-+
-+			mark_btree_node_locked(trans, path, path->level,
-+					       (enum btree_node_locked_type) lock_type);
-+			bch2_btree_path_level_init(trans, path, b);
-+			return 0;
-+		}
-+
-+		six_unlock_type(&b->c.lock, lock_type);
-+	}
-+}
-+
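btree_path_lock_root() above reads the root pointer unlocked, takes the lock, then re-checks that the root was not replaced in the window before the lock was acquired, retrying if it was. A compact sketch of that read/lock/revalidate loop, with a pthread mutex standing in for the six lock (all names hypothetical):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	struct node { pthread_mutex_t lock; int level; };

	static struct node n0 = { PTHREAD_MUTEX_INITIALIZER, 1 };
	static _Atomic(struct node *) root = &n0;

	/* Lock the current root; retry if it changed while we were locking. */
	static struct node *lock_root(void)
	{
		struct node *b;

		while (1) {
			b = atomic_load(&root);
			pthread_mutex_lock(&b->lock);
			if (b == atomic_load(&root))
				return b;	/* still the root: validated under lock */
			pthread_mutex_unlock(&b->lock);	/* raced with a root split */
		}
	}

	int main(void)
	{
		struct node *b = lock_root();

		printf("root level %d\n", b->level);
		pthread_mutex_unlock(&b->lock);
		return 0;
	}
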
-+	bool was_locked = btree_node_locked(path, path->level);
-+	int ret = 0;
-+
-+	bch2_bkey_buf_init(&tmp);
-+
-+	while (nr-- && !ret) {
-+		if (!bch2_btree_node_relock(trans, path, path->level))
-+			break;
-+
-+		bch2_btree_and_journal_iter_advance(jiter);
-+		k = bch2_btree_and_journal_iter_peek(jiter);
-+		if (!k.k)
-+			break;
-+
-+		bch2_bkey_buf_reassemble(&tmp, c, k);
-+		ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
-+					       path->level - 1);
-+	}
-+
-+	if (!was_locked)
-+		btree_node_unlock(trans, path, path->level);
-+
-+	bch2_bkey_buf_exit(&tmp, c);
-+	return ret;
-+}
-+
-+static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
-+					    struct btree_path *path,
-+					    unsigned plevel, struct btree *b)
-+{
-+	struct btree_path_level *l = &path->l[plevel];
-+	bool locked = btree_node_locked(path, plevel);
-+	struct bkey_packed *k;
-+	struct bch_btree_ptr_v2 *bp;
-+
-+	if (!bch2_btree_node_relock(trans, path, plevel))
-+		return;
-+
-+	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+	BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);
-+
-+	bp = (void *) bkeyp_val(&l->b->format, k);
-+	bp->mem_ptr = (unsigned long)b;
-+
-+	if (!locked)
-+		btree_node_unlock(trans, path, plevel);
-+}
-+
-+static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
-+						     struct btree_path *path,
-+						     unsigned flags,
-+						     struct bkey_buf *out)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_path_level *l = path_l(path);
-+	struct btree_and_journal_iter jiter;
-+	struct bkey_s_c k;
-+	int ret = 0;
-+
-+	__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
-+
-+	k = bch2_btree_and_journal_iter_peek(&jiter);
-+
-+	bch2_bkey_buf_reassemble(out, c, k);
-+
-+	if (flags & BTREE_ITER_PREFETCH)
-+		ret = btree_path_prefetch_j(trans, path, &jiter);
-+
-+	bch2_btree_and_journal_iter_exit(&jiter);
-+	return ret;
-+}
-+
-+static __always_inline int btree_path_down(struct btree_trans *trans,
-+					   struct btree_path *path,
-+					   unsigned flags,
-+					   unsigned long trace_ip)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_path_level *l = path_l(path);
-+	struct btree *b;
-+	unsigned level = path->level - 1;
-+	enum six_lock_type lock_type = __btree_lock_want(path, level);
-+	struct bkey_buf tmp;
-+	int ret;
-+
-+	EBUG_ON(!btree_node_locked(path, path->level));
-+
-+	bch2_bkey_buf_init(&tmp);
-+
-+	if (unlikely(trans->journal_replay_not_finished)) {
-+		ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
-+		if (ret)
-+			goto err;
-+	} else {
-+		bch2_bkey_buf_unpack(&tmp, c, l->b,
-+				     bch2_btree_node_iter_peek(&l->iter, l->b));
-+
-+		if (flags & BTREE_ITER_PREFETCH) {
-+			ret = btree_path_prefetch(trans, path);
-+			if (ret)
-+				goto err;
-+		}
-+	}
-+
-+	b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
-+	ret = PTR_ERR_OR_ZERO(b);
-+	if (unlikely(ret))
-+		goto err;
-+
-+	if (likely(!trans->journal_replay_not_finished &&
-+		   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
-+	    unlikely(b != btree_node_mem_ptr(tmp.k)))
-+		btree_node_mem_ptr_set(trans, path, level + 1, b);
-+
-+	if (btree_node_read_locked(path, level + 1))
-+		btree_node_unlock(trans, path, level + 1);
-+
-+	mark_btree_node_locked(trans, path, level,
-+			       (enum btree_node_locked_type) lock_type);
-+	path->level = level;
-+	bch2_btree_path_level_init(trans, path, b);
-+
-+	bch2_btree_path_verify_locks(path);
-+err:
-+	bch2_bkey_buf_exit(&tmp, c);
-+	return ret;
-+}
-+
-+
-+static int bch2_btree_path_traverse_all(struct btree_trans *trans)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_path *path;
-+	unsigned long trace_ip = _RET_IP_;
-+	int i, ret = 0;
-+
-+	if (trans->in_traverse_all)
-+		return -BCH_ERR_transaction_restart_in_traverse_all;
-+
-+	trans->in_traverse_all = true;
-+retry_all:
-+	trans->restarted = 0;
-+	trans->last_restarted_ip = 0;
-+
-+	trans_for_each_path(trans, path)
-+		path->should_be_locked = false;
-+
-+	btree_trans_sort_paths(trans);
-+
-+	bch2_trans_unlock(trans);
-+	cond_resched();
-+
-+	if (unlikely(trans->memory_allocation_failure)) {
-+		struct closure cl;
-+
-+		closure_init_stack(&cl);
-+
-+		do {
-+			ret = bch2_btree_cache_cannibalize_lock(c, &cl);
-+			closure_sync(&cl);
-+		} while (ret);
-+	}
-+
-+	/* Now, redo traversals in correct order: */
-+	i = 0;
-+	while (i < trans->nr_sorted) {
-+		path = trans->paths + trans->sorted[i];
-+
-+		/*
-+		 * Traversing a path can cause another path to be added at about
-+		 * the same position:
-+		 */
-+		if (path->uptodate) {
-+			__btree_path_get(path, false);
-+			ret = bch2_btree_path_traverse_one(trans, path, 0, _THIS_IP_);
-+			__btree_path_put(path, false);
-+
-+			if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-+			    bch2_err_matches(ret, ENOMEM))
-+				goto retry_all;
-+			if (ret)
-+				goto err;
-+		} else {
-+			i++;
-+		}
-+	}
-+
-+	/*
-+	 * We used to assert that all paths had been traversed here
-+	 * (path->uptodate < BTREE_ITER_NEED_TRAVERSE); however, since
-+	 * path->should_be_locked is not set yet, we might have unlocked and
-+	 * then failed to relock a path - that's fine.
-+	 */
-+err:
-+	bch2_btree_cache_cannibalize_unlock(c);
-+
-+	trans->in_traverse_all = false;
-+
-+	trace_and_count(c, trans_traverse_all, trans, trace_ip);
-+	return ret;
-+}
-+
-+static inline bool btree_path_check_pos_in_node(struct btree_path *path,
-+						unsigned l, int check_pos)
-+{
-+	if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
-+		return false;
-+	if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
-+		return false;
-+	return true;
-+}
-+
-+static inline bool btree_path_good_node(struct btree_trans *trans,
-+					struct btree_path *path,
-+					unsigned l, int check_pos)
-+{
-+	return is_btree_node(path, l) &&
-+		bch2_btree_node_relock(trans, path, l) &&
-+		btree_path_check_pos_in_node(path, l, check_pos);
-+}
-+
-+static void btree_path_set_level_down(struct btree_trans *trans,
-+				      struct btree_path *path,
-+				      unsigned new_level)
-+{
-+	unsigned l;
-+
-+	path->level = new_level;
-+
-+	for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
-+		if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
-+			btree_node_unlock(trans, path, l);
-+
-+	btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+	bch2_btree_path_verify(trans, path);
-+}
-+
-+static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
-+							 struct btree_path *path,
-+							 int check_pos)
-+{
-+	unsigned i, l = path->level;
-+again:
-+	while (btree_path_node(path, l) &&
-+	       !btree_path_good_node(trans, path, l, check_pos))
-+		__btree_path_set_level_up(trans, path, l++);
-+
-+	/* If we need intent locks, take them too: */
-+	for (i = l + 1;
-+	     i < path->locks_want && btree_path_node(path, i);
-+	     i++)
-+		if (!bch2_btree_node_relock(trans, path, i)) {
-+			while (l <= i)
-+				__btree_path_set_level_up(trans, path, l++);
-+			goto again;
-+		}
-+
-+	return l;
-+}
-+
-+static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
-+						     struct btree_path *path,
-+						     int check_pos)
-+{
-+	return likely(btree_node_locked(path, path->level) &&
-+		      btree_path_check_pos_in_node(path, path->level, check_pos))
-+		? path->level
-+		: __btree_path_up_until_good_node(trans, path, check_pos);
-+}
-+
-+/*
-+ * This is the main state machine for walking down the btree - walks down to a
-+ * specified depth
-+ *
-+ * Returns 0 on success, -EIO on error (error reading in a btree node).
-+ *
-+ * On error, caller (peek_node()/peek_key()) must return NULL; the error is
-+ * stashed in the iterator and returned from bch2_trans_exit().
-+ */
-+int bch2_btree_path_traverse_one(struct btree_trans *trans,
-+				 struct btree_path *path,
-+				 unsigned flags,
-+				 unsigned long trace_ip)
-+{
-+	unsigned depth_want = path->level;
-+	int ret = -((int) trans->restarted);
-+
-+	if (unlikely(ret))
-+		goto out;
-+
-+	if (unlikely(!trans->srcu_held))
-+		bch2_trans_srcu_lock(trans);
-+
-+	/*
-+	 * Ensure we obey path->should_be_locked: if it's set, we can't unlock
-+	 * and re-traverse the path without a transaction restart:
-+	 */
-+	if (path->should_be_locked) {
-+		ret = bch2_btree_path_relock(trans, path, trace_ip);
-+		goto out;
-+	}
-+
-+	if (path->cached) {
-+		ret = bch2_btree_path_traverse_cached(trans, path, flags);
-+		goto out;
-+	}
-+
-+	if (unlikely(path->level >= BTREE_MAX_DEPTH))
-+		goto out;
-+
-+	path->level = btree_path_up_until_good_node(trans, path, 0);
-+
-+	EBUG_ON(btree_path_node(path, path->level) &&
-+		!btree_node_locked(path, path->level));
-+
-+	/*
-+	 * Note: path->nodes[path->level] may be temporarily NULL here - that
-+	 * would indicate to other code that we got to the end of the btree,
-+	 * here it indicates that relocking the root failed - it's critical that
-+	 * btree_path_lock_root() comes next and that it can't fail
-+	 */
-+	while (path->level > depth_want) {
-+		ret = btree_path_node(path, path->level)
-+			? btree_path_down(trans, path, flags, trace_ip)
-+			: btree_path_lock_root(trans, path, depth_want, trace_ip);
-+		if (unlikely(ret)) {
-+			if (ret == 1) {
-+				/*
-+				 * No nodes at this level - got to the end of
-+				 * the btree:
-+				 */
-+				ret = 0;
-+				goto out;
-+			}
-+
-+			__bch2_btree_path_unlock(trans, path);
-+			path->level = depth_want;
-+			path->l[path->level].b = ERR_PTR(ret);
-+			goto out;
-+		}
-+	}
-+
-+	path->uptodate = BTREE_ITER_UPTODATE;
-+out:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
-+		panic("ret %s (%i) trans->restarted %s (%i)\n",
-+		      bch2_err_str(ret), ret,
-+		      bch2_err_str(trans->restarted), trans->restarted);
-+	bch2_btree_path_verify(trans, path);
-+	return ret;
-+}
-+
-+static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
-+				   struct btree_path *src)
-+{
-+	unsigned i, offset = offsetof(struct btree_path, pos);
-+
-+	memcpy((void *) dst + offset,
-+	       (void *) src + offset,
-+	       sizeof(struct btree_path) - offset);
-+
-+	for (i = 0; i < BTREE_MAX_DEPTH; i++) {
-+		unsigned t = btree_node_locked_type(dst, i);
-+
-+		if (t != BTREE_NODE_UNLOCKED)
-+			six_lock_increment(&dst->l[i].b->c.lock, t);
-+	}
-+}
-+
-+static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
-+					   bool intent)
-+{
-+	struct btree_path *new = btree_path_alloc(trans, src);
-+
-+	btree_path_copy(trans, new, src);
-+	__btree_path_get(new, intent);
-+	return new;
-+}
-+
-+__flatten
-+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
-+			 struct btree_path *path, bool intent,
-+			 unsigned long ip)
-+{
-+	__btree_path_put(path, intent);
-+	path = btree_path_clone(trans, path, intent);
-+	path->preserve = false;
-+	return path;
-+}
-+
-+struct btree_path * __must_check
-+__bch2_btree_path_set_pos(struct btree_trans *trans,
-+			  struct btree_path *path, struct bpos new_pos,
-+			  bool intent, unsigned long ip, int cmp)
-+{
-+	unsigned level = path->level;
-+
-+	bch2_trans_verify_not_in_restart(trans);
-+	EBUG_ON(!path->ref);
-+
-+	path = bch2_btree_path_make_mut(trans, path, intent, ip);
-+
-+	path->pos = new_pos;
-+	trans->paths_sorted = false;
-+
-+	if (unlikely(path->cached)) {
-+		btree_node_unlock(trans, path, 0);
-+		path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
-+		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+		goto out;
-+	}
-+
-+	level = btree_path_up_until_good_node(trans, path, cmp);
-+
-+	if (btree_path_node(path, level)) {
-+		struct btree_path_level *l = &path->l[level];
-+
-+		BUG_ON(!btree_node_locked(path, level));
-+		/*
-+		 * We might have to skip over many keys, or just a few: try
-+		 * advancing the node iterator, and if we have to skip over too
-+		 * many keys just reinit it (or if we're rewinding, since that
-+		 * is expensive).
-+		 */
-+		if (cmp < 0 ||
-+		    !btree_path_advance_to_pos(path, l, 8))
-+			bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);
-+
-+		/*
-+		 * Iterators to interior nodes should always be pointed at the first non
-+		 * whiteout:
-+		 */
-+		if (unlikely(level))
-+			bch2_btree_node_iter_peek(&l->iter, l->b);
-+	}
-+
-+	if (unlikely(level != path->level)) {
-+		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+		__bch2_btree_path_unlock(trans, path);
-+	}
-+out:
-+	bch2_btree_path_verify(trans, path);
-+	return path;
-+}
-+
-+/* Btree path: main interface: */
-+
-+static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
-+{
-+	struct btree_path *sib;
-+
-+	sib = prev_btree_path(trans, path);
-+	if (sib && !btree_path_cmp(sib, path))
-+		return sib;
-+
-+	sib = next_btree_path(trans, path);
-+	if (sib && !btree_path_cmp(sib, path))
-+		return sib;
-+
-+	return NULL;
-+}
-+
-+static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
-+{
-+	struct btree_path *sib;
-+
-+	sib = prev_btree_path(trans, path);
-+	if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
-+		return sib;
-+
-+	sib = next_btree_path(trans, path);
-+	if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
-+		return sib;
-+
-+	return NULL;
-+}
-+
-+static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
-+{
-+	__bch2_btree_path_unlock(trans, path);
-+	btree_path_list_remove(trans, path);
-+	trans->paths_allocated &= ~(1ULL << path->idx);
-+}
-+
-+void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent)
-+{
-+	struct btree_path *dup;
-+
-+	EBUG_ON(trans->paths + path->idx != path);
-+	EBUG_ON(!path->ref);
-+
-+	if (!__btree_path_put(path, intent))
-+		return;
-+
-+	dup = path->preserve
-+		? have_path_at_pos(trans, path)
-+		: have_node_at_pos(trans, path);
-+
-+	if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
-+		return;
-+
-+	if (path->should_be_locked &&
-+	    !trans->restarted &&
-+	    (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
-+		return;
-+
-+	if (dup) {
-+		dup->preserve |= path->preserve;
-+		dup->should_be_locked |= path->should_be_locked;
-+	}
-+
-+	__bch2_path_free(trans, path);
-+}
-+
-+static void bch2_path_put_nokeep(struct btree_trans *trans, struct btree_path *path,
-+				 bool intent)
-+{
-+	EBUG_ON(trans->paths + path->idx != path);
-+	EBUG_ON(!path->ref);
-+
-+	if (!__btree_path_put(path, intent))
-+		return;
-+
-+	__bch2_path_free(trans, path);
-+}
-+
-+void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
-+{
-+	panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
-+	      trans->restart_count, restart_count,
-+	      (void *) trans->last_begin_ip);
-+}
-+
-+void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
-+{
-+	panic("in transaction restart: %s, last restarted by %pS\n",
-+	      bch2_err_str(trans->restarted),
-+	      (void *) trans->last_restarted_ip);
-+}
-+
-+noinline __cold
-+void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
-+{
-+	struct btree_insert_entry *i;
-+	struct btree_write_buffered_key *wb;
-+
-+	prt_printf(buf, "transaction updates for %s journal seq %llu",
-+		   trans->fn, trans->journal_res.seq);
-+	prt_newline(buf);
-+	printbuf_indent_add(buf, 2);
-+
-+	trans_for_each_update(trans, i) {
-+		struct bkey_s_c old = { &i->old_k, i->old_v };
-+
-+		prt_printf(buf, "update: btree=%s cached=%u %pS",
-+			   bch2_btree_id_str(i->btree_id),
-+			   i->cached,
-+			   (void *) i->ip_allocated);
-+		prt_newline(buf);
-+
-+		prt_printf(buf, " old ");
-+		bch2_bkey_val_to_text(buf, trans->c, old);
-+		prt_newline(buf);
-+
-+		prt_printf(buf, " new ");
-+		bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
-+		prt_newline(buf);
-+	}
-+
-+	trans_for_each_wb_update(trans, wb) {
-+		prt_printf(buf, "update: btree=%s wb=1 %pS",
-+			   bch2_btree_id_str(wb->btree),
-+			   (void *) i->ip_allocated);
-+		prt_newline(buf);
-+
-+		prt_printf(buf, " new ");
-+		bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(&wb->k));
-+		prt_newline(buf);
-+	}
-+
-+	printbuf_indent_sub(buf, 2);
-+}
-+
-+noinline __cold
-+void bch2_dump_trans_updates(struct btree_trans *trans)
-+{
-+	struct printbuf buf = PRINTBUF;
-+
-+	bch2_trans_updates_to_text(&buf, trans);
-+	bch2_print_string_as_lines(KERN_ERR, buf.buf);
-+	printbuf_exit(&buf);
-+}
-+
-+noinline __cold
-+void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
-+{
-+	prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
-+		   path->idx, path->ref, path->intent_ref,
-+		   path->preserve ? 'P' : ' ',
-+		   path->should_be_locked ? 'S' : ' ',
-+		   bch2_btree_id_str(path->btree_id),
-+		   path->level);
-+	bch2_bpos_to_text(out, path->pos);
-+
-+	prt_printf(out, " locks %u", path->nodes_locked);
-+#ifdef TRACK_PATH_ALLOCATED
-+	prt_printf(out, " %pS", (void *) path->ip_allocated);
-+#endif
-+	prt_newline(out);
-+}
-+
-+static noinline __cold
-+void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
-+				bool nosort)
-+{
-+	struct btree_path *path;
-+	unsigned idx;
-+
-+	if (!nosort)
-+		btree_trans_sort_paths(trans);
-+
-+	trans_for_each_path_inorder(trans, path, idx)
-+		bch2_btree_path_to_text(out, path);
-+}
-+
-+noinline __cold
-+void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
-+{
-+	__bch2_trans_paths_to_text(out, trans, false);
-+}
-+
-+static noinline __cold
-+void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
-+{
-+	struct printbuf buf = PRINTBUF;
-+
-+	__bch2_trans_paths_to_text(&buf, trans, nosort);
-+	bch2_trans_updates_to_text(&buf, trans);
-+
-+	bch2_print_string_as_lines(KERN_ERR, buf.buf);
-+	printbuf_exit(&buf);
-+}
-+
-+noinline __cold
-+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
-+{
-+	__bch2_dump_trans_paths_updates(trans, false);
-+}
-+
-+noinline __cold
-+static void bch2_trans_update_max_paths(struct btree_trans *trans)
-+{
-+	struct btree_transaction_stats *s = btree_trans_stats(trans);
-+	struct printbuf buf = PRINTBUF;
-+
-+	if (!s)
-+		return;
-+
-+	bch2_trans_paths_to_text(&buf, trans);
-+
-+	if (!buf.allocation_failure) {
-+		mutex_lock(&s->lock);
-+		if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
-+			s->nr_max_paths = trans->nr_max_paths =
-+				hweight64(trans->paths_allocated);
-+			swap(s->max_paths_text, buf.buf);
-+		}
-+		mutex_unlock(&s->lock);
-+	}
-+
-+	printbuf_exit(&buf);
-+
-+	trans->nr_max_paths = hweight64(trans->paths_allocated);
-+}
-+
-+static noinline void btree_path_overflow(struct btree_trans *trans)
-+{
-+	bch2_dump_trans_paths_updates(trans);
-+	panic("trans path overflow\n");
-+}
-+
-+static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
-+						  struct btree_path *pos)
-+{
-+	struct btree_path *path;
-+	unsigned idx;
-+
-+	if (unlikely(trans->paths_allocated ==
-+		     ~((~0ULL << 1) << (BTREE_ITER_MAX - 1))))
-+		btree_path_overflow(trans);
-+
-+	idx = __ffs64(~trans->paths_allocated);
-+
-+	/*
-+	 * Do this before marking the new path as allocated, since it won't be
-+	 * initialized yet:
-+	 */
-+	if (unlikely(idx > trans->nr_max_paths))
-+		bch2_trans_update_max_paths(trans);
-+
-+	trans->paths_allocated |= 1ULL << idx;
-+
-+	path = &trans->paths[idx];
-+	path->idx = idx;
-+	path->ref = 0;
-+	path->intent_ref = 0;
-+	path->nodes_locked = 0;
-+	path->alloc_seq++;
-+
-+	btree_path_list_add(trans, pos, path);
-+	trans->paths_sorted = false;
-+	return path;
-+}
-+
-+struct btree_path *bch2_path_get(struct btree_trans *trans,
-+				 enum btree_id btree_id, struct bpos pos,
-+				 unsigned locks_want, unsigned level,
-+				 unsigned flags, unsigned long ip)
-+{
-+	struct btree_path *path, *path_pos = NULL;
-+	bool cached = flags & BTREE_ITER_CACHED;
-+	bool intent = flags & BTREE_ITER_INTENT;
-+	int i;
-+
-+	bch2_trans_verify_not_in_restart(trans);
-+	bch2_trans_verify_locks(trans);
-+
-+	btree_trans_sort_paths(trans);
-+
-+	trans_for_each_path_inorder(trans, path, i) {
-+		if (__btree_path_cmp(path,
-+				     btree_id,
-+				     cached,
-+				     pos,
-+				     level) > 0)
-+			break;
-+
-+		path_pos = path;
-+	}
-+
-+	if (path_pos &&
-+	    path_pos->cached == cached &&
-+	    path_pos->btree_id == btree_id &&
-+	    path_pos->level == level) {
-+		__btree_path_get(path_pos, intent);
-+		path = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip);
-+	} else {
-+		path = btree_path_alloc(trans, path_pos);
-+		path_pos = NULL;
-+
-+		__btree_path_get(path, intent);
-+		path->pos = pos;
-+		path->btree_id = btree_id;
-+		path->cached = cached;
-+		path->uptodate = BTREE_ITER_NEED_TRAVERSE;
-+		path->should_be_locked = false;
-+		path->level = level;
-+		path->locks_want = locks_want;
-+		path->nodes_locked = 0;
-+		for (i = 0; i < ARRAY_SIZE(path->l); i++)
-+			path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
-+#ifdef TRACK_PATH_ALLOCATED
-+		path->ip_allocated = ip;
-+#endif
-+		trans->paths_sorted = false;
-+	}
-+
-+	if (!(flags & BTREE_ITER_NOPRESERVE))
-+		path->preserve = true;
-+
-+	if (path->intent_ref)
-+		locks_want = max(locks_want, level + 1);
-+
-+	/*
-+	 * If the path has locks_want greater than requested, we don't downgrade
-+	 * it here - on transaction restart because btree node split needs to
-+	 * upgrade locks, we might be putting/getting the iterator again.
-+	 * Downgrading iterators only happens via bch2_trans_downgrade(), after
-+	 * a successful transaction commit.
-+	 */
-+
-+	locks_want = min(locks_want, BTREE_MAX_DEPTH);
-+	if (locks_want > path->locks_want)
-+		bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
-+
-+	return path;
-+}
-+
-+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
-+{
-+
-+	struct btree_path_level *l = path_l(path);
-+	struct bkey_packed *_k;
-+	struct bkey_s_c k;
-+
-+	if (unlikely(!l->b))
-+		return bkey_s_c_null;
-+
-+	EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
-+	EBUG_ON(!btree_node_locked(path, path->level));
-+
-+	if (!path->cached) {
-+		_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
-+		k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
-+
-+		EBUG_ON(k.k && bkey_deleted(k.k) && bpos_eq(k.k->p, path->pos));
-+
-+		if (!k.k || !bpos_eq(path->pos, k.k->p))
-+			goto hole;
-+	} else {
-+		struct bkey_cached *ck = (void *) path->l[0].b;
-+
-+		EBUG_ON(ck &&
-+			(path->btree_id != ck->key.btree_id ||
-+			 !bkey_eq(path->pos, ck->key.pos)));
-+		if (!ck || !ck->valid)
-+			return bkey_s_c_null;
-+
-+		*u = ck->k->k;
-+		k = bkey_i_to_s_c(ck->k);
-+	}
-+
-+	return k;
-+hole:
-+	bkey_init(u);
-+	u->p = path->pos;
-+	return (struct bkey_s_c) { u, NULL };
-+}
-+
-+/* Btree iterators: */
-+
-+int __must_check
-+__bch2_btree_iter_traverse(struct btree_iter *iter)
-+{
-+	return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
-+}
-+
-+int __must_check
-+bch2_btree_iter_traverse(struct btree_iter *iter)
-+{
-+	int ret;
-+
-+	iter->path = bch2_btree_path_set_pos(iter->trans, iter->path,
-+					btree_iter_search_key(iter),
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+
-+	ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
-+	if (ret)
-+		return ret;
-+
-+	btree_path_set_should_be_locked(iter->path);
-+	return 0;
-+}
-+
-+/* Iterate across nodes (leaf and interior nodes) */
-+
-+struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct btree *b = NULL;
-+	int ret;
-+
-+	EBUG_ON(iter->path->cached);
-+	bch2_btree_iter_verify(iter);
-+
-+	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+	if (ret)
-+		goto err;
-+
-+	b = btree_path_node(iter->path, iter->path->level);
-+	if (!b)
-+		goto out;
-+
-+	BUG_ON(bpos_lt(b->key.k.p, iter->pos));
-+
-+	bkey_init(&iter->k);
-+	iter->k.p = iter->pos = b->key.k.p;
-+
-+	iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+	btree_path_set_should_be_locked(iter->path);
-+out:
-+	bch2_btree_iter_verify_entry_exit(iter);
-+	bch2_btree_iter_verify(iter);
-+
-+	return b;
-+err:
-+	b = ERR_PTR(ret);
-+	goto out;
-+}
-+
-+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
-+{
-+	struct btree *b;
-+
-+	while (b = bch2_btree_iter_peek_node(iter),
-+	       bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
-+		bch2_trans_begin(iter->trans);
-+
-+	return b;
-+}
-+
-+struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct btree_path *path = iter->path;
-+	struct btree *b = NULL;
-+	int ret;
-+
-+	bch2_trans_verify_not_in_restart(trans);
-+	EBUG_ON(iter->path->cached);
-+	bch2_btree_iter_verify(iter);
-+
-+	/* already at end? */
-+	if (!btree_path_node(path, path->level))
-+		return NULL;
-+
-+	/* got to end? */
-+	if (!btree_path_node(path, path->level + 1)) {
-+		btree_path_set_level_up(trans, path);
-+		return NULL;
-+	}
-+
-+	if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
-+		__bch2_btree_path_unlock(trans, path);
-+		path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
-+		path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
-+		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-+		trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
-+		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
-+		goto err;
-+	}
-+
-+	b = btree_path_node(path, path->level + 1);
-+
-+	if (bpos_eq(iter->pos, b->key.k.p)) {
-+		__btree_path_set_level_up(trans, path, path->level++);
-+	} else {
-+		/*
-+		 * Haven't gotten to the end of the parent node: go back down to
-+		 * the next child node
-+		 */
-+		path = iter->path =
-+			bch2_btree_path_set_pos(trans, path, bpos_successor(iter->pos),
-+					   iter->flags & BTREE_ITER_INTENT,
-+					   btree_iter_ip_allocated(iter));
-+
-+		btree_path_set_level_down(trans, path, iter->min_depth);
-+
-+		ret = bch2_btree_path_traverse(trans, path, iter->flags);
-+		if (ret)
-+			goto err;
-+
-+		b = path->l[path->level].b;
-+	}
-+
-+	bkey_init(&iter->k);
-+	iter->k.p = iter->pos = b->key.k.p;
-+
-+	iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+	btree_path_set_should_be_locked(iter->path);
-+	BUG_ON(iter->path->uptodate);
-+out:
-+	bch2_btree_iter_verify_entry_exit(iter);
-+	bch2_btree_iter_verify(iter);
-+
-+	return b;
-+err:
-+	b = ERR_PTR(ret);
-+	goto out;
-+}
-+
-+/* Iterate across keys (in leaf nodes only) */
-+
-+inline bool bch2_btree_iter_advance(struct btree_iter *iter)
-+{
-+	if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) {
-+		struct bpos pos = iter->k.p;
-+		bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
-+			     ? bpos_eq(pos, SPOS_MAX)
-+			     : bkey_eq(pos, SPOS_MAX));
-+
-+		if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-+			pos = bkey_successor(iter, pos);
-+		bch2_btree_iter_set_pos(iter, pos);
-+		return ret;
-+	} else {
-+		if (!btree_path_node(iter->path, iter->path->level))
-+			return true;
-+
-+		iter->advanced = true;
-+		return false;
-+	}
-+}
-+
-+inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
-+{
-+	struct bpos pos = bkey_start_pos(&iter->k);
-+	bool ret = !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS
-+		     ? bpos_eq(pos, POS_MIN)
-+		     : bkey_eq(pos, POS_MIN));
-+
-+	if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-+		pos = bkey_predecessor(iter, pos);
-+	bch2_btree_iter_set_pos(iter, pos);
-+	return ret;
-+}
-+
-+static noinline
-+struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
-+{
-+	struct btree_insert_entry *i;
-+	struct bkey_i *ret = NULL;
-+
-+	trans_for_each_update(iter->trans, i) {
-+		if (i->btree_id < iter->btree_id)
-+			continue;
-+		if (i->btree_id > iter->btree_id)
-+			break;
-+		if (bpos_lt(i->k->k.p, iter->path->pos))
-+			continue;
-+		if (i->key_cache_already_flushed)
-+			continue;
-+		if (!ret || bpos_lt(i->k->k.p, ret->k.p))
-+			ret = i->k;
-+	}
-+
-+	return ret;
-+}
-+
-+static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter)
-+{
-+	return iter->flags & BTREE_ITER_WITH_UPDATES
-+		? __bch2_btree_trans_peek_updates(iter)
-+		: NULL;
-+}
-+
-+static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
-+					      struct btree_iter *iter,
-+					      struct bpos end_pos)
-+{
-+	struct bkey_i *k;
-+
-+	if (bpos_lt(iter->path->pos, iter->journal_pos))
-+		iter->journal_idx = 0;
-+
-+	k = bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
-+					iter->path->level,
-+					iter->path->pos,
-+					end_pos,
-+					&iter->journal_idx);
-+
-+	iter->journal_pos = k ? k->k.p : end_pos;
-+	return k;
-+}
-+
-+static noinline
-+struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
-+					      struct btree_iter *iter)
-+{
-+	struct bkey_i *k = bch2_btree_journal_peek(trans, iter, iter->path->pos);
-+
-+	if (k) {
-+		iter->k = k->k;
-+		return bkey_i_to_s_c(k);
-+	} else {
-+		return bkey_s_c_null;
-+	}
-+}
-+
-+static noinline
-+struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
-+					 struct btree_iter *iter,
-+					 struct bkey_s_c k)
-+{
-+	struct bkey_i *next_journal =
-+		bch2_btree_journal_peek(trans, iter,
-+				k.k ? k.k->p : path_l(iter->path)->b->key.k.p);
-+
-+	if (next_journal) {
-+		iter->k = next_journal->k;
-+		k = bkey_i_to_s_c(next_journal);
-+	}
-+
-+	return k;
-+}
-+
-+/*
-+ * Checks btree key cache for key at iter->pos and returns it if present, or
-+ * bkey_s_c_null:
-+ */
-+static noinline
-+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct bch_fs *c = trans->c;
-+	struct bkey u;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	if ((iter->flags & BTREE_ITER_KEY_CACHE_FILL) &&
-+	    bpos_eq(iter->pos, pos))
-+		return bkey_s_c_null;
-+
-+	if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
-+		return bkey_s_c_null;
-+
-+	if (!iter->key_cache_path)
-+		iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
-+						     iter->flags & BTREE_ITER_INTENT, 0,
-+						     iter->flags|BTREE_ITER_CACHED|
-+						     BTREE_ITER_CACHED_NOFILL,
-+						     _THIS_IP_);
-+
-+	iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+
-+	ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
-+				       iter->flags|BTREE_ITER_CACHED) ?:
-+		bch2_btree_path_relock(trans, iter->path, _THIS_IP_);
-+	if (unlikely(ret))
-+		return bkey_s_c_err(ret);
-+
-+	btree_path_set_should_be_locked(iter->key_cache_path);
-+
-+	k = bch2_btree_path_peek_slot(iter->key_cache_path, &u);
-+	if (k.k && !bkey_err(k)) {
-+		iter->k = u;
-+		k.k = &iter->k;
-+	}
-+	return k;
-+}
-+
-+static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct bkey_i *next_update;
-+	struct bkey_s_c k, k2;
-+	int ret;
-+
-+	EBUG_ON(iter->path->cached);
-+	bch2_btree_iter_verify(iter);
-+
-+	while (1) {
-+		struct btree_path_level *l;
-+
-+		iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+
-+		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+		if (unlikely(ret)) {
-+			/* ensure that iter->k is consistent with iter->pos: */
-+			bch2_btree_iter_set_pos(iter, iter->pos);
-+			k = bkey_s_c_err(ret);
-+			goto out;
-+		}
-+
-+		l = path_l(iter->path);
-+
-+		if (unlikely(!l->b)) {
-+			/* No btree nodes at requested level: */
-+			bch2_btree_iter_set_pos(iter, SPOS_MAX);
-+			k = bkey_s_c_null;
-+			goto out;
-+		}
-+
-+		btree_path_set_should_be_locked(iter->path);
-+
-+		k = btree_path_level_peek_all(trans->c, l, &iter->k);
-+
-+		if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
-+		    k.k &&
-+		    (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
-+			k = k2;
-+			ret = bkey_err(k);
-+			if (ret) {
-+				bch2_btree_iter_set_pos(iter, iter->pos);
-+				goto out;
-+			}
-+		}
-+
-+		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
-+			k = btree_trans_peek_journal(trans, iter, k);
-+
-+		next_update = btree_trans_peek_updates(iter);
-+
-+		if (next_update &&
-+		    bpos_le(next_update->k.p,
-+			    k.k ? k.k->p : l->b->key.k.p)) {
-+			iter->k = next_update->k;
-+			k = bkey_i_to_s_c(next_update);
-+		}
-+
-+		if (k.k && bkey_deleted(k.k)) {
-+			/*
-+			 * If we've got a whiteout, and it's after the search
-+			 * key, advance the search key to the whiteout instead
-+			 * of just after the whiteout - it might be a btree
-+			 * whiteout, with a real key at the same position, since
-+			 * in the btree deleted keys sort before non deleted.
-+			 */
-+			search_key = !bpos_eq(search_key, k.k->p)
-+				? k.k->p
-+				: bpos_successor(k.k->p);
-+			continue;
-+		}
-+
-+		if (likely(k.k)) {
-+			break;
-+		} else if (likely(!bpos_eq(l->b->key.k.p, SPOS_MAX))) {
-+			/* Advance to next leaf node: */
-+			search_key = bpos_successor(l->b->key.k.p);
-+		} else {
-+			/* End of btree: */
-+			bch2_btree_iter_set_pos(iter, SPOS_MAX);
-+			k = bkey_s_c_null;
-+			goto out;
-+		}
-+	}
-+out:
-+	bch2_btree_iter_verify(iter);
-+
-+	return k;
-+}
-+
-+/**
-+ * bch2_btree_iter_peek_upto() - returns first key greater than or equal to
-+ * iterator's current position
-+ * @iter:	iterator to peek from
-+ * @end:	search limit: returns keys less than or equal to @end
-+ *
-+ * Returns:	key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct bpos search_key = btree_iter_search_key(iter);
-+	struct bkey_s_c k;
-+	struct bpos iter_pos;
-+	int ret;
-+
-+	EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
-+	EBUG_ON((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) && bkey_eq(end, POS_MAX));
-+
-+	if (iter->update_path) {
-+		bch2_path_put_nokeep(trans, iter->update_path,
-+				     iter->flags & BTREE_ITER_INTENT);
-+		iter->update_path = NULL;
-+	}
-+
-+	bch2_btree_iter_verify_entry_exit(iter);
-+
-+	while (1) {
-+		k = __bch2_btree_iter_peek(iter, search_key);
-+		if (unlikely(!k.k))
-+			goto end;
-+		if (unlikely(bkey_err(k)))
-+			goto out_no_locked;
-+
-+		/*
-+		 * iter->pos should be mononotically increasing, and always be
-+		 * equal to the key we just returned - except extents can
-+		 * straddle iter->pos:
-+		 */
-+		if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
-+			iter_pos = k.k->p;
-+		else
-+			iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
-+
-+		if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
-+			     ? bkey_gt(iter_pos, end)
-+			     : bkey_ge(iter_pos, end)))
-+			goto end;
-+
-+		if (iter->update_path &&
-+		    !bkey_eq(iter->update_path->pos, k.k->p)) {
-+			bch2_path_put_nokeep(trans, iter->update_path,
-+					     iter->flags & BTREE_ITER_INTENT);
-+			iter->update_path = NULL;
-+		}
-+
-+		if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
-+		    (iter->flags & BTREE_ITER_INTENT) &&
-+		    !(iter->flags & BTREE_ITER_IS_EXTENTS) &&
-+		    !iter->update_path) {
-+			struct bpos pos = k.k->p;
-+
-+			if (pos.snapshot < iter->snapshot) {
-+				search_key = bpos_successor(k.k->p);
-+				continue;
-+			}
-+
-+			pos.snapshot = iter->snapshot;
-+
-+			/*
-+			 * advance, same as on exit for iter->path, but only up
-+			 * to snapshot
-+			 */
-+			__btree_path_get(iter->path, iter->flags & BTREE_ITER_INTENT);
-+			iter->update_path = iter->path;
-+
-+			iter->update_path = bch2_btree_path_set_pos(trans,
-+						iter->update_path, pos,
-+						iter->flags & BTREE_ITER_INTENT,
-+						_THIS_IP_);
-+			ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags);
-+			if (unlikely(ret)) {
-+				k = bkey_s_c_err(ret);
-+				goto out_no_locked;
-+			}
-+		}
-+
-+		/*
-+		 * We can never have a key in a leaf node at POS_MAX, so
-+		 * we don't have to check these successor() calls:
-+		 */
-+		if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
-+		    !bch2_snapshot_is_ancestor(trans->c,
-+					       iter->snapshot,
-+					       k.k->p.snapshot)) {
-+			search_key = bpos_successor(k.k->p);
-+			continue;
-+		}
-+
-+		if (bkey_whiteout(k.k) &&
-+		    !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
-+			search_key = bkey_successor(iter, k.k->p);
-+			continue;
-+		}
-+
-+		break;
-+	}
-+
-+	iter->pos = iter_pos;
-+
-+	iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
-+				iter->flags & BTREE_ITER_INTENT,
-+				btree_iter_ip_allocated(iter));
-+
-+	btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+	if (iter->update_path) {
-+		ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_);
-+		if (unlikely(ret))
-+			k = bkey_s_c_err(ret);
-+		else
-+			btree_path_set_should_be_locked(iter->update_path);
-+	}
-+
-+	if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
-+		iter->pos.snapshot = iter->snapshot;
-+
-+	ret = bch2_btree_iter_verify_ret(iter, k);
-+	if (unlikely(ret)) {
-+		bch2_btree_iter_set_pos(iter, iter->pos);
-+		k = bkey_s_c_err(ret);
-+	}
-+
-+	bch2_btree_iter_verify_entry_exit(iter);
-+
-+	return k;
-+end:
-+	bch2_btree_iter_set_pos(iter, end);
-+	k = bkey_s_c_null;
-+	goto out_no_locked;
-+}
-+
-+/**
-+ * bch2_btree_iter_peek_all_levels() - returns the first key greater than or
-+ * equal to iterator's current position, returning keys from every level of the
-+ * btree. For keys at different levels of the btree that compare equal, the key
-+ * from the lower level (leaf) is returned first.
-+ * @iter:	iterator to peek from
-+ *
-+ * Returns:	key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	EBUG_ON(iter->path->cached);
-+	bch2_btree_iter_verify(iter);
-+	BUG_ON(iter->path->level < iter->min_depth);
-+	BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
-+	EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS));
-+
-+	while (1) {
-+		iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos,
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+
-+		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+		if (unlikely(ret)) {
-+			/* ensure that iter->k is consistent with iter->pos: */
-+			bch2_btree_iter_set_pos(iter, iter->pos);
-+			k = bkey_s_c_err(ret);
-+			goto out_no_locked;
-+		}
-+
-+		/* Already at end? */
-+		if (!btree_path_node(iter->path, iter->path->level)) {
-+			k = bkey_s_c_null;
-+			goto out_no_locked;
-+		}
-+
-+		k = btree_path_level_peek_all(trans->c,
-+				&iter->path->l[iter->path->level], &iter->k);
-+
-+		/* Check if we should go up to the parent node: */
-+		if (!k.k ||
-+		    (iter->advanced &&
-+		     bpos_eq(path_l(iter->path)->b->key.k.p, iter->pos))) {
-+			iter->pos = path_l(iter->path)->b->key.k.p;
-+			btree_path_set_level_up(trans, iter->path);
-+			iter->advanced = false;
-+			continue;
-+		}
-+
-+		/*
-+		 * Check if we should go back down to a leaf:
-+		 * If we're not in a leaf node, we only return the current key
-+		 * if it exactly matches iter->pos - otherwise we first have to
-+		 * go back to the leaf:
-+		 */
-+		if (iter->path->level != iter->min_depth &&
-+		    (iter->advanced ||
-+		     !k.k ||
-+		     !bpos_eq(iter->pos, k.k->p))) {
-+			btree_path_set_level_down(trans, iter->path, iter->min_depth);
-+			iter->pos = bpos_successor(iter->pos);
-+			iter->advanced = false;
-+			continue;
-+		}
-+
-+		/* Check if we should go to the next key: */
-+		if (iter->path->level == iter->min_depth &&
-+		    iter->advanced &&
-+		    k.k &&
-+		    bpos_eq(iter->pos, k.k->p)) {
-+			iter->pos = bpos_successor(iter->pos);
-+			iter->advanced = false;
-+			continue;
-+		}
-+
-+		if (iter->advanced &&
-+		    iter->path->level == iter->min_depth &&
-+		    !bpos_eq(k.k->p, iter->pos))
-+			iter->advanced = false;
-+
-+		BUG_ON(iter->advanced);
-+		BUG_ON(!k.k);
-+		break;
-+	}
-+
-+	iter->pos = k.k->p;
-+	btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+	bch2_btree_iter_verify(iter);
-+
-+	return k;
-+}
-+
-+/**
-+ * bch2_btree_iter_next() - returns first key greater than iterator's current
-+ * position
-+ * @iter:	iterator to peek from
-+ *
-+ * Returns:	key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
-+{
-+	if (!bch2_btree_iter_advance(iter))
-+		return bkey_s_c_null;
-+
-+	return bch2_btree_iter_peek(iter);
-+}
-+
-+/**
-+ * bch2_btree_iter_peek_prev() - returns first key less than or equal to
-+ * iterator's current position
-+ * @iter:	iterator to peek from
-+ *
-+ * Returns:	key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct bpos search_key = iter->pos;
-+	struct btree_path *saved_path = NULL;
-+	struct bkey_s_c k;
-+	struct bkey saved_k;
-+	const struct bch_val *saved_v;
-+	int ret;
-+
-+	EBUG_ON(iter->path->cached || iter->path->level);
-+	EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
-+
-+	if (iter->flags & BTREE_ITER_WITH_JOURNAL)
-+		return bkey_s_c_err(-EIO);
-+
-+	bch2_btree_iter_verify(iter);
-+	bch2_btree_iter_verify_entry_exit(iter);
-+
-+	if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
-+		search_key.snapshot = U32_MAX;
-+
-+	while (1) {
-+		iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
-+						iter->flags & BTREE_ITER_INTENT,
-+						btree_iter_ip_allocated(iter));
-+
-+		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+		if (unlikely(ret)) {
-+			/* ensure that iter->k is consistent with iter->pos: */
-+			bch2_btree_iter_set_pos(iter, iter->pos);
-+			k = bkey_s_c_err(ret);
-+			goto out_no_locked;
-+		}
-+
-+		k = btree_path_level_peek(trans, iter->path,
-+					  &iter->path->l[0], &iter->k);
-+		if (!k.k ||
-+		    ((iter->flags & BTREE_ITER_IS_EXTENTS)
-+		     ? bpos_ge(bkey_start_pos(k.k), search_key)
-+		     : bpos_gt(k.k->p, search_key)))
-+			k = btree_path_level_prev(trans, iter->path,
-+						  &iter->path->l[0], &iter->k);
-+
-+		if (likely(k.k)) {
-+			if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
-+				if (k.k->p.snapshot == iter->snapshot)
-+					goto got_key;
-+
-+				/*
-+				 * If we have a saved candidate, and we're no
-+				 * longer at the same _key_ (not pos), return
-+				 * that candidate
-+				 */
-+				if (saved_path && !bkey_eq(k.k->p, saved_k.p)) {
-+					bch2_path_put_nokeep(trans, iter->path,
-+						      iter->flags & BTREE_ITER_INTENT);
-+					iter->path = saved_path;
-+					saved_path = NULL;
-+					iter->k = saved_k;
-+					k.v = saved_v;
-+					goto got_key;
-+				}
-+
-+				if (bch2_snapshot_is_ancestor(iter->trans->c,
-+							      iter->snapshot,
-+							      k.k->p.snapshot)) {
-+					if (saved_path)
-+						bch2_path_put_nokeep(trans, saved_path,
-+						      iter->flags & BTREE_ITER_INTENT);
-+					saved_path = btree_path_clone(trans, iter->path,
-+								iter->flags & BTREE_ITER_INTENT);
-+					saved_k = *k.k;
-+					saved_v = k.v;
-+				}
-+
-+				search_key = bpos_predecessor(k.k->p);
-+				continue;
-+			}
-+got_key:
-+			if (bkey_whiteout(k.k) &&
-+			    !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
-+				search_key = bkey_predecessor(iter, k.k->p);
-+				if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
-+					search_key.snapshot = U32_MAX;
-+				continue;
-+			}
-+
-+			break;
-+		} else if (likely(!bpos_eq(iter->path->l[0].b->data->min_key, POS_MIN))) {
-+			/* Advance to previous leaf node: */
-+			search_key = bpos_predecessor(iter->path->l[0].b->data->min_key);
-+		} else {
-+			/* Start of btree: */
-+			bch2_btree_iter_set_pos(iter, POS_MIN);
-+			k = bkey_s_c_null;
-+			goto out_no_locked;
-+		}
-+	}
-+
-+	EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos));
-+
-+	/* Extents can straddle iter->pos: */
-+	if (bkey_lt(k.k->p, iter->pos))
-+		iter->pos = k.k->p;
-+
-+	if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
-+		iter->pos.snapshot = iter->snapshot;
-+
-+	btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+	if (saved_path)
-+		bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
-+
-+	bch2_btree_iter_verify_entry_exit(iter);
-+	bch2_btree_iter_verify(iter);
-+
-+	return k;
-+}
-+
-+/**
-+ * bch2_btree_iter_prev() - returns first key less than iterator's current
-+ * position
-+ * @iter:	iterator to peek from
-+ *
-+ * Returns:	key if found, or an error extractable with bkey_err().
-+ */
-+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
-+{
-+	if (!bch2_btree_iter_rewind(iter))
-+		return bkey_s_c_null;
-+
-+	return bch2_btree_iter_peek_prev(iter);
-+}
-+
-+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
-+{
-+	struct btree_trans *trans = iter->trans;
-+	struct bpos search_key;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	bch2_btree_iter_verify(iter);
-+	bch2_btree_iter_verify_entry_exit(iter);
-+	EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
-+	EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
-+
-+	/* extents can't span inode numbers: */
-+	if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
-+	    unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
-+		if (iter->pos.inode == KEY_INODE_MAX)
-+			return bkey_s_c_null;
-+
-+		bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
-+	}
-+
-+	search_key = btree_iter_search_key(iter);
-+	iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
-+					iter->flags & BTREE_ITER_INTENT,
-+					btree_iter_ip_allocated(iter));
-+
-+	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
-+	if (unlikely(ret)) {
-+		k = bkey_s_c_err(ret);
-+		goto out_no_locked;
-+	}
-+
-+	if ((iter->flags & BTREE_ITER_CACHED) ||
-+	    !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
-+		struct bkey_i *next_update;
-+
-+		if ((next_update = btree_trans_peek_updates(iter)) &&
-+		    bpos_eq(next_update->k.p, iter->pos)) {
-+			iter->k = next_update->k;
-+			k = bkey_i_to_s_c(next_update);
-+			goto out;
-+		}
-+
-+		if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
-+		    (k = btree_trans_peek_slot_journal(trans, iter)).k)
-+			goto out;
-+
-+		if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
-+		    (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
-+			if (!bkey_err(k))
-+				iter->k = *k.k;
-+			/* We're not returning a key from iter->path: */
-+			goto out_no_locked;
-+		}
-+
-+		k = bch2_btree_path_peek_slot(iter->path, &iter->k);
-+		if (unlikely(!k.k))
-+			goto out_no_locked;
-+	} else {
-+		struct bpos next;
-+		struct bpos end = iter->pos;
-+
-+		if (iter->flags & BTREE_ITER_IS_EXTENTS)
-+			end.offset = U64_MAX;
-+
-+		EBUG_ON(iter->path->level);
-+
-+		if (iter->flags & BTREE_ITER_INTENT) {
-+			struct btree_iter iter2;
-+
-+			bch2_trans_copy_iter(&iter2, iter);
-+			k = bch2_btree_iter_peek_upto(&iter2, end);
-+
-+			if (k.k && !bkey_err(k)) {
-+				iter->k = iter2.k;
-+				k.k = &iter->k;
-+			}
-+			bch2_trans_iter_exit(trans, &iter2);
-+		} else {
-+			struct bpos pos = iter->pos;
-+
-+			k = bch2_btree_iter_peek_upto(iter, end);
-+			if (unlikely(bkey_err(k)))
-+				bch2_btree_iter_set_pos(iter, pos);
-+			else
-+				iter->pos = pos;
-+		}
-+
-+		if (unlikely(bkey_err(k)))
-+			goto out_no_locked;
-+
-+		next = k.k ? bkey_start_pos(k.k) : POS_MAX;
-+
-+		if (bkey_lt(iter->pos, next)) {
-+			bkey_init(&iter->k);
-+			iter->k.p = iter->pos;
-+
-+			if (iter->flags & BTREE_ITER_IS_EXTENTS) {
-+				bch2_key_resize(&iter->k,
-+						min_t(u64, KEY_SIZE_MAX,
-+						      (next.inode == iter->pos.inode
-+						       ? next.offset
-+						       : KEY_OFFSET_MAX) -
-+						      iter->pos.offset));
-+				EBUG_ON(!iter->k.size);
-+			}
-+
-+			k = (struct bkey_s_c) { &iter->k, NULL };
-+		}
-+	}
-+out:
-+	btree_path_set_should_be_locked(iter->path);
-+out_no_locked:
-+	bch2_btree_iter_verify_entry_exit(iter);
-+	bch2_btree_iter_verify(iter);
-+	ret = bch2_btree_iter_verify_ret(iter, k);
-+	if (unlikely(ret))
-+		return bkey_s_c_err(ret);
-+
-+	return k;
-+}
-+
-+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
-+{
-+	if (!bch2_btree_iter_advance(iter))
-+		return bkey_s_c_null;
-+
-+	return bch2_btree_iter_peek_slot(iter);
-+}
-+
-+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
-+{
-+	if (!bch2_btree_iter_rewind(iter))
-+		return bkey_s_c_null;
-+
-+	return bch2_btree_iter_peek_slot(iter);
-+}
-+
-+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
-+{
-+	struct bkey_s_c k;
-+
-+	while (btree_trans_too_many_iters(iter->trans) ||
-+	       (k = bch2_btree_iter_peek_type(iter, iter->flags),
-+		bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
-+		bch2_trans_begin(iter->trans);
-+
-+	return k;
-+}
-+
-+/* new transactional stuff: */
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+	unsigned i;
-+
-+	BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated));
-+
-+	trans_for_each_path(trans, path) {
-+		BUG_ON(path->sorted_idx >= trans->nr_sorted);
-+		BUG_ON(trans->sorted[path->sorted_idx] != path->idx);
-+	}
-+
-+	for (i = 0; i < trans->nr_sorted; i++) {
-+		unsigned idx = trans->sorted[i];
-+
-+		EBUG_ON(!(trans->paths_allocated & (1ULL << idx)));
-+		BUG_ON(trans->paths[idx].sorted_idx != i);
-+	}
-+}
-+
-+static void btree_trans_verify_sorted(struct btree_trans *trans)
-+{
-+	struct btree_path *path, *prev = NULL;
-+	unsigned i;
-+
-+	if (!bch2_debug_check_iterators)
-+		return;
-+
-+	trans_for_each_path_inorder(trans, path, i) {
-+		if (prev && btree_path_cmp(prev, path) > 0) {
-+			__bch2_dump_trans_paths_updates(trans, true);
-+			panic("trans paths out of order!\n");
-+		}
-+		prev = path;
-+	}
-+}
-+#else
-+static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
-+static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
-+#endif
-+
-+void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
-+{
-+	int i, l = 0, r = trans->nr_sorted, inc = 1;
-+	bool swapped;
-+
-+	btree_trans_verify_sorted_refs(trans);
-+
-+	if (trans->paths_sorted)
-+		goto out;
-+
-+	/*
-+	 * Cocktail shaker sort: this is efficient because iterators will be
-+	 * mostly sorted.
-+	 */
-+	do {
-+		swapped = false;
-+
-+		for (i = inc > 0 ? l : r - 2;
-+		     i + 1 < r && i >= l;
-+		     i += inc) {
-+			if (btree_path_cmp(trans->paths + trans->sorted[i],
-+					   trans->paths + trans->sorted[i + 1]) > 0) {
-+				swap(trans->sorted[i], trans->sorted[i + 1]);
-+				trans->paths[trans->sorted[i]].sorted_idx = i;
-+				trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
-+				swapped = true;
-+			}
-+		}
-+
-+		if (inc > 0)
-+			--r;
-+		else
-+			l++;
-+		inc = -inc;
-+	} while (swapped);
-+
-+	trans->paths_sorted = true;
-+out:
-+	btree_trans_verify_sorted(trans);
-+}
-+
-+static inline void btree_path_list_remove(struct btree_trans *trans,
-+					  struct btree_path *path)
-+{
-+	unsigned i;
-+
-+	EBUG_ON(path->sorted_idx >= trans->nr_sorted);
-+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-+	trans->nr_sorted--;
-+	memmove_u64s_down_small(trans->sorted + path->sorted_idx,
-+				trans->sorted + path->sorted_idx + 1,
-+				DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
-+#else
-+	array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
-+#endif
-+	for (i = path->sorted_idx; i < trans->nr_sorted; i++)
-+		trans->paths[trans->sorted[i]].sorted_idx = i;
-+
-+	path->sorted_idx = U8_MAX;
-+}
-+
-+static inline void btree_path_list_add(struct btree_trans *trans,
-+				       struct btree_path *pos,
-+				       struct btree_path *path)
-+{
-+	unsigned i;
-+
-+	path->sorted_idx = pos ? pos->sorted_idx + 1 : trans->nr_sorted;
-+
-+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-+	memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
-+			      trans->sorted + path->sorted_idx,
-+			      DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8));
-+	trans->nr_sorted++;
-+	trans->sorted[path->sorted_idx] = path->idx;
-+#else
-+	array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
-+#endif
-+
-+	for (i = path->sorted_idx; i < trans->nr_sorted; i++)
-+		trans->paths[trans->sorted[i]].sorted_idx = i;
-+
-+	btree_trans_verify_sorted_refs(trans);
-+}
-+
-+void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
-+{
-+	if (iter->update_path)
-+		bch2_path_put_nokeep(trans, iter->update_path,
-+			      iter->flags & BTREE_ITER_INTENT);
-+	if (iter->path)
-+		bch2_path_put(trans, iter->path,
-+			      iter->flags & BTREE_ITER_INTENT);
-+	if (iter->key_cache_path)
-+		bch2_path_put(trans, iter->key_cache_path,
-+			      iter->flags & BTREE_ITER_INTENT);
-+	iter->path = NULL;
-+	iter->update_path = NULL;
-+	iter->key_cache_path = NULL;
-+}
-+
-+void bch2_trans_iter_init_outlined(struct btree_trans *trans,
-+			  struct btree_iter *iter,
-+			  enum btree_id btree_id, struct bpos pos,
-+			  unsigned flags)
-+{
-+	bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
-+			       bch2_btree_iter_flags(trans, btree_id, flags),
-+			       _RET_IP_);
-+}
-+
-+void bch2_trans_node_iter_init(struct btree_trans *trans,
-+			       struct btree_iter *iter,
-+			       enum btree_id btree_id,
-+			       struct bpos pos,
-+			       unsigned locks_want,
-+			       unsigned depth,
-+			       unsigned flags)
-+{
-+	flags |= BTREE_ITER_NOT_EXTENTS;
-+	flags |= __BTREE_ITER_ALL_SNAPSHOTS;
-+	flags |= BTREE_ITER_ALL_SNAPSHOTS;
-+
-+	bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
-+			       __bch2_btree_iter_flags(trans, btree_id, flags),
-+			       _RET_IP_);
-+
-+	iter->min_depth = depth;
-+
-+	BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH));
-+	BUG_ON(iter->path->level != depth);
-+	BUG_ON(iter->min_depth != depth);
-+}
-+
-+void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
-+{
-+	*dst = *src;
-+	if (src->path)
-+		__btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
-+	if (src->update_path)
-+		__btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
-+	dst->key_cache_path = NULL;
-+}
-+
-+void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
-+{
-+	unsigned new_top = trans->mem_top + size;
-+	size_t old_bytes = trans->mem_bytes;
-+	size_t new_bytes = roundup_pow_of_two(new_top);
-+	int ret;
-+	void *new_mem;
-+	void *p;
-+
-+	trans->mem_max = max(trans->mem_max, new_top);
-+
-+	WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
-+
-+	new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
-+	if (unlikely(!new_mem)) {
-+		bch2_trans_unlock(trans);
-+
-+		new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
-+		if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
-+			new_mem = mempool_alloc(&trans->c->btree_trans_mem_pool, GFP_KERNEL);
-+			new_bytes = BTREE_TRANS_MEM_MAX;
-+			kfree(trans->mem);
-+		}
-+
-+		if (!new_mem)
-+			return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
-+
-+		trans->mem = new_mem;
-+		trans->mem_bytes = new_bytes;
-+
-+		ret = bch2_trans_relock(trans);
-+		if (ret)
-+			return ERR_PTR(ret);
-+	}
-+
-+	trans->mem = new_mem;
-+	trans->mem_bytes = new_bytes;
-+
-+	if (old_bytes) {
-+		trace_and_count(trans->c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
-+		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
-+	}
-+
-+	p = trans->mem + trans->mem_top;
-+	trans->mem_top += size;
-+	memset(p, 0, size);
-+	return p;
-+}
-+
-+static inline void check_srcu_held_too_long(struct btree_trans *trans)
-+{
-+	WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
-+	     "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
-+	     (jiffies - trans->srcu_lock_time) / HZ);
-+}
-+
-+void bch2_trans_srcu_unlock(struct btree_trans *trans)
-+{
-+	if (trans->srcu_held) {
-+		struct bch_fs *c = trans->c;
-+		struct btree_path *path;
-+
-+		trans_for_each_path(trans, path)
-+			if (path->cached && !btree_node_locked(path, 0))
-+				path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
-+
-+		check_srcu_held_too_long(trans);
-+		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-+		trans->srcu_held = false;
-+	}
-+}
-+
-+void bch2_trans_srcu_lock(struct btree_trans *trans)
-+{
-+	if (!trans->srcu_held) {
-+		trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
-+		trans->srcu_lock_time = jiffies;
-+		trans->srcu_held = true;
-+	}
-+}
-+
-+/**
-+ * bch2_trans_begin() - reset a transaction after a interrupted attempt
-+ * @trans: transaction to reset
-+ *
-+ * Returns:	current restart counter, to be used with trans_was_restarted()
-+ *
-+ * While iterating over nodes or updating nodes a attempt to lock a btree node
-+ * may return BCH_ERR_transaction_restart when the trylock fails. When this
-+ * occurs bch2_trans_begin() should be called and the transaction retried.
-+ */
-+u32 bch2_trans_begin(struct btree_trans *trans)
-+{
-+	struct btree_path *path;
-+	u64 now;
-+
-+	bch2_trans_reset_updates(trans);
-+
-+	trans->restart_count++;
-+	trans->mem_top = 0;
-+
-+	trans_for_each_path(trans, path) {
-+		path->should_be_locked = false;
-+
-+		/*
-+		 * If the transaction wasn't restarted, we're presuming to be
-+		 * doing something new: dont keep iterators excpt the ones that
-+		 * are in use - except for the subvolumes btree:
-+		 */
-+		if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
-+			path->preserve = false;
-+
-+		/*
-+		 * XXX: we probably shouldn't be doing this if the transaction
-+		 * was restarted, but currently we still overflow transaction
-+		 * iterators if we do that
-+		 */
-+		if (!path->ref && !path->preserve)
-+			__bch2_path_free(trans, path);
-+		else
-+			path->preserve = false;
-+	}
-+
-+	now = local_clock();
-+	if (!trans->restarted &&
-+	    (need_resched() ||
-+	     now - trans->last_begin_time > BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS)) {
-+		drop_locks_do(trans, (cond_resched(), 0));
-+		now = local_clock();
-+	}
-+	trans->last_begin_time = now;
-+
-+	if (unlikely(trans->srcu_held &&
-+		     time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
-+		bch2_trans_srcu_unlock(trans);
-+
-+	trans->last_begin_ip = _RET_IP_;
-+	if (trans->restarted) {
-+		bch2_btree_path_traverse_all(trans);
-+		trans->notrace_relock_fail = false;
-+	}
-+
-+	return trans->restart_count;
-+}
-+
-+static struct btree_trans *bch2_trans_alloc(struct bch_fs *c)
-+{
-+	struct btree_trans *trans;
-+
-+	if (IS_ENABLED(__KERNEL__)) {
-+		trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
-+		if (trans)
-+			return trans;
-+	}
-+
-+	trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
-+	/*
-+	 * paths need to be zeroed, bch2_check_for_deadlock looks at
-+	 * paths in other threads
-+	 */
-+	memset(&trans->paths, 0, sizeof(trans->paths));
-+	return trans;
-+}
-+
-+const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
-+
-+unsigned bch2_trans_get_fn_idx(const char *fn)
-+{
-+	unsigned i;
-+
-+	for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
-+		if (!bch2_btree_transaction_fns[i] ||
-+		    bch2_btree_transaction_fns[i] == fn) {
-+			bch2_btree_transaction_fns[i] = fn;
-+			return i;
-+		}
-+
-+	pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
-+	return i;
-+}
-+
-+struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
-+	__acquires(&c->btree_trans_barrier)
-+{
-+	struct btree_trans *trans;
-+	struct btree_transaction_stats *s;
-+
-+	trans = bch2_trans_alloc(c);
-+
-+	memset(trans, 0, sizeof(*trans));
-+	trans->c = c;
-+	trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
-+		? bch2_btree_transaction_fns[fn_idx] : NULL;
-+	trans->last_begin_time = local_clock();
-+	trans->fn_idx = fn_idx;
-+	trans->locking_wait.task = current;
-+	trans->journal_replay_not_finished =
-+		!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
-+	closure_init_stack(&trans->ref);
-+
-+	s = btree_trans_stats(trans);
-+	if (s && s->max_mem) {
-+		unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);
-+
-+		trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
-+
-+		if (!unlikely(trans->mem)) {
-+			trans->mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
-+			trans->mem_bytes = BTREE_TRANS_MEM_MAX;
-+		} else {
-+			trans->mem_bytes = expected_mem_bytes;
-+		}
-+	}
-+
-+	if (s) {
-+		trans->nr_max_paths = s->nr_max_paths;
-+		trans->wb_updates_size = s->wb_updates_size;
-+	}
-+
-+	trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
-+	trans->srcu_lock_time = jiffies;
-+	trans->srcu_held = true;
-+
-+	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-+		struct btree_trans *pos;
-+
-+		seqmutex_lock(&c->btree_trans_lock);
-+		list_for_each_entry(pos, &c->btree_trans_list, list) {
-+			/*
-+			 * We'd much prefer to be stricter here and completely
-+			 * disallow multiple btree_trans in the same thread -
-+			 * but the data move path calls bch2_write when we
-+			 * already have a btree_trans initialized.
-+			 */
-+			BUG_ON(trans->locking_wait.task->pid == pos->locking_wait.task->pid &&
-+			       bch2_trans_locked(pos));
-+
-+			if (trans->locking_wait.task->pid < pos->locking_wait.task->pid) {
-+				list_add_tail(&trans->list, &pos->list);
-+				goto list_add_done;
-+			}
-+		}
-+		list_add_tail(&trans->list, &c->btree_trans_list);
-+list_add_done:
-+		seqmutex_unlock(&c->btree_trans_lock);
-+	}
-+
-+	return trans;
-+}
-+
-+static void check_btree_paths_leaked(struct btree_trans *trans)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+	struct bch_fs *c = trans->c;
-+	struct btree_path *path;
-+
-+	trans_for_each_path(trans, path)
-+		if (path->ref)
-+			goto leaked;
-+	return;
-+leaked:
-+	bch_err(c, "btree paths leaked from %s!", trans->fn);
-+	trans_for_each_path(trans, path)
-+		if (path->ref)
-+			printk(KERN_ERR " btree %s %pS\n",
-+			       bch2_btree_id_str(path->btree_id),
-+			       (void *) path->ip_allocated);
-+	/* Be noisy about this: */
-+	bch2_fatal_error(c);
-+#endif
-+}
-+
-+void bch2_trans_put(struct btree_trans *trans)
-+	__releases(&c->btree_trans_barrier)
-+{
-+	struct btree_insert_entry *i;
-+	struct bch_fs *c = trans->c;
-+	struct btree_transaction_stats *s = btree_trans_stats(trans);
-+
-+	bch2_trans_unlock(trans);
-+
-+	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-+		seqmutex_lock(&c->btree_trans_lock);
-+		list_del(&trans->list);
-+		seqmutex_unlock(&c->btree_trans_lock);
-+	}
-+
-+	closure_sync(&trans->ref);
-+
-+	if (s)
-+		s->max_mem = max(s->max_mem, trans->mem_max);
-+
-+	trans_for_each_update(trans, i)
-+		__btree_path_put(i->path, true);
-+	trans->nr_updates = 0;
-+
-+	check_btree_paths_leaked(trans);
-+
-+	if (trans->srcu_held) {
-+		check_srcu_held_too_long(trans);
-+		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-+	}
-+
-+	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-+
-+	kfree(trans->extra_journal_entries.data);
-+
-+	if (trans->fs_usage_deltas) {
-+		if (trans->fs_usage_deltas->size + sizeof(trans->fs_usage_deltas) ==
-+		    REPLICAS_DELTA_LIST_MAX)
-+			mempool_free(trans->fs_usage_deltas,
-+				     &c->replicas_delta_pool);
-+		else
-+			kfree(trans->fs_usage_deltas);
-+	}
-+
-+	if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
-+		mempool_free(trans->mem, &c->btree_trans_mem_pool);
-+	else
-+
kfree(trans->mem); -+ -+ /* Userspace doesn't have a real percpu implementation: */ -+ if (IS_ENABLED(__KERNEL__)) -+ trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans); -+ if (trans) -+ mempool_free(trans, &c->btree_trans_pool); -+} -+ -+static void __maybe_unused -+bch2_btree_bkey_cached_common_to_text(struct printbuf *out, -+ struct btree_bkey_cached_common *b) -+{ -+ struct six_lock_count c = six_lock_counts(&b->lock); -+ struct task_struct *owner; -+ pid_t pid; -+ -+ rcu_read_lock(); -+ owner = READ_ONCE(b->lock.owner); -+ pid = owner ? owner->pid : 0; -+ rcu_read_unlock(); -+ -+ prt_tab(out); -+ prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b', -+ b->level, bch2_btree_id_str(b->btree_id)); -+ bch2_bpos_to_text(out, btree_node_pos(b)); -+ -+ prt_tab(out); -+ prt_printf(out, " locks %u:%u:%u held by pid %u", -+ c.n[0], c.n[1], c.n[2], pid); -+} -+ -+void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ struct btree_bkey_cached_common *b; -+ static char lock_types[] = { 'r', 'i', 'w' }; -+ unsigned l, idx; -+ -+ if (!out->nr_tabstops) { -+ printbuf_tabstop_push(out, 16); -+ printbuf_tabstop_push(out, 32); -+ } -+ -+ prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn); -+ -+ trans_for_each_path_safe(trans, path, idx) { -+ if (!path->nodes_locked) -+ continue; -+ -+ prt_printf(out, " path %u %c l=%u %s:", -+ path->idx, -+ path->cached ? 'c' : 'b', -+ path->level, -+ bch2_btree_id_str(path->btree_id)); -+ bch2_bpos_to_text(out, path->pos); -+ prt_newline(out); -+ -+ for (l = 0; l < BTREE_MAX_DEPTH; l++) { -+ if (btree_node_locked(path, l) && -+ !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) { -+ prt_printf(out, " %c l=%u ", -+ lock_types[btree_node_locked_type(path, l)], l); -+ bch2_btree_bkey_cached_common_to_text(out, b); -+ prt_newline(out); -+ } -+ } -+ } -+ -+ b = READ_ONCE(trans->locking); -+ if (b) { -+ prt_printf(out, " blocked for %lluus on", -+ div_u64(local_clock() - trans->locking_wait.start_time, -+ 1000)); -+ prt_newline(out); -+ prt_printf(out, " %c", lock_types[trans->locking_wait.lock_want]); -+ bch2_btree_bkey_cached_common_to_text(out, b); -+ prt_newline(out); -+ } -+} -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *c) -+{ -+ struct btree_transaction_stats *s; -+ struct btree_trans *trans; -+ int cpu; -+ -+ trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list); -+ if (trans) -+ panic("%s leaked btree_trans\n", trans->fn); -+ -+ if (c->btree_trans_bufs) -+ for_each_possible_cpu(cpu) -+ kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans); -+ free_percpu(c->btree_trans_bufs); -+ -+ for (s = c->btree_transaction_stats; -+ s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); -+ s++) { -+ kfree(s->max_paths_text); -+ bch2_time_stats_exit(&s->lock_hold_times); -+ } -+ -+ if (c->btree_trans_barrier_initialized) -+ cleanup_srcu_struct(&c->btree_trans_barrier); -+ mempool_exit(&c->btree_trans_mem_pool); -+ mempool_exit(&c->btree_trans_pool); -+} -+ -+int bch2_fs_btree_iter_init(struct bch_fs *c) -+{ -+ struct btree_transaction_stats *s; -+ int ret; -+ -+ for (s = c->btree_transaction_stats; -+ s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); -+ s++) { -+ bch2_time_stats_init(&s->lock_hold_times); -+ mutex_init(&s->lock); -+ } -+ -+ INIT_LIST_HEAD(&c->btree_trans_list); -+ seqmutex_init(&c->btree_trans_lock); -+ -+ c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf); -+ if (!c->btree_trans_bufs) -+ 
return -ENOMEM; -+ -+ ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1, -+ sizeof(struct btree_trans)) ?: -+ mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, -+ BTREE_TRANS_MEM_MAX) ?: -+ init_srcu_struct(&c->btree_trans_barrier); -+ if (!ret) -+ c->btree_trans_barrier_initialized = true; -+ return ret; -+} -diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h -new file mode 100644 -index 000000000000..85e7cb52f6b6 ---- /dev/null -+++ b/fs/bcachefs/btree_iter.h -@@ -0,0 +1,943 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_ITER_H -+#define _BCACHEFS_BTREE_ITER_H -+ -+#include "bset.h" -+#include "btree_types.h" -+#include "trace.h" -+ -+static inline int __bkey_err(const struct bkey *k) -+{ -+ return PTR_ERR_OR_ZERO(k); -+} -+ -+#define bkey_err(_k) __bkey_err((_k).k) -+ -+static inline void __btree_path_get(struct btree_path *path, bool intent) -+{ -+ path->ref++; -+ path->intent_ref += intent; -+} -+ -+static inline bool __btree_path_put(struct btree_path *path, bool intent) -+{ -+ EBUG_ON(!path->ref); -+ EBUG_ON(!path->intent_ref && intent); -+ path->intent_ref -= intent; -+ return --path->ref == 0; -+} -+ -+static inline void btree_path_set_dirty(struct btree_path *path, -+ enum btree_path_uptodate u) -+{ -+ path->uptodate = max_t(unsigned, path->uptodate, u); -+} -+ -+static inline struct btree *btree_path_node(struct btree_path *path, -+ unsigned level) -+{ -+ return level < BTREE_MAX_DEPTH ? path->l[level].b : NULL; -+} -+ -+static inline bool btree_node_lock_seq_matches(const struct btree_path *path, -+ const struct btree *b, unsigned level) -+{ -+ return path->l[level].lock_seq == six_lock_seq(&b->c.lock); -+} -+ -+static inline struct btree *btree_node_parent(struct btree_path *path, -+ struct btree *b) -+{ -+ return btree_path_node(path, b->c.level + 1); -+} -+ -+/* Iterate over paths within a transaction: */ -+ -+void __bch2_btree_trans_sort_paths(struct btree_trans *); -+ -+static inline void btree_trans_sort_paths(struct btree_trans *trans) -+{ -+ if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && -+ trans->paths_sorted) -+ return; -+ __bch2_btree_trans_sort_paths(trans); -+} -+ -+static inline struct btree_path * -+__trans_next_path(struct btree_trans *trans, unsigned idx) -+{ -+ u64 l; -+ -+ if (idx == BTREE_ITER_MAX) -+ return NULL; -+ -+ l = trans->paths_allocated >> idx; -+ if (!l) -+ return NULL; -+ -+ idx += __ffs64(l); -+ EBUG_ON(idx >= BTREE_ITER_MAX); -+ EBUG_ON(trans->paths[idx].idx != idx); -+ return &trans->paths[idx]; -+} -+ -+#define trans_for_each_path_from(_trans, _path, _start) \ -+ for (_path = __trans_next_path((_trans), _start); \ -+ (_path); \ -+ _path = __trans_next_path((_trans), (_path)->idx + 1)) -+ -+#define trans_for_each_path(_trans, _path) \ -+ trans_for_each_path_from(_trans, _path, 0) -+ -+static inline struct btree_path * -+__trans_next_path_safe(struct btree_trans *trans, unsigned *idx) -+{ -+ u64 l; -+ -+ if (*idx == BTREE_ITER_MAX) -+ return NULL; -+ -+ l = trans->paths_allocated >> *idx; -+ if (!l) -+ return NULL; -+ -+ *idx += __ffs64(l); -+ EBUG_ON(*idx >= BTREE_ITER_MAX); -+ return &trans->paths[*idx]; -+} -+ -+/* -+ * This version is intended to be safe for use on a btree_trans that is owned by -+ * another thread, for bch2_btree_trans_to_text(); -+ */ -+#define trans_for_each_path_safe_from(_trans, _path, _idx, _start) \ -+ for (_idx = _start; \ -+ (_path = __trans_next_path_safe((_trans), &_idx)); \ -+ _idx++) -+ -+#define trans_for_each_path_safe(_trans, _path, _idx) \ -+ 
trans_for_each_path_safe_from(_trans, _path, _idx, 0) -+ -+static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) -+{ -+ unsigned idx = path ? path->sorted_idx + 1 : 0; -+ -+ EBUG_ON(idx > trans->nr_sorted); -+ -+ return idx < trans->nr_sorted -+ ? trans->paths + trans->sorted[idx] -+ : NULL; -+} -+ -+static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path) -+{ -+ unsigned idx = path ? path->sorted_idx : trans->nr_sorted; -+ -+ return idx -+ ? trans->paths + trans->sorted[idx - 1] -+ : NULL; -+} -+ -+#define trans_for_each_path_inorder(_trans, _path, _i) \ -+ for (_i = 0; \ -+ ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\ -+ _i++) -+ -+#define trans_for_each_path_inorder_reverse(_trans, _path, _i) \ -+ for (_i = trans->nr_sorted - 1; \ -+ ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) >= 0;\ -+ --_i) -+ -+static inline bool __path_has_node(const struct btree_path *path, -+ const struct btree *b) -+{ -+ return path->l[b->c.level].b == b && -+ btree_node_lock_seq_matches(path, b, b->c.level); -+} -+ -+static inline struct btree_path * -+__trans_next_path_with_node(struct btree_trans *trans, struct btree *b, -+ unsigned idx) -+{ -+ struct btree_path *path = __trans_next_path(trans, idx); -+ -+ while (path && !__path_has_node(path, b)) -+ path = __trans_next_path(trans, path->idx + 1); -+ -+ return path; -+} -+ -+#define trans_for_each_path_with_node(_trans, _b, _path) \ -+ for (_path = __trans_next_path_with_node((_trans), (_b), 0); \ -+ (_path); \ -+ _path = __trans_next_path_with_node((_trans), (_b), \ -+ (_path)->idx + 1)) -+ -+struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *, -+ bool, unsigned long); -+ -+static inline struct btree_path * __must_check -+bch2_btree_path_make_mut(struct btree_trans *trans, -+ struct btree_path *path, bool intent, -+ unsigned long ip) -+{ -+ if (path->ref > 1 || path->preserve) -+ path = __bch2_btree_path_make_mut(trans, path, intent, ip); -+ path->should_be_locked = false; -+ return path; -+} -+ -+struct btree_path * __must_check -+__bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *, -+ struct bpos, bool, unsigned long, int); -+ -+static inline struct btree_path * __must_check -+bch2_btree_path_set_pos(struct btree_trans *trans, -+ struct btree_path *path, struct bpos new_pos, -+ bool intent, unsigned long ip) -+{ -+ int cmp = bpos_cmp(new_pos, path->pos); -+ -+ return cmp -+ ? 
__bch2_btree_path_set_pos(trans, path, new_pos, intent, ip, cmp) -+ : path; -+} -+ -+int __must_check bch2_btree_path_traverse_one(struct btree_trans *, struct btree_path *, -+ unsigned, unsigned long); -+ -+static inline int __must_check bch2_btree_path_traverse(struct btree_trans *trans, -+ struct btree_path *path, unsigned flags) -+{ -+ if (path->uptodate < BTREE_ITER_NEED_RELOCK) -+ return 0; -+ -+ return bch2_btree_path_traverse_one(trans, path, flags, _RET_IP_); -+} -+ -+int __must_check bch2_btree_path_traverse(struct btree_trans *, -+ struct btree_path *, unsigned); -+struct btree_path *bch2_path_get(struct btree_trans *, enum btree_id, struct bpos, -+ unsigned, unsigned, unsigned, unsigned long); -+struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *); -+ -+/* -+ * bch2_btree_path_peek_slot() for a cached iterator might return a key in a -+ * different snapshot: -+ */ -+static inline struct bkey_s_c bch2_btree_path_peek_slot_exact(struct btree_path *path, struct bkey *u) -+{ -+ struct bkey_s_c k = bch2_btree_path_peek_slot(path, u); -+ -+ if (k.k && bpos_eq(path->pos, k.k->p)) -+ return k; -+ -+ bkey_init(u); -+ u->p = path->pos; -+ return (struct bkey_s_c) { u, NULL }; -+} -+ -+struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *, -+ struct btree_iter *, struct bpos); -+ -+void bch2_btree_path_level_init(struct btree_trans *, struct btree_path *, struct btree *); -+ -+int __bch2_trans_mutex_lock(struct btree_trans *, struct mutex *); -+ -+static inline int bch2_trans_mutex_lock(struct btree_trans *trans, struct mutex *lock) -+{ -+ return mutex_trylock(lock) -+ ? 0 -+ : __bch2_trans_mutex_lock(trans, lock); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_trans_verify_paths(struct btree_trans *); -+void bch2_assert_pos_locked(struct btree_trans *, enum btree_id, -+ struct bpos, bool); -+#else -+static inline void bch2_trans_verify_paths(struct btree_trans *trans) {} -+static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, -+ struct bpos pos, bool key_cache) {} -+#endif -+ -+void bch2_btree_path_fix_key_modified(struct btree_trans *trans, -+ struct btree *, struct bkey_packed *); -+void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *, -+ struct btree *, struct btree_node_iter *, -+ struct bkey_packed *, unsigned, unsigned); -+ -+int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); -+ -+void bch2_path_put(struct btree_trans *, struct btree_path *, bool); -+ -+int bch2_trans_relock(struct btree_trans *); -+int bch2_trans_relock_notrace(struct btree_trans *); -+void bch2_trans_unlock(struct btree_trans *); -+void bch2_trans_unlock_long(struct btree_trans *); -+bool bch2_trans_locked(struct btree_trans *); -+ -+static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count) -+{ -+ return restart_count != trans->restart_count -+ ? 
-BCH_ERR_transaction_restart_nested -+ : 0; -+} -+ -+void __noreturn bch2_trans_restart_error(struct btree_trans *, u32); -+ -+static inline void bch2_trans_verify_not_restarted(struct btree_trans *trans, -+ u32 restart_count) -+{ -+ if (trans_was_restarted(trans, restart_count)) -+ bch2_trans_restart_error(trans, restart_count); -+} -+ -+void __noreturn bch2_trans_in_restart_error(struct btree_trans *); -+ -+static inline void bch2_trans_verify_not_in_restart(struct btree_trans *trans) -+{ -+ if (trans->restarted) -+ bch2_trans_in_restart_error(trans); -+} -+ -+__always_inline -+static int btree_trans_restart_nounlock(struct btree_trans *trans, int err) -+{ -+ BUG_ON(err <= 0); -+ BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart)); -+ -+ trans->restarted = err; -+ trans->last_restarted_ip = _THIS_IP_; -+ return -err; -+} -+ -+__always_inline -+static int btree_trans_restart(struct btree_trans *trans, int err) -+{ -+ btree_trans_restart_nounlock(trans, err); -+ return -err; -+} -+ -+bool bch2_btree_node_upgrade(struct btree_trans *, -+ struct btree_path *, unsigned); -+ -+void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned); -+ -+static inline void bch2_btree_path_downgrade(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ unsigned new_locks_want = path->level + !!path->intent_ref; -+ -+ if (path->locks_want > new_locks_want) -+ __bch2_btree_path_downgrade(trans, path, new_locks_want); -+} -+ -+void bch2_trans_downgrade(struct btree_trans *); -+ -+void bch2_trans_node_add(struct btree_trans *trans, struct btree *); -+void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *); -+ -+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter); -+int __must_check bch2_btree_iter_traverse(struct btree_iter *); -+ -+struct btree *bch2_btree_iter_peek_node(struct btree_iter *); -+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *); -+struct btree *bch2_btree_iter_next_node(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); -+struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *); -+ -+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) -+{ -+ return bch2_btree_iter_peek_upto(iter, SPOS_MAX); -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); -+ -+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); -+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); -+ -+bool bch2_btree_iter_advance(struct btree_iter *); -+bool bch2_btree_iter_rewind(struct btree_iter *); -+ -+static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) -+{ -+ iter->k.type = KEY_TYPE_deleted; -+ iter->k.p.inode = iter->pos.inode = new_pos.inode; -+ iter->k.p.offset = iter->pos.offset = new_pos.offset; -+ iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot; -+ iter->k.size = 0; -+} -+ -+static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) -+{ -+ if (unlikely(iter->update_path)) -+ bch2_path_put(iter->trans, iter->update_path, -+ iter->flags & BTREE_ITER_INTENT); -+ iter->update_path = NULL; -+ -+ if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) -+ new_pos.snapshot = iter->snapshot; -+ -+ __bch2_btree_iter_set_pos(iter, new_pos); -+} -+ 
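Editorial sketch, not part of the patch: a manual key walk built from the peek/advance primitives declared above (bch2_trans_iter_init()/bch2_trans_iter_exit() are declared a little further down in this header). BTREE_ID_extents and POS_MIN are example arguments only, and restart errors are simply returned to the caller, which would normally run this under lockrestart_do():

static int demo_walk(struct btree_trans *trans)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS_MIN, 0);

	while (1) {
		k = bch2_btree_iter_peek(&iter);
		ret = bkey_err(k);
		if (ret || !k.k)	/* error/restart, or no more keys */
			break;

		/* ... use k.k (the key) and k.v (the value) here ... */

		if (!bch2_btree_iter_advance(&iter))
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}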
-+static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter) -+{ -+ BUG_ON(!(iter->flags & BTREE_ITER_IS_EXTENTS)); -+ iter->pos = bkey_start_pos(&iter->k); -+} -+ -+static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot) -+{ -+ struct bpos pos = iter->pos; -+ -+ iter->snapshot = snapshot; -+ pos.snapshot = snapshot; -+ bch2_btree_iter_set_pos(iter, pos); -+} -+ -+void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); -+ -+static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, -+ unsigned btree_id, -+ unsigned flags) -+{ -+ if (flags & BTREE_ITER_ALL_LEVELS) -+ flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS; -+ -+ if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && -+ btree_id_is_extents(btree_id)) -+ flags |= BTREE_ITER_IS_EXTENTS; -+ -+ if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && -+ !btree_type_has_snapshot_field(btree_id)) -+ flags &= ~BTREE_ITER_ALL_SNAPSHOTS; -+ -+ if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) && -+ btree_type_has_snapshots(btree_id)) -+ flags |= BTREE_ITER_FILTER_SNAPSHOTS; -+ -+ if (trans->journal_replay_not_finished) -+ flags |= BTREE_ITER_WITH_JOURNAL; -+ -+ return flags; -+} -+ -+static inline unsigned bch2_btree_iter_flags(struct btree_trans *trans, -+ unsigned btree_id, -+ unsigned flags) -+{ -+ if (!btree_id_cached(trans->c, btree_id)) { -+ flags &= ~BTREE_ITER_CACHED; -+ flags &= ~BTREE_ITER_WITH_KEY_CACHE; -+ } else if (!(flags & BTREE_ITER_CACHED)) -+ flags |= BTREE_ITER_WITH_KEY_CACHE; -+ -+ return __bch2_btree_iter_flags(trans, btree_id, flags); -+} -+ -+static inline void bch2_trans_iter_init_common(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned locks_want, -+ unsigned depth, -+ unsigned flags, -+ unsigned long ip) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ iter->trans = trans; -+ iter->btree_id = btree_id; -+ iter->flags = flags; -+ iter->snapshot = pos.snapshot; -+ iter->pos = pos; -+ iter->k.p = pos; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ iter->ip_allocated = ip; -+#endif -+ iter->path = bch2_path_get(trans, btree_id, iter->pos, -+ locks_want, depth, flags, ip); -+} -+ -+void bch2_trans_iter_init_outlined(struct btree_trans *, struct btree_iter *, -+ enum btree_id, struct bpos, unsigned); -+ -+static inline void bch2_trans_iter_init(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ if (__builtin_constant_p(btree_id) && -+ __builtin_constant_p(flags)) -+ bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0, -+ bch2_btree_iter_flags(trans, btree_id, flags), -+ _THIS_IP_); -+ else -+ bch2_trans_iter_init_outlined(trans, iter, btree_id, pos, flags); -+} -+ -+void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *, -+ enum btree_id, struct bpos, -+ unsigned, unsigned, unsigned); -+void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); -+ -+static inline void set_btree_iter_dontneed(struct btree_iter *iter) -+{ -+ if (!iter->trans->restarted) -+ iter->path->preserve = false; -+} -+ -+void *__bch2_trans_kmalloc(struct btree_trans *, size_t); -+ -+static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) -+{ -+ size = roundup(size, 8); -+ -+ if (likely(trans->mem_top + size <= trans->mem_bytes)) { -+ void *p = trans->mem + trans->mem_top; -+ -+ trans->mem_top += size; -+ memset(p, 0, size); -+ return p; -+ } else { -+ return __bch2_trans_kmalloc(trans, size); -+ } 
-+} -+ -+static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size) -+{ -+ size = roundup(size, 8); -+ -+ if (likely(trans->mem_top + size <= trans->mem_bytes)) { -+ void *p = trans->mem + trans->mem_top; -+ -+ trans->mem_top += size; -+ return p; -+ } else { -+ return __bch2_trans_kmalloc(trans, size); -+ } -+} -+ -+static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags, unsigned type) -+{ -+ struct bkey_s_c k; -+ -+ bch2_trans_iter_init(trans, iter, btree_id, pos, flags); -+ k = bch2_btree_iter_peek_slot(iter); -+ -+ if (!bkey_err(k) && type && k.k->type != type) -+ k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch); -+ if (unlikely(bkey_err(k))) -+ bch2_trans_iter_exit(trans, iter); -+ return k; -+} -+ -+static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ return __bch2_bkey_get_iter(trans, iter, btree_id, pos, flags, 0); -+} -+ -+#define bch2_bkey_get_iter_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\ -+ bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ -+ _btree_id, _pos, _flags, KEY_TYPE_##_type)) -+ -+static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags, unsigned type, -+ unsigned val_size, void *val) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); -+ ret = bkey_err(k); -+ if (!ret) { -+ unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); -+ -+ memcpy(val, k.v, b); -+ if (unlikely(b < sizeof(*val))) -+ memset((void *) val + b, 0, sizeof(*val) - b); -+ bch2_trans_iter_exit(trans, &iter); -+ } -+ -+ return ret; -+} -+ -+#define bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, _type, _val)\ -+ __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \ -+ KEY_TYPE_##_type, sizeof(*_val), _val) -+ -+void bch2_trans_srcu_unlock(struct btree_trans *); -+void bch2_trans_srcu_lock(struct btree_trans *); -+ -+u32 bch2_trans_begin(struct btree_trans *); -+ -+/* -+ * XXX -+ * this does not handle transaction restarts from bch2_btree_iter_next_node() -+ * correctly -+ */ -+#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _locks_want, _depth, _flags, _b, _ret) \ -+ for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ -+ _start, _locks_want, _depth, _flags); \ -+ (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ -+ !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ -+ (_b) = bch2_btree_iter_next_node(&(_iter))) -+ -+#define for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ _flags, _b, _ret) \ -+ __for_each_btree_node(_trans, _iter, _btree_id, _start, \ -+ 0, 0, _flags, _b, _ret) -+ -+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, -+ unsigned flags) -+{ -+ BUG_ON(flags & BTREE_ITER_ALL_LEVELS); -+ -+ return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : -+ bch2_btree_iter_peek_prev(iter); -+} -+ -+static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, -+ unsigned flags) -+{ -+ return flags & BTREE_ITER_ALL_LEVELS ? bch2_btree_iter_peek_all_levels(iter) : -+ flags & BTREE_ITER_SLOTS ? 
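/*
 * (Editorial note, not part of the patch: the typed getters above paste
 * KEY_TYPE_##_type, so reading one typed key looks like
 *
 *	struct btree_iter iter;
 *	struct bkey_s_c_inode_v3 inode =
 *		bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_inodes,
 *					 pos, 0, inode_v3);
 *	int ret = bkey_err(inode.s_c);
 *
 *	if (!ret)
 *		bch2_trans_iter_exit(trans, &iter);
 *
 * A wrong key type comes back as -BCH_ERR_ENOENT_bkey_type_mismatch, and
 * on any error the iterator has already been exited by
 * __bch2_bkey_get_iter().  BTREE_ID_inodes/inode_v3/pos are example
 * arguments only.)
 */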
bch2_btree_iter_peek_slot(iter) : -+ bch2_btree_iter_peek(iter); -+} -+ -+static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, -+ struct bpos end, -+ unsigned flags) -+{ -+ if (!(flags & BTREE_ITER_SLOTS)) -+ return bch2_btree_iter_peek_upto(iter, end); -+ -+ if (bkey_gt(iter->pos, end)) -+ return bkey_s_c_null; -+ -+ return bch2_btree_iter_peek_slot(iter); -+} -+ -+static inline int btree_trans_too_many_iters(struct btree_trans *trans) -+{ -+ if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) { -+ trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); -+ } -+ -+ return 0; -+} -+ -+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *); -+ -+static inline struct bkey_s_c -+__bch2_btree_iter_peek_and_restart(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned flags) -+{ -+ struct bkey_s_c k; -+ -+ while (btree_trans_too_many_iters(trans) || -+ (k = bch2_btree_iter_peek_type(iter, flags), -+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) -+ bch2_trans_begin(trans); -+ -+ return k; -+} -+ -+static inline struct bkey_s_c -+__bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos end, -+ unsigned flags) -+{ -+ struct bkey_s_c k; -+ -+ while (btree_trans_too_many_iters(trans) || -+ (k = bch2_btree_iter_peek_upto_type(iter, end, flags), -+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) -+ bch2_trans_begin(trans); -+ -+ return k; -+} -+ -+#define lockrestart_do(_trans, _do) \ -+({ \ -+ u32 _restart_count; \ -+ int _ret2; \ -+ \ -+ do { \ -+ _restart_count = bch2_trans_begin(_trans); \ -+ _ret2 = (_do); \ -+ } while (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)); \ -+ \ -+ if (!_ret2) \ -+ bch2_trans_verify_not_restarted(_trans, _restart_count);\ -+ \ -+ _ret2; \ -+}) -+ -+/* -+ * nested_lockrestart_do(), nested_commit_do(): -+ * -+ * These are like lockrestart_do() and commit_do(), with two differences: -+ * -+ * - We don't call bch2_trans_begin() unless we had a transaction restart -+ * - We return -BCH_ERR_transaction_restart_nested if we succeeded after a -+ * transaction restart -+ */ -+#define nested_lockrestart_do(_trans, _do) \ -+({ \ -+ u32 _restart_count, _orig_restart_count; \ -+ int _ret2; \ -+ \ -+ _restart_count = _orig_restart_count = (_trans)->restart_count; \ -+ \ -+ while (bch2_err_matches(_ret2 = (_do), BCH_ERR_transaction_restart))\ -+ _restart_count = bch2_trans_begin(_trans); \ -+ \ -+ if (!_ret2) \ -+ bch2_trans_verify_not_restarted(_trans, _restart_count);\ -+ \ -+ _ret2 ?: trans_was_restarted(_trans, _restart_count); \ -+}) -+ -+#define for_each_btree_key2(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _do) \ -+({ \ -+ int _ret3 = 0; \ -+ \ -+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ \ -+ while (1) { \ -+ u32 _restart_count = bch2_trans_begin(_trans); \ -+ \ -+ _ret3 = 0; \ -+ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ -+ if (!(_k).k) \ -+ break; \ -+ \ -+ _ret3 = bkey_err(_k) ?: (_do); \ -+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ -+ continue; \ -+ if (_ret3) \ -+ break; \ -+ bch2_trans_verify_not_restarted(_trans, _restart_count);\ -+ if (!bch2_btree_iter_advance(&(_iter))) \ -+ break; \ -+ } \ -+ \ -+ bch2_trans_iter_exit((_trans), &(_iter)); \ -+ _ret3; \ -+}) -+ -+#define for_each_btree_key2_upto(_trans, _iter, _btree_id, \ -+ 
_start, _end, _flags, _k, _do) \ -+({ \ -+ int _ret3 = 0; \ -+ \ -+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ \ -+ while (1) { \ -+ u32 _restart_count = bch2_trans_begin(_trans); \ -+ \ -+ _ret3 = 0; \ -+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\ -+ if (!(_k).k) \ -+ break; \ -+ \ -+ _ret3 = bkey_err(_k) ?: (_do); \ -+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ -+ continue; \ -+ if (_ret3) \ -+ break; \ -+ bch2_trans_verify_not_restarted(_trans, _restart_count);\ -+ if (!bch2_btree_iter_advance(&(_iter))) \ -+ break; \ -+ } \ -+ \ -+ bch2_trans_iter_exit((_trans), &(_iter)); \ -+ _ret3; \ -+}) -+ -+#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _do) \ -+({ \ -+ int _ret3 = 0; \ -+ \ -+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ \ -+ while (1) { \ -+ u32 _restart_count = bch2_trans_begin(_trans); \ -+ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ -+ if (!(_k).k) { \ -+ _ret3 = 0; \ -+ break; \ -+ } \ -+ \ -+ _ret3 = bkey_err(_k) ?: (_do); \ -+ if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ -+ continue; \ -+ if (_ret3) \ -+ break; \ -+ bch2_trans_verify_not_restarted(_trans, _restart_count);\ -+ if (!bch2_btree_iter_rewind(&(_iter))) \ -+ break; \ -+ } \ -+ \ -+ bch2_trans_iter_exit((_trans), &(_iter)); \ -+ _ret3; \ -+}) -+ -+#define for_each_btree_key_commit(_trans, _iter, _btree_id, \ -+ _start, _iter_flags, _k, \ -+ _disk_res, _journal_seq, _commit_flags,\ -+ _do) \ -+ for_each_btree_key2(_trans, _iter, _btree_id, _start, _iter_flags, _k,\ -+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ -+ (_journal_seq), (_commit_flags))) -+ -+#define for_each_btree_key_reverse_commit(_trans, _iter, _btree_id, \ -+ _start, _iter_flags, _k, \ -+ _disk_res, _journal_seq, _commit_flags,\ -+ _do) \ -+ for_each_btree_key_reverse(_trans, _iter, _btree_id, _start, _iter_flags, _k,\ -+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ -+ (_journal_seq), (_commit_flags))) -+ -+#define for_each_btree_key_upto_commit(_trans, _iter, _btree_id, \ -+ _start, _end, _iter_flags, _k, \ -+ _disk_res, _journal_seq, _commit_flags,\ -+ _do) \ -+ for_each_btree_key2_upto(_trans, _iter, _btree_id, _start, _end, _iter_flags, _k,\ -+ (_do) ?: bch2_trans_commit(_trans, (_disk_res),\ -+ (_journal_seq), (_commit_flags))) -+ -+#define for_each_btree_key(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_upto(_trans, _iter, _btree_id, \ -+ _start, _end, _flags, _k, _ret) \ -+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ (_k) = __bch2_btree_iter_peek_upto_and_restart((_trans), \ -+ &(_iter), _end, _flags),\ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \ -+ _start, _flags, _k, _ret) \ -+ for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ (_k) = bch2_btree_iter_peek_type(&(_iter), _flags), \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ -+ _start, _end, _flags, _k, _ret) \ -+ for 
(bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ -+ (_start), (_flags)); \ -+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \ -+ for (; \ -+ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ -+ for (; \ -+ (_k) = bch2_btree_iter_peek_type(&(_iter), _flags), \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define for_each_btree_key_upto_continue_norestart(_iter, _end, _flags, _k, _ret)\ -+ for (; \ -+ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags), \ -+ !((_ret) = bkey_err(_k)) && (_k).k; \ -+ bch2_btree_iter_advance(&(_iter))) -+ -+#define drop_locks_do(_trans, _do) \ -+({ \ -+ bch2_trans_unlock(_trans); \ -+ _do ?: bch2_trans_relock(_trans); \ -+}) -+ -+#define allocate_dropping_locks_errcode(_trans, _do) \ -+({ \ -+ gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ -+ int _ret = _do; \ -+ \ -+ if (bch2_err_matches(_ret, ENOMEM)) { \ -+ _gfp = GFP_KERNEL; \ -+ _ret = drop_locks_do(trans, _do); \ -+ } \ -+ _ret; \ -+}) -+ -+#define allocate_dropping_locks(_trans, _ret, _do) \ -+({ \ -+ gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN; \ -+ typeof(_do) _p = _do; \ -+ \ -+ _ret = 0; \ -+ if (unlikely(!_p)) { \ -+ _gfp = GFP_KERNEL; \ -+ _ret = drop_locks_do(trans, ((_p = _do), 0)); \ -+ } \ -+ _p; \ -+}) -+ -+/* new multiple iterator interface: */ -+ -+void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); -+void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); -+void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); -+void bch2_dump_trans_updates(struct btree_trans *); -+void bch2_dump_trans_paths_updates(struct btree_trans *); -+ -+struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned); -+void bch2_trans_put(struct btree_trans *); -+ -+extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR]; -+unsigned bch2_trans_get_fn_idx(const char *); -+ -+#define bch2_trans_get(_c) \ -+({ \ -+ static unsigned trans_fn_idx; \ -+ \ -+ if (unlikely(!trans_fn_idx)) \ -+ trans_fn_idx = bch2_trans_get_fn_idx(__func__); \ -+ __bch2_trans_get(_c, trans_fn_idx); \ -+}) -+ -+void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); -+ -+void bch2_fs_btree_iter_exit(struct bch_fs *); -+int bch2_fs_btree_iter_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_ITER_H */ -diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c -new file mode 100644 -index 000000000000..58a981bcf3aa ---- /dev/null -+++ b/fs/bcachefs/btree_journal_iter.c -@@ -0,0 +1,531 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bset.h" -+#include "btree_journal_iter.h" -+#include "journal_io.h" -+ -+#include -+ -+/* -+ * For managing keys we read from the journal: until journal replay works normal -+ * btree lookups need to be able to find and return keys from the journal where -+ * they overwrite what's in the btree, so we have a special iterator and -+ * operations for the regular btree iter code to use: -+ */ -+ -+static int __journal_key_cmp(enum btree_id l_btree_id, -+ unsigned l_level, -+ struct bpos l_pos, -+ const struct journal_key *r) -+{ -+ return (cmp_int(l_btree_id, r->btree_id) ?: -+ cmp_int(l_level, 
r->level) ?: -+ bpos_cmp(l_pos, r->k->k.p)); -+} -+ -+static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r) -+{ -+ return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r); -+} -+ -+static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx) -+{ -+ size_t gap_size = keys->size - keys->nr; -+ -+ if (idx >= keys->gap) -+ idx += gap_size; -+ return idx; -+} -+ -+static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t idx) -+{ -+ return keys->d + idx_to_pos(keys, idx); -+} -+ -+static size_t __bch2_journal_key_search(struct journal_keys *keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ size_t l = 0, r = keys->nr, m; -+ -+ while (l < r) { -+ m = l + ((r - l) >> 1); -+ if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0) -+ l = m + 1; -+ else -+ r = m; -+ } -+ -+ BUG_ON(l < keys->nr && -+ __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0); -+ -+ BUG_ON(l && -+ __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0); -+ -+ return l; -+} -+ -+static size_t bch2_journal_key_search(struct journal_keys *keys, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos)); -+} -+ -+struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id, -+ unsigned level, struct bpos pos, -+ struct bpos end_pos, size_t *idx) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ unsigned iters = 0; -+ struct journal_key *k; -+search: -+ if (!*idx) -+ *idx = __bch2_journal_key_search(keys, btree_id, level, pos); -+ -+ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) { -+ if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) -+ return NULL; -+ -+ if (__journal_key_cmp(btree_id, level, pos, k) <= 0 && -+ !k->overwritten) -+ return k->k; -+ -+ (*idx)++; -+ iters++; -+ if (iters == 10) { -+ *idx = 0; -+ goto search; -+ } -+ } -+ -+ return NULL; -+} -+ -+struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id, -+ unsigned level, struct bpos pos) -+{ -+ size_t idx = 0; -+ -+ return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx); -+} -+ -+static void journal_iters_fix(struct bch_fs *c) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ /* The key we just inserted is immediately before the gap: */ -+ size_t gap_end = keys->gap + (keys->size - keys->nr); -+ struct btree_and_journal_iter *iter; -+ -+ /* -+ * If an iterator points one after the key we just inserted, decrement -+ * the iterator so it points at the key we just inserted - if the -+ * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will -+ * handle that: -+ */ -+ list_for_each_entry(iter, &c->journal_iters, journal.list) -+ if (iter->journal.idx == gap_end) -+ iter->journal.idx = keys->gap - 1; -+} -+ -+static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ struct journal_iter *iter; -+ size_t gap_size = keys->size - keys->nr; -+ -+ list_for_each_entry(iter, &c->journal_iters, list) { -+ if (iter->idx > old_gap) -+ iter->idx -= gap_size; -+ if (iter->idx >= new_gap) -+ iter->idx += gap_size; -+ } -+} -+ -+int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_i *k) -+{ -+ struct journal_key n = { -+ .btree_id = id, -+ .level = level, -+ .k = k, -+ .allocated = true, -+ /* -+ * Ensure these keys are done last by journal replay, to unblock -+ 
* journal reclaim: -+ */ -+ .journal_seq = U32_MAX, -+ }; -+ struct journal_keys *keys = &c->journal_keys; -+ size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); -+ -+ BUG_ON(test_bit(BCH_FS_RW, &c->flags)); -+ -+ if (idx < keys->size && -+ journal_key_cmp(&n, &keys->d[idx]) == 0) { -+ if (keys->d[idx].allocated) -+ kfree(keys->d[idx].k); -+ keys->d[idx] = n; -+ return 0; -+ } -+ -+ if (idx > keys->gap) -+ idx -= keys->size - keys->nr; -+ -+ if (keys->nr == keys->size) { -+ struct journal_keys new_keys = { -+ .nr = keys->nr, -+ .size = max_t(size_t, keys->size, 8) * 2, -+ }; -+ -+ new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL); -+ if (!new_keys.d) { -+ bch_err(c, "%s: error allocating new key array (size %zu)", -+ __func__, new_keys.size); -+ return -BCH_ERR_ENOMEM_journal_key_insert; -+ } -+ -+ /* Since @keys was full, there was no gap: */ -+ memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr); -+ kvfree(keys->d); -+ *keys = new_keys; -+ -+ /* And now the gap is at the end: */ -+ keys->gap = keys->nr; -+ } -+ -+ journal_iters_move_gap(c, keys->gap, idx); -+ -+ move_gap(keys->d, keys->nr, keys->size, keys->gap, idx); -+ keys->gap = idx; -+ -+ keys->nr++; -+ keys->d[keys->gap++] = n; -+ -+ journal_iters_fix(c); -+ -+ return 0; -+} -+ -+/* -+ * Can only be used from the recovery thread while we're still RO - can't be -+ * used once we've got RW, as journal_keys is at that point used by multiple -+ * threads: -+ */ -+int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bkey_i *k) -+{ -+ struct bkey_i *n; -+ int ret; -+ -+ n = kmalloc(bkey_bytes(&k->k), GFP_KERNEL); -+ if (!n) -+ return -BCH_ERR_ENOMEM_journal_key_insert; -+ -+ bkey_copy(n, k); -+ ret = bch2_journal_key_insert_take(c, id, level, n); -+ if (ret) -+ kfree(n); -+ return ret; -+} -+ -+int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id, -+ unsigned level, struct bpos pos) -+{ -+ struct bkey_i whiteout; -+ -+ bkey_init(&whiteout.k); -+ whiteout.k.p = pos; -+ -+ return bch2_journal_key_insert(c, id, level, &whiteout); -+} -+ -+void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, -+ unsigned level, struct bpos pos) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ size_t idx = bch2_journal_key_search(keys, btree, level, pos); -+ -+ if (idx < keys->size && -+ keys->d[idx].btree_id == btree && -+ keys->d[idx].level == level && -+ bpos_eq(keys->d[idx].k->k.p, pos)) -+ keys->d[idx].overwritten = true; -+} -+ -+static void bch2_journal_iter_advance(struct journal_iter *iter) -+{ -+ if (iter->idx < iter->keys->size) { -+ iter->idx++; -+ if (iter->idx == iter->keys->gap) -+ iter->idx += iter->keys->size - iter->keys->nr; -+ } -+} -+ -+static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter) -+{ -+ struct journal_key *k = iter->keys->d + iter->idx; -+ -+ while (k < iter->keys->d + iter->keys->size && -+ k->btree_id == iter->btree_id && -+ k->level == iter->level) { -+ if (!k->overwritten) -+ return bkey_i_to_s_c(k->k); -+ -+ bch2_journal_iter_advance(iter); -+ k = iter->keys->d + iter->idx; -+ } -+ -+ return bkey_s_c_null; -+} -+ -+static void bch2_journal_iter_exit(struct journal_iter *iter) -+{ -+ list_del(&iter->list); -+} -+ -+static void bch2_journal_iter_init(struct bch_fs *c, -+ struct journal_iter *iter, -+ enum btree_id id, unsigned level, -+ struct bpos pos) -+{ -+ iter->btree_id = id; -+ iter->level = level; -+ iter->keys = &c->journal_keys; -+ iter->idx = 
bch2_journal_key_search(&c->journal_keys, id, level, pos); -+} -+ -+static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter) -+{ -+ return bch2_btree_node_iter_peek_unpack(&iter->node_iter, -+ iter->b, &iter->unpacked); -+} -+ -+static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter) -+{ -+ bch2_btree_node_iter_advance(&iter->node_iter, iter->b); -+} -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter) -+{ -+ if (bpos_eq(iter->pos, SPOS_MAX)) -+ iter->at_end = true; -+ else -+ iter->pos = bpos_successor(iter->pos); -+} -+ -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter) -+{ -+ struct bkey_s_c btree_k, journal_k, ret; -+again: -+ if (iter->at_end) -+ return bkey_s_c_null; -+ -+ while ((btree_k = bch2_journal_iter_peek_btree(iter)).k && -+ bpos_lt(btree_k.k->p, iter->pos)) -+ bch2_journal_iter_advance_btree(iter); -+ -+ while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k && -+ bpos_lt(journal_k.k->p, iter->pos)) -+ bch2_journal_iter_advance(&iter->journal); -+ -+ ret = journal_k.k && -+ (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p)) -+ ? journal_k -+ : btree_k; -+ -+ if (ret.k && iter->b && bpos_gt(ret.k->p, iter->b->data->max_key)) -+ ret = bkey_s_c_null; -+ -+ if (ret.k) { -+ iter->pos = ret.k->p; -+ if (bkey_deleted(ret.k)) { -+ bch2_btree_and_journal_iter_advance(iter); -+ goto again; -+ } -+ } else { -+ iter->pos = SPOS_MAX; -+ iter->at_end = true; -+ } -+ -+ return ret; -+} -+ -+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter) -+{ -+ bch2_journal_iter_exit(&iter->journal); -+} -+ -+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, -+ struct bch_fs *c, -+ struct btree *b, -+ struct btree_node_iter node_iter, -+ struct bpos pos) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->b = b; -+ iter->node_iter = node_iter; -+ bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos); -+ INIT_LIST_HEAD(&iter->journal.list); -+ iter->pos = b->data->min_key; -+ iter->at_end = false; -+} -+ -+/* -+ * this version is used by btree_gc before filesystem has gone RW and -+ * multithreaded, so uses the journal_iters list: -+ */ -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter, -+ struct bch_fs *c, -+ struct btree *b) -+{ -+ struct btree_node_iter node_iter; -+ -+ bch2_btree_node_iter_init_from_start(&node_iter, b); -+ __bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key); -+ list_add(&iter->journal.list, &c->journal_iters); -+} -+ -+/* sort and dedup all keys in the journal: */ -+ -+void bch2_journal_entries_free(struct bch_fs *c) -+{ -+ struct journal_replay **i; -+ struct genradix_iter iter; -+ -+ genradix_for_each(&c->journal_entries, iter, i) -+ if (*i) -+ kvpfree(*i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&(*i)->j)); -+ genradix_free(&c->journal_entries); -+} -+ -+/* -+ * When keys compare equal, oldest compares first: -+ */ -+static int journal_sort_key_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = _l; -+ const struct journal_key *r = _r; -+ -+ return journal_key_cmp(l, r) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->journal_offset, r->journal_offset); -+} -+ -+void bch2_journal_keys_free(struct journal_keys *keys) -+{ -+ struct journal_key *i; -+ -+ move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); -+ keys->gap = keys->nr; -+ -+ for (i = 
keys->d; i < keys->d + keys->nr; i++) -+ if (i->allocated) -+ kfree(i->k); -+ -+ kvfree(keys->d); -+ keys->d = NULL; -+ keys->nr = keys->gap = keys->size = 0; -+} -+ -+static void __journal_keys_sort(struct journal_keys *keys) -+{ -+ struct journal_key *src, *dst; -+ -+ sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL); -+ -+ src = dst = keys->d; -+ while (src < keys->d + keys->nr) { -+ while (src + 1 < keys->d + keys->nr && -+ src[0].btree_id == src[1].btree_id && -+ src[0].level == src[1].level && -+ bpos_eq(src[0].k->k.p, src[1].k->k.p)) -+ src++; -+ -+ *dst++ = *src++; -+ } -+ -+ keys->nr = dst - keys->d; -+} -+ -+int bch2_journal_keys_sort(struct bch_fs *c) -+{ -+ struct genradix_iter iter; -+ struct journal_replay *i, **_i; -+ struct jset_entry *entry; -+ struct bkey_i *k; -+ struct journal_keys *keys = &c->journal_keys; -+ size_t nr_keys = 0, nr_read = 0; -+ -+ genradix_for_each(&c->journal_entries, iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ for_each_jset_key(k, entry, &i->j) -+ nr_keys++; -+ } -+ -+ if (!nr_keys) -+ return 0; -+ -+ keys->size = roundup_pow_of_two(nr_keys); -+ -+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL); -+ if (!keys->d) { -+ bch_err(c, "Failed to allocate buffer for sorted journal keys (%zu keys); trying slowpath", -+ nr_keys); -+ -+ do { -+ keys->size >>= 1; -+ keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL); -+ } while (!keys->d && keys->size > nr_keys / 8); -+ -+ if (!keys->d) { -+ bch_err(c, "Failed to allocate %zu size buffer for sorted journal keys; exiting", -+ keys->size); -+ return -BCH_ERR_ENOMEM_journal_keys_sort; -+ } -+ } -+ -+ genradix_for_each(&c->journal_entries, iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ cond_resched(); -+ -+ for_each_jset_key(k, entry, &i->j) { -+ if (keys->nr == keys->size) { -+ __journal_keys_sort(keys); -+ -+ if (keys->nr > keys->size * 7 / 8) { -+ bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu/%zu", -+ keys->nr, keys->size, nr_read, nr_keys); -+ return -BCH_ERR_ENOMEM_journal_keys_sort; -+ } -+ } -+ -+ keys->d[keys->nr++] = (struct journal_key) { -+ .btree_id = entry->btree_id, -+ .level = entry->level, -+ .k = k, -+ .journal_seq = le64_to_cpu(i->j.seq), -+ .journal_offset = k->_data - i->j._data, -+ }; -+ -+ nr_read++; -+ } -+ } -+ -+ __journal_keys_sort(keys); -+ keys->gap = keys->nr; -+ -+ bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_keys, keys->nr); -+ return 0; -+} -diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h -new file mode 100644 -index 000000000000..5d64e7e22f26 ---- /dev/null -+++ b/fs/bcachefs/btree_journal_iter.h -@@ -0,0 +1,57 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_JOURNAL_ITER_H -+#define _BCACHEFS_BTREE_JOURNAL_ITER_H -+ -+struct journal_iter { -+ struct list_head list; -+ enum btree_id btree_id; -+ unsigned level; -+ size_t idx; -+ struct journal_keys *keys; -+}; -+ -+/* -+ * Iterate over keys in the btree, with keys from the journal overlaid on top: -+ */ -+ -+struct btree_and_journal_iter { -+ struct btree *b; -+ struct btree_node_iter node_iter; -+ struct bkey unpacked; -+ -+ struct journal_iter journal; -+ struct bpos pos; -+ bool at_end; -+}; -+ -+struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id, -+ unsigned, struct bpos, struct bpos, size_t *); -+struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum 
btree_id, -+ unsigned, struct bpos); -+ -+int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id, -+ unsigned, struct bkey_i *); -+int bch2_journal_key_insert(struct bch_fs *, enum btree_id, -+ unsigned, struct bkey_i *); -+int bch2_journal_key_delete(struct bch_fs *, enum btree_id, -+ unsigned, struct bpos); -+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, -+ unsigned, struct bpos); -+ -+void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *); -+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *); -+ -+void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *); -+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, -+ struct bch_fs *, struct btree *, -+ struct btree_node_iter, struct bpos); -+void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, -+ struct bch_fs *, -+ struct btree *); -+ -+void bch2_journal_keys_free(struct journal_keys *); -+void bch2_journal_entries_free(struct bch_fs *); -+ -+int bch2_journal_keys_sort(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */ -diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c -new file mode 100644 -index 000000000000..3304bff7d464 ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.c -@@ -0,0 +1,1072 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_cache.h" -+#include "btree_iter.h" -+#include "btree_key_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "errcode.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "trace.h" -+ -+#include -+ -+static inline bool btree_uses_pcpu_readers(enum btree_id id) -+{ -+ return id == BTREE_ID_subvolumes; -+} -+ -+static struct kmem_cache *bch2_key_cache; -+ -+static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg, -+ const void *obj) -+{ -+ const struct bkey_cached *ck = obj; -+ const struct bkey_cached_key *key = arg->key; -+ -+ return ck->key.btree_id != key->btree_id || -+ !bpos_eq(ck->key.pos, key->pos); -+} -+ -+static const struct rhashtable_params bch2_btree_key_cache_params = { -+ .head_offset = offsetof(struct bkey_cached, hash), -+ .key_offset = offsetof(struct bkey_cached, key), -+ .key_len = sizeof(struct bkey_cached_key), -+ .obj_cmpfn = bch2_btree_key_cache_cmp_fn, -+}; -+ -+__flatten -+inline struct bkey_cached * -+bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos) -+{ -+ struct bkey_cached_key key = { -+ .btree_id = btree_id, -+ .pos = pos, -+ }; -+ -+ return rhashtable_lookup_fast(&c->btree_key_cache.table, &key, -+ bch2_btree_key_cache_params); -+} -+ -+static bool bkey_cached_lock_for_evict(struct bkey_cached *ck) -+{ -+ if (!six_trylock_intent(&ck->c.lock)) -+ return false; -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ if (!six_trylock_write(&ck->c.lock)) { -+ six_unlock_intent(&ck->c.lock); -+ return false; -+ } -+ -+ return true; -+} -+ -+static void bkey_cached_evict(struct btree_key_cache *c, -+ struct bkey_cached *ck) -+{ -+ BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash, -+ bch2_btree_key_cache_params)); -+ memset(&ck->key, ~0, sizeof(ck->key)); -+ -+ atomic_long_dec(&c->nr_keys); -+} -+ -+static void bkey_cached_free(struct btree_key_cache *bc, -+ struct bkey_cached *ck) -+{ -+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); -+ -+ 
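/*
 * (Editorial note, not part of the patch: a freed bkey_cached can still
 * be referenced by btree paths in other threads that hold
 * c->btree_trans_barrier.  start_poll_synchronize_srcu() below records
 * an SRCU grace-period cookie in the object, so whoever later pulls it
 * off the freelist can check it against poll_state_synchronize_srcu()
 * -- presumably in the reuse/shrink paths, which are not part of this
 * hunk -- before handing the memory out again.)
 */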
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); -+ -+ ck->btree_trans_barrier_seq = -+ start_poll_synchronize_srcu(&c->btree_trans_barrier); -+ -+ if (ck->c.lock.readers) -+ list_move_tail(&ck->list, &bc->freed_pcpu); -+ else -+ list_move_tail(&ck->list, &bc->freed_nonpcpu); -+ atomic_long_inc(&bc->nr_freed); -+ -+ kfree(ck->k); -+ ck->k = NULL; -+ ck->u64s = 0; -+ -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+} -+ -+#ifdef __KERNEL__ -+static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc, -+ struct bkey_cached *ck) -+{ -+ struct bkey_cached *pos; -+ -+ list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) { -+ if (ULONG_CMP_GE(ck->btree_trans_barrier_seq, -+ pos->btree_trans_barrier_seq)) { -+ list_move(&ck->list, &pos->list); -+ return; -+ } -+ } -+ -+ list_move(&ck->list, &bc->freed_nonpcpu); -+} -+#endif -+ -+static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, -+ struct bkey_cached *ck) -+{ -+ BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); -+ -+ if (!ck->c.lock.readers) { -+#ifdef __KERNEL__ -+ struct btree_key_cache_freelist *f; -+ bool freed = false; -+ -+ preempt_disable(); -+ f = this_cpu_ptr(bc->pcpu_freed); -+ -+ if (f->nr < ARRAY_SIZE(f->objs)) { -+ f->objs[f->nr++] = ck; -+ freed = true; -+ } -+ preempt_enable(); -+ -+ if (!freed) { -+ mutex_lock(&bc->lock); -+ preempt_disable(); -+ f = this_cpu_ptr(bc->pcpu_freed); -+ -+ while (f->nr > ARRAY_SIZE(f->objs) / 2) { -+ struct bkey_cached *ck2 = f->objs[--f->nr]; -+ -+ __bkey_cached_move_to_freelist_ordered(bc, ck2); -+ } -+ preempt_enable(); -+ -+ __bkey_cached_move_to_freelist_ordered(bc, ck); -+ mutex_unlock(&bc->lock); -+ } -+#else -+ mutex_lock(&bc->lock); -+ list_move_tail(&ck->list, &bc->freed_nonpcpu); -+ mutex_unlock(&bc->lock); -+#endif -+ } else { -+ mutex_lock(&bc->lock); -+ list_move_tail(&ck->list, &bc->freed_pcpu); -+ mutex_unlock(&bc->lock); -+ } -+} -+ -+static void bkey_cached_free_fast(struct btree_key_cache *bc, -+ struct bkey_cached *ck) -+{ -+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); -+ -+ ck->btree_trans_barrier_seq = -+ start_poll_synchronize_srcu(&c->btree_trans_barrier); -+ -+ list_del_init(&ck->list); -+ atomic_long_inc(&bc->nr_freed); -+ -+ kfree(ck->k); -+ ck->k = NULL; -+ ck->u64s = 0; -+ -+ bkey_cached_move_to_freelist(bc, ck); -+ -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+} -+ -+static struct bkey_cached * -+bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, -+ bool *was_new) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_key_cache *bc = &c->btree_key_cache; -+ struct bkey_cached *ck = NULL; -+ bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id); -+ int ret; -+ -+ if (!pcpu_readers) { -+#ifdef __KERNEL__ -+ struct btree_key_cache_freelist *f; -+ -+ preempt_disable(); -+ f = this_cpu_ptr(bc->pcpu_freed); -+ if (f->nr) -+ ck = f->objs[--f->nr]; -+ preempt_enable(); -+ -+ if (!ck) { -+ mutex_lock(&bc->lock); -+ preempt_disable(); -+ f = this_cpu_ptr(bc->pcpu_freed); -+ -+ while (!list_empty(&bc->freed_nonpcpu) && -+ f->nr < ARRAY_SIZE(f->objs) / 2) { -+ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); -+ list_del_init(&ck->list); -+ f->objs[f->nr++] = ck; -+ } -+ -+ ck = f->nr ? 
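/*
 * (Editorial note, not part of the patch: the freelist here is
 * two-level -- a small per-cpu array backed by the global
 * bc->freed_nonpcpu list, refilled and drained half an array at a
 * time -- so the common allocate/free path touches only per-cpu data
 * and bc->lock is taken only to move objects in batches.)
 */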
f->objs[--f->nr] : NULL; -+ preempt_enable(); -+ mutex_unlock(&bc->lock); -+ } -+#else -+ mutex_lock(&bc->lock); -+ if (!list_empty(&bc->freed_nonpcpu)) { -+ ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list); -+ list_del_init(&ck->list); -+ } -+ mutex_unlock(&bc->lock); -+#endif -+ } else { -+ mutex_lock(&bc->lock); -+ if (!list_empty(&bc->freed_pcpu)) { -+ ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list); -+ list_del_init(&ck->list); -+ } -+ mutex_unlock(&bc->lock); -+ } -+ -+ if (ck) { -+ ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_); -+ if (unlikely(ret)) { -+ bkey_cached_move_to_freelist(bc, ck); -+ return ERR_PTR(ret); -+ } -+ -+ path->l[0].b = (void *) ck; -+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock); -+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); -+ -+ ret = bch2_btree_node_lock_write(trans, path, &ck->c); -+ if (unlikely(ret)) { -+ btree_node_unlock(trans, path, 0); -+ bkey_cached_move_to_freelist(bc, ck); -+ return ERR_PTR(ret); -+ } -+ -+ return ck; -+ } -+ -+ ck = allocate_dropping_locks(trans, ret, -+ kmem_cache_zalloc(bch2_key_cache, _gfp)); -+ if (ret) { -+ kmem_cache_free(bch2_key_cache, ck); -+ return ERR_PTR(ret); -+ } -+ -+ if (!ck) -+ return NULL; -+ -+ INIT_LIST_HEAD(&ck->list); -+ bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0); -+ -+ ck->c.cached = true; -+ BUG_ON(!six_trylock_intent(&ck->c.lock)); -+ BUG_ON(!six_trylock_write(&ck->c.lock)); -+ *was_new = true; -+ return ck; -+} -+ -+static struct bkey_cached * -+bkey_cached_reuse(struct btree_key_cache *c) -+{ -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct bkey_cached *ck; -+ unsigned i; -+ -+ mutex_lock(&c->lock); -+ rcu_read_lock(); -+ tbl = rht_dereference_rcu(c->table.tbl, &c->table); -+ for (i = 0; i < tbl->size; i++) -+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && -+ bkey_cached_lock_for_evict(ck)) { -+ bkey_cached_evict(c, ck); -+ goto out; -+ } -+ } -+ ck = NULL; -+out: -+ rcu_read_unlock(); -+ mutex_unlock(&c->lock); -+ return ck; -+} -+ -+static struct bkey_cached * -+btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_key_cache *bc = &c->btree_key_cache; -+ struct bkey_cached *ck; -+ bool was_new = false; -+ -+ ck = bkey_cached_alloc(trans, path, &was_new); -+ if (IS_ERR(ck)) -+ return ck; -+ -+ if (unlikely(!ck)) { -+ ck = bkey_cached_reuse(bc); -+ if (unlikely(!ck)) { -+ bch_err(c, "error allocating memory for key cache item, btree %s", -+ bch2_btree_id_str(path->btree_id)); -+ return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create); -+ } -+ -+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); -+ } -+ -+ ck->c.level = 0; -+ ck->c.btree_id = path->btree_id; -+ ck->key.btree_id = path->btree_id; -+ ck->key.pos = path->pos; -+ ck->valid = false; -+ ck->flags = 1U << BKEY_CACHED_ACCESSED; -+ -+ if (unlikely(rhashtable_lookup_insert_fast(&bc->table, -+ &ck->hash, -+ bch2_btree_key_cache_params))) { -+ /* We raced with another fill: */ -+ -+ if (likely(was_new)) { -+ six_unlock_write(&ck->c.lock); -+ six_unlock_intent(&ck->c.lock); -+ kfree(ck); -+ } else { -+ bkey_cached_free_fast(bc, ck); -+ } -+ -+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); -+ return NULL; -+ } -+ -+ atomic_long_inc(&bc->nr_keys); -+ -+ six_unlock_write(&ck->c.lock); -+ -+ return ck; -+} -+ -+static int btree_key_cache_fill(struct btree_trans *trans, -+ 
struct btree_path *ck_path, -+ struct bkey_cached *ck) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ unsigned new_u64s = 0; -+ struct bkey_i *new_k = NULL; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, ck->key.btree_id, ck->key.pos, -+ BTREE_ITER_KEY_CACHE_FILL| -+ BTREE_ITER_CACHED_NOFILL); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!bch2_btree_node_relock(trans, ck_path, 0)) { -+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); -+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill); -+ goto err; -+ } -+ -+ /* -+ * bch2_varint_decode can read past the end of the buffer by at -+ * most 7 bytes (it won't be used): -+ */ -+ new_u64s = k.k->u64s + 1; -+ -+ /* -+ * Allocate some extra space so that the transaction commit path is less -+ * likely to have to reallocate, since that requires a transaction -+ * restart: -+ */ -+ new_u64s = min(256U, (new_u64s * 3) / 2); -+ -+ if (new_u64s > ck->u64s) { -+ new_u64s = roundup_pow_of_two(new_u64s); -+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN); -+ if (!new_k) { -+ bch2_trans_unlock(trans); -+ -+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); -+ if (!new_k) { -+ bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", -+ bch2_btree_id_str(ck->key.btree_id), new_u64s); -+ ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; -+ goto err; -+ } -+ -+ if (!bch2_btree_node_relock(trans, ck_path, 0)) { -+ kfree(new_k); -+ trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path); -+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill); -+ goto err; -+ } -+ -+ ret = bch2_trans_relock(trans); -+ if (ret) { -+ kfree(new_k); -+ goto err; -+ } -+ } -+ } -+ -+ ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c); -+ if (ret) { -+ kfree(new_k); -+ goto err; -+ } -+ -+ if (new_k) { -+ kfree(ck->k); -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ } -+ -+ bkey_reassemble(ck->k, k); -+ ck->valid = true; -+ bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); -+ -+ /* We're not likely to need this iterator again: */ -+ set_btree_iter_dontneed(&iter); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static noinline int -+bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck; -+ int ret = 0; -+ -+ BUG_ON(path->level); -+ -+ path->l[1].b = NULL; -+ -+ if (bch2_btree_node_relock_notrace(trans, path, 0)) { -+ ck = (void *) path->l[0].b; -+ goto fill; -+ } -+retry: -+ ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); -+ if (!ck) { -+ ck = btree_key_cache_create(trans, path); -+ ret = PTR_ERR_OR_ZERO(ck); -+ if (ret) -+ goto err; -+ if (!ck) -+ goto retry; -+ -+ mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED); -+ path->locks_want = 1; -+ } else { -+ enum six_lock_type lock_want = __btree_lock_want(path, 0); -+ -+ ret = btree_node_lock(trans, path, (void *) ck, 0, -+ lock_want, _THIS_IP_); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto err; -+ -+ BUG_ON(ret); -+ -+ if (ck->key.btree_id != path->btree_id || -+ !bpos_eq(ck->key.pos, path->pos)) { -+ six_unlock_type(&ck->c.lock, lock_want); -+ goto retry; -+ } -+ -+ mark_btree_node_locked(trans, path, 0, -+ (enum btree_node_locked_type) lock_want); -+ } -+ -+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock); -+ path->l[0].b = 
(void *) ck; -+fill: -+ path->uptodate = BTREE_ITER_UPTODATE; -+ -+ if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { -+ /* -+ * Using the underscore version because we haven't set -+ * path->uptodate yet: -+ */ -+ if (!path->locks_want && -+ !__bch2_btree_path_upgrade(trans, path, 1, NULL)) { -+ trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); -+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); -+ goto err; -+ } -+ -+ ret = btree_key_cache_fill(trans, path, ck); -+ if (ret) -+ goto err; -+ -+ ret = bch2_btree_path_relock(trans, path, _THIS_IP_); -+ if (ret) -+ goto err; -+ -+ path->uptodate = BTREE_ITER_UPTODATE; -+ } -+ -+ if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) -+ set_bit(BKEY_CACHED_ACCESSED, &ck->flags); -+ -+ BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); -+ BUG_ON(path->uptodate); -+ -+ return ret; -+err: -+ path->uptodate = BTREE_ITER_NEED_TRAVERSE; -+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { -+ btree_node_unlock(trans, path, 0); -+ path->l[0].b = ERR_PTR(ret); -+ } -+ return ret; -+} -+ -+int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck; -+ int ret = 0; -+ -+ EBUG_ON(path->level); -+ -+ path->l[1].b = NULL; -+ -+ if (bch2_btree_node_relock_notrace(trans, path, 0)) { -+ ck = (void *) path->l[0].b; -+ goto fill; -+ } -+retry: -+ ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); -+ if (!ck) { -+ return bch2_btree_path_traverse_cached_slowpath(trans, path, flags); -+ } else { -+ enum six_lock_type lock_want = __btree_lock_want(path, 0); -+ -+ ret = btree_node_lock(trans, path, (void *) ck, 0, -+ lock_want, _THIS_IP_); -+ EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)); -+ -+ if (ret) -+ return ret; -+ -+ if (ck->key.btree_id != path->btree_id || -+ !bpos_eq(ck->key.pos, path->pos)) { -+ six_unlock_type(&ck->c.lock, lock_want); -+ goto retry; -+ } -+ -+ mark_btree_node_locked(trans, path, 0, -+ (enum btree_node_locked_type) lock_want); -+ } -+ -+ path->l[0].lock_seq = six_lock_seq(&ck->c.lock); -+ path->l[0].b = (void *) ck; -+fill: -+ if (!ck->valid) -+ return bch2_btree_path_traverse_cached_slowpath(trans, path, flags); -+ -+ if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) -+ set_bit(BKEY_CACHED_ACCESSED, &ck->flags); -+ -+ path->uptodate = BTREE_ITER_UPTODATE; -+ EBUG_ON(!ck->valid); -+ EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); -+ -+ return ret; -+} -+ -+static int btree_key_cache_flush_pos(struct btree_trans *trans, -+ struct bkey_cached_key key, -+ u64 journal_seq, -+ unsigned commit_flags, -+ bool evict) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ struct btree_iter c_iter, b_iter; -+ struct bkey_cached *ck = NULL; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, -+ BTREE_ITER_SLOTS| -+ BTREE_ITER_INTENT| -+ BTREE_ITER_ALL_SNAPSHOTS); -+ bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_INTENT); -+ b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE; -+ -+ ret = bch2_btree_iter_traverse(&c_iter); -+ if (ret) -+ goto out; -+ -+ ck = (void *) c_iter.path->l[0].b; -+ if (!ck) -+ goto out; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ if (evict) -+ goto evict; -+ goto out; -+ } -+ -+ BUG_ON(!ck->valid); -+ -+ if (journal_seq && ck->journal.seq != journal_seq) -+ goto out; -+ -+ /* -+ * 
Since journal reclaim depends on us making progress here, and the -+ * allocator/copygc depend on journal reclaim making progress, we need -+ * to be using alloc reserves: -+ */ -+ ret = bch2_btree_iter_traverse(&b_iter) ?: -+ bch2_trans_update(trans, &b_iter, ck->k, -+ BTREE_UPDATE_KEY_CACHE_RECLAIM| -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| -+ BTREE_TRIGGER_NORUN) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ (ck->journal.seq == journal_last_seq(j) -+ ? BCH_WATERMARK_reclaim -+ : 0)| -+ commit_flags); -+ -+ bch2_fs_fatal_err_on(ret && -+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) && -+ !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && -+ !bch2_journal_error(j), c, -+ "error flushing key cache: %s", bch2_err_str(ret)); -+ if (ret) -+ goto out; -+ -+ bch2_journal_pin_drop(j, &ck->journal); -+ bch2_journal_preres_put(j, &ck->res); -+ -+ BUG_ON(!btree_node_locked(c_iter.path, 0)); -+ -+ if (!evict) { -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ atomic_long_dec(&c->btree_key_cache.nr_dirty); -+ } -+ } else { -+ struct btree_path *path2; -+evict: -+ trans_for_each_path(trans, path2) -+ if (path2 != c_iter.path) -+ __bch2_btree_path_unlock(trans, path2); -+ -+ bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c); -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ atomic_long_dec(&c->btree_key_cache.nr_dirty); -+ } -+ -+ mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED); -+ bkey_cached_evict(&c->btree_key_cache, ck); -+ bkey_cached_free_fast(&c->btree_key_cache, ck); -+ } -+out: -+ bch2_trans_iter_exit(trans, &b_iter); -+ bch2_trans_iter_exit(trans, &c_iter); -+ return ret; -+} -+ -+int bch2_btree_key_cache_journal_flush(struct journal *j, -+ struct journal_entry_pin *pin, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bkey_cached *ck = -+ container_of(pin, struct bkey_cached, journal); -+ struct bkey_cached_key key; -+ struct btree_trans *trans = bch2_trans_get(c); -+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); -+ int ret = 0; -+ -+ btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); -+ key = ck->key; -+ -+ if (ck->journal.seq != seq || -+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ six_unlock_read(&ck->c.lock); -+ goto unlock; -+ } -+ -+ if (ck->seq != seq) { -+ bch2_journal_pin_update(&c->journal, ck->seq, &ck->journal, -+ bch2_btree_key_cache_journal_flush); -+ six_unlock_read(&ck->c.lock); -+ goto unlock; -+ } -+ six_unlock_read(&ck->c.lock); -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ btree_key_cache_flush_pos(trans, key, seq, -+ BTREE_INSERT_JOURNAL_RECLAIM, false)); -+unlock: -+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+/* -+ * Flush and evict a key from the key cache: -+ */ -+int bch2_btree_key_cache_flush(struct btree_trans *trans, -+ enum btree_id id, struct bpos pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached_key key = { id, pos }; -+ -+ /* Fastpath - assume it won't be found: */ -+ if (!bch2_btree_key_cache_find(c, id, pos)) -+ return 0; -+ -+ return btree_key_cache_flush_pos(trans, key, 0, 0, true); -+} -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *trans, -+ unsigned flags, -+ struct btree_insert_entry *insert_entry) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck = (void *) insert_entry->path->l[0].b; -+ struct 
bkey_i *insert = insert_entry->k; -+ bool kick_reclaim = false; -+ -+ BUG_ON(insert->k.u64s > ck->u64s); -+ -+ if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ int difference; -+ -+ BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s); -+ -+ difference = jset_u64s(insert->k.u64s) - ck->res.u64s; -+ if (difference > 0) { -+ trans->journal_preres.u64s -= difference; -+ ck->res.u64s += difference; -+ } -+ } -+ -+ bkey_copy(ck->k, insert); -+ ck->valid = true; -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); -+ set_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ atomic_long_inc(&c->btree_key_cache.nr_dirty); -+ -+ if (bch2_nr_btree_keys_need_flush(c)) -+ kick_reclaim = true; -+ } -+ -+ /* -+ * To minimize lock contention, we only add the journal pin here and -+ * defer pin updates to the flush callback via ->seq. Be careful not to -+ * update ->seq on nojournal commits because we don't want to update the -+ * pin to a seq that doesn't include journal updates on disk. Otherwise -+ * we risk losing the update after a crash. -+ * -+ * The only exception is if the pin is not active in the first place. We -+ * have to add the pin because journal reclaim drives key cache -+ * flushing. The flush callback will not proceed unless ->seq matches -+ * the latest pin, so make sure it starts with a consistent value. -+ */ -+ if (!(insert_entry->flags & BTREE_UPDATE_NOJOURNAL) || -+ !journal_pin_active(&ck->journal)) { -+ ck->seq = trans->journal_res.seq; -+ } -+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq, -+ &ck->journal, bch2_btree_key_cache_journal_flush); -+ -+ if (kick_reclaim) -+ journal_reclaim_kick(&c->journal); -+ return true; -+} -+ -+void bch2_btree_key_cache_drop(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck = (void *) path->l[0].b; -+ -+ BUG_ON(!ck->valid); -+ -+ /* -+ * We just did an update to the btree, bypassing the key cache: the key -+ * cache key is now stale and must be dropped, even if dirty: -+ */ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags); -+ atomic_long_dec(&c->btree_key_cache.nr_dirty); -+ bch2_journal_pin_drop(&c->journal, &ck->journal); -+ } -+ -+ ck->valid = false; -+} -+ -+static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_key_cache.shrink); -+ struct btree_key_cache *bc = &c->btree_key_cache; -+ struct bucket_table *tbl; -+ struct bkey_cached *ck, *t; -+ size_t scanned = 0, freed = 0, nr = sc->nr_to_scan; -+ unsigned start, flags; -+ int srcu_idx; -+ -+ mutex_lock(&bc->lock); -+ srcu_idx = srcu_read_lock(&c->btree_trans_barrier); -+ flags = memalloc_nofs_save(); -+ -+ /* -+ * Newest freed entries are at the end of the list - once we hit one -+ * that's too new to be freed, we can bail out: -+ */ -+ list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { -+ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, -+ ck->btree_trans_barrier_seq)) -+ break; -+ -+ list_del(&ck->list); -+ six_lock_exit(&ck->c.lock); -+ kmem_cache_free(bch2_key_cache, ck); -+ atomic_long_dec(&bc->nr_freed); -+ scanned++; -+ freed++; -+ } -+ -+ if (scanned >= nr) -+ goto out; -+ -+ list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { -+ if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, -+ ck->btree_trans_barrier_seq)) -+ break; -+ -+ list_del(&ck->list); -+ 
six_lock_exit(&ck->c.lock); -+ kmem_cache_free(bch2_key_cache, ck); -+ atomic_long_dec(&bc->nr_freed); -+ scanned++; -+ freed++; -+ } -+ -+ if (scanned >= nr) -+ goto out; -+ -+ rcu_read_lock(); -+ tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); -+ if (bc->shrink_iter >= tbl->size) -+ bc->shrink_iter = 0; -+ start = bc->shrink_iter; -+ -+ do { -+ struct rhash_head *pos, *next; -+ -+ pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); -+ -+ while (!rht_is_a_nulls(pos)) { -+ next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); -+ ck = container_of(pos, struct bkey_cached, hash); -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) -+ goto next; -+ -+ if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) -+ clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); -+ else if (bkey_cached_lock_for_evict(ck)) { -+ bkey_cached_evict(bc, ck); -+ bkey_cached_free(bc, ck); -+ } -+ -+ scanned++; -+ if (scanned >= nr) -+ break; -+next: -+ pos = next; -+ } -+ -+ bc->shrink_iter++; -+ if (bc->shrink_iter >= tbl->size) -+ bc->shrink_iter = 0; -+ } while (scanned < nr && bc->shrink_iter != start); -+ -+ rcu_read_unlock(); -+out: -+ memalloc_nofs_restore(flags); -+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); -+ mutex_unlock(&bc->lock); -+ -+ return freed; -+} -+ -+static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink, -+ struct shrink_control *sc) -+{ -+ struct bch_fs *c = container_of(shrink, struct bch_fs, -+ btree_key_cache.shrink); -+ struct btree_key_cache *bc = &c->btree_key_cache; -+ long nr = atomic_long_read(&bc->nr_keys) - -+ atomic_long_read(&bc->nr_dirty); -+ -+ return max(0L, nr); -+} -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) -+{ -+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); -+ struct bucket_table *tbl; -+ struct bkey_cached *ck, *n; -+ struct rhash_head *pos; -+ LIST_HEAD(items); -+ unsigned i; -+#ifdef __KERNEL__ -+ int cpu; -+#endif -+ -+ unregister_shrinker(&bc->shrink); -+ -+ mutex_lock(&bc->lock); -+ -+ /* -+ * The loop is needed to guard against racing with rehash: -+ */ -+ while (atomic_long_read(&bc->nr_keys)) { -+ rcu_read_lock(); -+ tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); -+ if (tbl) -+ for (i = 0; i < tbl->size; i++) -+ rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { -+ bkey_cached_evict(bc, ck); -+ list_add(&ck->list, &items); -+ } -+ rcu_read_unlock(); -+ } -+ -+#ifdef __KERNEL__ -+ for_each_possible_cpu(cpu) { -+ struct btree_key_cache_freelist *f = -+ per_cpu_ptr(bc->pcpu_freed, cpu); -+ -+ for (i = 0; i < f->nr; i++) { -+ ck = f->objs[i]; -+ list_add(&ck->list, &items); -+ } -+ } -+#endif -+ -+ list_splice(&bc->freed_pcpu, &items); -+ list_splice(&bc->freed_nonpcpu, &items); -+ -+ mutex_unlock(&bc->lock); -+ -+ list_for_each_entry_safe(ck, n, &items, list) { -+ cond_resched(); -+ -+ bch2_journal_pin_drop(&c->journal, &ck->journal); -+ bch2_journal_preres_put(&c->journal, &ck->res); -+ -+ list_del(&ck->list); -+ kfree(ck->k); -+ six_lock_exit(&ck->c.lock); -+ kmem_cache_free(bch2_key_cache, ck); -+ } -+ -+ if (atomic_long_read(&bc->nr_dirty) && -+ !bch2_journal_error(&c->journal) && -+ test_bit(BCH_FS_WAS_RW, &c->flags)) -+ panic("btree key cache shutdown error: nr_dirty nonzero (%li)\n", -+ atomic_long_read(&bc->nr_dirty)); -+ -+ if (atomic_long_read(&bc->nr_keys)) -+ panic("btree key cache shutdown error: nr_keys nonzero (%li)\n", -+ atomic_long_read(&bc->nr_keys)); -+ -+ if (bc->table_init_done) -+ rhashtable_destroy(&bc->table); -+ -+ free_percpu(bc->pcpu_freed); -+} -+ -+void 
bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c) -+{ -+ mutex_init(&c->lock); -+ INIT_LIST_HEAD(&c->freed_pcpu); -+ INIT_LIST_HEAD(&c->freed_nonpcpu); -+} -+ -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) -+{ -+ struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache); -+ -+#ifdef __KERNEL__ -+ bc->pcpu_freed = alloc_percpu(struct btree_key_cache_freelist); -+ if (!bc->pcpu_freed) -+ return -BCH_ERR_ENOMEM_fs_btree_cache_init; -+#endif -+ -+ if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params)) -+ return -BCH_ERR_ENOMEM_fs_btree_cache_init; -+ -+ bc->table_init_done = true; -+ -+ bc->shrink.seeks = 0; -+ bc->shrink.count_objects = bch2_btree_key_cache_count; -+ bc->shrink.scan_objects = bch2_btree_key_cache_scan; -+ if (register_shrinker(&bc->shrink, "%s-btree_key_cache", c->name)) -+ return -BCH_ERR_ENOMEM_fs_btree_cache_init; -+ return 0; -+} -+ -+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) -+{ -+ prt_printf(out, "nr_freed:\t%lu", atomic_long_read(&c->nr_freed)); -+ prt_newline(out); -+ prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys)); -+ prt_newline(out); -+ prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty)); -+ prt_newline(out); -+} -+ -+void bch2_btree_key_cache_exit(void) -+{ -+ kmem_cache_destroy(bch2_key_cache); -+} -+ -+int __init bch2_btree_key_cache_init(void) -+{ -+ bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT); -+ if (!bch2_key_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h -new file mode 100644 -index 000000000000..be3acde2caa0 ---- /dev/null -+++ b/fs/bcachefs/btree_key_cache.h -@@ -0,0 +1,48 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_KEY_CACHE_H -+#define _BCACHEFS_BTREE_KEY_CACHE_H -+ -+static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) -+{ -+ size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); -+ size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); -+ size_t max_dirty = 1024 + nr_keys / 2; -+ -+ return max_t(ssize_t, 0, nr_dirty - max_dirty); -+} -+ -+static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) -+{ -+ size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); -+ size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); -+ size_t max_dirty = 4096 + (nr_keys * 3) / 4; -+ -+ return nr_dirty > max_dirty; -+} -+ -+int bch2_btree_key_cache_journal_flush(struct journal *, -+ struct journal_entry_pin *, u64); -+ -+struct bkey_cached * -+bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos); -+ -+int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *, -+ unsigned); -+ -+bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned, -+ struct btree_insert_entry *); -+int bch2_btree_key_cache_flush(struct btree_trans *, -+ enum btree_id, struct bpos); -+void bch2_btree_key_cache_drop(struct btree_trans *, -+ struct btree_path *); -+ -+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *); -+void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *); -+int bch2_fs_btree_key_cache_init(struct btree_key_cache *); -+ -+void bch2_btree_key_cache_to_text(struct printbuf *, struct btree_key_cache *); -+ -+void bch2_btree_key_cache_exit(void); -+int __init bch2_btree_key_cache_init(void); -+ -+#endif /* _BCACHEFS_BTREE_KEY_CACHE_H */ -diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c -new file mode 
100644 -index 000000000000..3d48834d091f ---- /dev/null -+++ b/fs/bcachefs/btree_locking.c -@@ -0,0 +1,817 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_locking.h" -+#include "btree_types.h" -+ -+static struct lock_class_key bch2_btree_node_lock_key; -+ -+void bch2_btree_lock_init(struct btree_bkey_cached_common *b, -+ enum six_lock_init_flags flags) -+{ -+ __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags); -+ lockdep_set_novalidate_class(&b->lock); -+} -+ -+#ifdef CONFIG_LOCKDEP -+void bch2_assert_btree_nodes_not_locked(void) -+{ -+#if 0 -+ //Re-enable when lock_class_is_held() is merged: -+ BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); -+#endif -+} -+#endif -+ -+/* Btree node locking: */ -+ -+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans, -+ struct btree_path *skip, -+ struct btree_bkey_cached_common *b, -+ unsigned level) -+{ -+ struct btree_path *path; -+ struct six_lock_count ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ -+ if (IS_ERR_OR_NULL(b)) -+ return ret; -+ -+ trans_for_each_path(trans, path) -+ if (path != skip && &path->l[level].b->c == b) { -+ int t = btree_node_locked_type(path, level); -+ -+ if (t != BTREE_NODE_UNLOCKED) -+ ret.n[t]++; -+ } -+ -+ return ret; -+} -+ -+/* unlock */ -+ -+void bch2_btree_node_unlock_write(struct btree_trans *trans, -+ struct btree_path *path, struct btree *b) -+{ -+ bch2_btree_node_unlock_write_inlined(trans, path, b); -+} -+ -+/* lock */ -+ -+/* -+ * @trans wants to lock @b with type @type -+ */ -+struct trans_waiting_for_lock { -+ struct btree_trans *trans; -+ struct btree_bkey_cached_common *node_want; -+ enum six_lock_type lock_want; -+ -+ /* for iterating over held locks :*/ -+ u8 path_idx; -+ u8 level; -+ u64 lock_start_time; -+}; -+ -+struct lock_graph { -+ struct trans_waiting_for_lock g[8]; -+ unsigned nr; -+}; -+ -+static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) -+{ -+ struct trans_waiting_for_lock *i; -+ -+ prt_printf(out, "Found lock cycle (%u entries):", g->nr); -+ prt_newline(out); -+ -+ for (i = g->g; i < g->g + g->nr; i++) -+ bch2_btree_trans_to_text(out, i->trans); -+} -+ -+static noinline void print_chain(struct printbuf *out, struct lock_graph *g) -+{ -+ struct trans_waiting_for_lock *i; -+ -+ for (i = g->g; i != g->g + g->nr; i++) { -+ if (i != g->g) -+ prt_str(out, "<- "); -+ prt_printf(out, "%u ", i->trans->locking_wait.task->pid); -+ } -+ prt_newline(out); -+} -+ -+static void lock_graph_up(struct lock_graph *g) -+{ -+ closure_put(&g->g[--g->nr].trans->ref); -+} -+ -+static noinline void lock_graph_pop_all(struct lock_graph *g) -+{ -+ while (g->nr) -+ lock_graph_up(g); -+} -+ -+static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans) -+{ -+ g->g[g->nr++] = (struct trans_waiting_for_lock) { -+ .trans = trans, -+ .node_want = trans->locking, -+ .lock_want = trans->locking_wait.lock_want, -+ }; -+} -+ -+static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans) -+{ -+ closure_get(&trans->ref); -+ __lock_graph_down(g, trans); -+} -+ -+static bool lock_graph_remove_non_waiters(struct lock_graph *g) -+{ -+ struct trans_waiting_for_lock *i; -+ -+ for (i = g->g + 1; i < g->g + g->nr; i++) -+ if (i->trans->locking != i->node_want || -+ i->trans->locking_wait.start_time != i[-1].lock_start_time) { -+ while (g->g + g->nr > i) -+ lock_graph_up(g); -+ return true; -+ } -+ -+ return false; -+} -+ -+static int abort_lock(struct lock_graph *g, struct 
trans_waiting_for_lock *i) -+{ -+ if (i == g->g) { -+ trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_); -+ return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock); -+ } else { -+ i->trans->lock_must_abort = true; -+ wake_up_process(i->trans->locking_wait.task); -+ return 0; -+ } -+} -+ -+static int btree_trans_abort_preference(struct btree_trans *trans) -+{ -+ if (trans->lock_may_not_fail) -+ return 0; -+ if (trans->locking_wait.lock_want == SIX_LOCK_write) -+ return 1; -+ if (!trans->in_traverse_all) -+ return 2; -+ return 3; -+} -+ -+static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) -+{ -+ struct trans_waiting_for_lock *i, *abort = NULL; -+ unsigned best = 0, pref; -+ int ret; -+ -+ if (lock_graph_remove_non_waiters(g)) -+ return 0; -+ -+ /* Only checking, for debugfs: */ -+ if (cycle) { -+ print_cycle(cycle, g); -+ ret = -1; -+ goto out; -+ } -+ -+ for (i = g->g; i < g->g + g->nr; i++) { -+ pref = btree_trans_abort_preference(i->trans); -+ if (pref > best) { -+ abort = i; -+ best = pref; -+ } -+ } -+ -+ if (unlikely(!best)) { -+ struct printbuf buf = PRINTBUF; -+ -+ prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks")); -+ -+ for (i = g->g; i < g->g + g->nr; i++) { -+ struct btree_trans *trans = i->trans; -+ -+ bch2_btree_trans_to_text(&buf, trans); -+ -+ prt_printf(&buf, "backtrace:"); -+ prt_newline(&buf); -+ printbuf_indent_add(&buf, 2); -+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task); -+ printbuf_indent_sub(&buf, 2); -+ prt_newline(&buf); -+ } -+ -+ bch2_print_string_as_lines(KERN_ERR, buf.buf); -+ printbuf_exit(&buf); -+ BUG(); -+ } -+ -+ ret = abort_lock(g, abort); -+out: -+ if (ret) -+ while (g->nr) -+ lock_graph_up(g); -+ return ret; -+} -+ -+static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans, -+ struct printbuf *cycle) -+{ -+ struct btree_trans *orig_trans = g->g->trans; -+ struct trans_waiting_for_lock *i; -+ -+ for (i = g->g; i < g->g + g->nr; i++) -+ if (i->trans == trans) { -+ closure_put(&trans->ref); -+ return break_cycle(g, cycle); -+ } -+ -+ if (g->nr == ARRAY_SIZE(g->g)) { -+ closure_put(&trans->ref); -+ -+ if (orig_trans->lock_may_not_fail) -+ return 0; -+ -+ while (g->nr) -+ lock_graph_up(g); -+ -+ if (cycle) -+ return 0; -+ -+ trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_); -+ return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit); -+ } -+ -+ __lock_graph_down(g, trans); -+ return 0; -+} -+ -+static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2) -+{ -+ return t1 + t2 > 1; -+} -+ -+int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) -+{ -+ struct lock_graph g; -+ struct trans_waiting_for_lock *top; -+ struct btree_bkey_cached_common *b; -+ struct btree_path *path; -+ unsigned path_idx; -+ int ret; -+ -+ if (trans->lock_must_abort) { -+ if (cycle) -+ return -1; -+ -+ trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); -+ } -+ -+ g.nr = 0; -+ lock_graph_down(&g, trans); -+next: -+ if (!g.nr) -+ return 0; -+ -+ top = &g.g[g.nr - 1]; -+ -+ trans_for_each_path_safe_from(top->trans, path, path_idx, top->path_idx) { -+ if (!path->nodes_locked) -+ continue; -+ -+ if (path_idx != top->path_idx) { -+ top->path_idx = path_idx; -+ top->level = 0; -+ top->lock_start_time = 0; -+ } -+ -+ for (; -+ top->level < 
BTREE_MAX_DEPTH; -+ top->level++, top->lock_start_time = 0) { -+ int lock_held = btree_node_locked_type(path, top->level); -+ -+ if (lock_held == BTREE_NODE_UNLOCKED) -+ continue; -+ -+ b = &READ_ONCE(path->l[top->level].b)->c; -+ -+ if (IS_ERR_OR_NULL(b)) { -+ /* -+ * If we get here, it means we raced with the -+ * other thread updating its btree_path -+ * structures - which means it can't be blocked -+ * waiting on a lock: -+ */ -+ if (!lock_graph_remove_non_waiters(&g)) { -+ /* -+ * If lock_graph_remove_non_waiters() -+ * didn't do anything, it must be -+ * because we're being called by debugfs -+ * checking for lock cycles, which -+ * invokes us on btree_transactions that -+ * aren't actually waiting on anything. -+ * Just bail out: -+ */ -+ lock_graph_pop_all(&g); -+ } -+ -+ goto next; -+ } -+ -+ if (list_empty_careful(&b->lock.wait_list)) -+ continue; -+ -+ raw_spin_lock(&b->lock.wait_lock); -+ list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) { -+ BUG_ON(b != trans->locking); -+ -+ if (top->lock_start_time && -+ time_after_eq64(top->lock_start_time, trans->locking_wait.start_time)) -+ continue; -+ -+ top->lock_start_time = trans->locking_wait.start_time; -+ -+ /* Don't check for self deadlock: */ -+ if (trans == top->trans || -+ !lock_type_conflicts(lock_held, trans->locking_wait.lock_want)) -+ continue; -+ -+ closure_get(&trans->ref); -+ raw_spin_unlock(&b->lock.wait_lock); -+ -+ ret = lock_graph_descend(&g, trans, cycle); -+ if (ret) -+ return ret; -+ goto next; -+ -+ } -+ raw_spin_unlock(&b->lock.wait_lock); -+ } -+ } -+ -+ if (g.nr > 1 && cycle) -+ print_chain(cycle, &g); -+ lock_graph_up(&g); -+ goto next; -+} -+ -+int bch2_six_check_for_deadlock(struct six_lock *lock, void *p) -+{ -+ struct btree_trans *trans = p; -+ -+ return bch2_check_for_deadlock(trans, NULL); -+} -+ -+int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path, -+ struct btree_bkey_cached_common *b, -+ bool lock_may_not_fail) -+{ -+ int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read]; -+ int ret; -+ -+ /* -+ * Must drop our read locks before calling six_lock_write() - -+ * six_unlock() won't do wakeups until the reader count -+ * goes to 0, and it's safe because we have the node intent -+ * locked: -+ */ -+ six_lock_readers_add(&b->lock, -readers); -+ ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write, -+ lock_may_not_fail, _RET_IP_); -+ six_lock_readers_add(&b->lock, readers); -+ -+ if (ret) -+ mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED); -+ -+ return ret; -+} -+ -+void bch2_btree_node_lock_write_nofail(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_bkey_cached_common *b) -+{ -+ struct btree_path *linked; -+ unsigned i; -+ int ret; -+ -+ /* -+ * XXX BIG FAT NOTICE -+ * -+ * Drop all read locks before taking a write lock: -+ * -+ * This is a hack, because bch2_btree_node_lock_write_nofail() is a -+ * hack - but by dropping read locks first, this should never fail, and -+ * we only use this in code paths where whatever read locks we've -+ * already taken are no longer needed: -+ */ -+ -+ trans_for_each_path(trans, linked) { -+ if (!linked->nodes_locked) -+ continue; -+ -+ for (i = 0; i < BTREE_MAX_DEPTH; i++) -+ if (btree_node_read_locked(linked, i)) { -+ btree_node_unlock(trans, linked, i); -+ btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK); -+ } -+ } -+ -+ ret = __btree_node_lock_write(trans, path, b, true); -+ BUG_ON(ret); -+} -+ -+/* relock */ -+ -+static inline bool 
btree_path_get_locks(struct btree_trans *trans, -+ struct btree_path *path, -+ bool upgrade, -+ struct get_locks_fail *f) -+{ -+ unsigned l = path->level; -+ int fail_idx = -1; -+ -+ do { -+ if (!btree_path_node(path, l)) -+ break; -+ -+ if (!(upgrade -+ ? bch2_btree_node_upgrade(trans, path, l) -+ : bch2_btree_node_relock(trans, path, l))) { -+ fail_idx = l; -+ -+ if (f) { -+ f->l = l; -+ f->b = path->l[l].b; -+ } -+ } -+ -+ l++; -+ } while (l < path->locks_want); -+ -+ /* -+ * When we fail to get a lock, we have to ensure that any child nodes -+ * can't be relocked so bch2_btree_path_traverse has to walk back up to -+ * the node that we failed to relock: -+ */ -+ if (fail_idx >= 0) { -+ __bch2_btree_path_unlock(trans, path); -+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+ -+ do { -+ path->l[fail_idx].b = upgrade -+ ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade) -+ : ERR_PTR(-BCH_ERR_no_btree_node_relock); -+ --fail_idx; -+ } while (fail_idx >= 0); -+ } -+ -+ if (path->uptodate == BTREE_ITER_NEED_RELOCK) -+ path->uptodate = BTREE_ITER_UPTODATE; -+ -+ bch2_trans_verify_locks(trans); -+ -+ return path->uptodate < BTREE_ITER_NEED_RELOCK; -+} -+ -+bool __bch2_btree_node_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned level, -+ bool trace) -+{ -+ struct btree *b = btree_path_node(path, level); -+ int want = __btree_lock_want(path, level); -+ -+ if (race_fault()) -+ goto fail; -+ -+ if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || -+ (btree_node_lock_seq_matches(path, b, level) && -+ btree_node_lock_increment(trans, &b->c, level, want))) { -+ mark_btree_node_locked(trans, path, level, want); -+ return true; -+ } -+fail: -+ if (trace && !trans->notrace_relock_fail) -+ trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level); -+ return false; -+} -+ -+/* upgrade */ -+ -+bool bch2_btree_node_upgrade(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ struct btree *b = path->l[level].b; -+ struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level); -+ -+ if (!is_btree_node(path, level)) -+ return false; -+ -+ switch (btree_lock_want(path, level)) { -+ case BTREE_NODE_UNLOCKED: -+ BUG_ON(btree_node_locked(path, level)); -+ return true; -+ case BTREE_NODE_READ_LOCKED: -+ BUG_ON(btree_node_intent_locked(path, level)); -+ return bch2_btree_node_relock(trans, path, level); -+ case BTREE_NODE_INTENT_LOCKED: -+ break; -+ case BTREE_NODE_WRITE_LOCKED: -+ BUG(); -+ } -+ -+ if (btree_node_intent_locked(path, level)) -+ return true; -+ -+ if (race_fault()) -+ return false; -+ -+ if (btree_node_locked(path, level)) { -+ bool ret; -+ -+ six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]); -+ ret = six_lock_tryupgrade(&b->c.lock); -+ six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]); -+ -+ if (ret) -+ goto success; -+ } else { -+ if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) -+ goto success; -+ } -+ -+ /* -+ * Do we already have an intent lock via another path? 
If so, just bump -+ * lock count: -+ */ -+ if (btree_node_lock_seq_matches(path, b, level) && -+ btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) { -+ btree_node_unlock(trans, path, level); -+ goto success; -+ } -+ -+ trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level); -+ return false; -+success: -+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); -+ return true; -+} -+ -+/* Btree path locking: */ -+ -+/* -+ * Only for btree_cache.c - only relocks intent locks -+ */ -+int bch2_btree_path_relock_intent(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ unsigned l; -+ -+ for (l = path->level; -+ l < path->locks_want && btree_path_node(path, l); -+ l++) { -+ if (!bch2_btree_node_relock(trans, path, l)) { -+ __bch2_btree_path_unlock(trans, path); -+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+ trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent); -+ } -+ } -+ -+ return 0; -+} -+ -+__flatten -+bool bch2_btree_path_relock_norestart(struct btree_trans *trans, -+ struct btree_path *path, unsigned long trace_ip) -+{ -+ struct get_locks_fail f; -+ -+ return btree_path_get_locks(trans, path, false, &f); -+} -+ -+int __bch2_btree_path_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned long trace_ip) -+{ -+ if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) { -+ trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); -+ } -+ -+ return 0; -+} -+ -+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want, -+ struct get_locks_fail *f) -+{ -+ EBUG_ON(path->locks_want >= new_locks_want); -+ -+ path->locks_want = new_locks_want; -+ -+ return btree_path_get_locks(trans, path, true, f); -+} -+ -+bool __bch2_btree_path_upgrade(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want, -+ struct get_locks_fail *f) -+{ -+ struct btree_path *linked; -+ -+ if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f)) -+ return true; -+ -+ /* -+ * XXX: this is ugly - we'd prefer to not be mucking with other -+ * iterators in the btree_trans here. -+ * -+ * On failure to upgrade the iterator, setting iter->locks_want and -+ * calling get_locks() is sufficient to make bch2_btree_path_traverse() -+ * get the locks we want on transaction restart. -+ * -+ * But if this iterator was a clone, on transaction restart what we did -+ * to this iterator isn't going to be preserved. -+ * -+ * Possibly we could add an iterator field for the parent iterator when -+ * an iterator is a copy - for now, we'll just upgrade any other -+ * iterators with the same btree id. -+ * -+ * The code below used to be needed to ensure ancestor nodes get locked -+ * before interior nodes - now that's handled by -+ * bch2_btree_path_traverse_all(). 
-+ */ -+ if (!path->cached && !trans->in_traverse_all) -+ trans_for_each_path(trans, linked) -+ if (linked != path && -+ linked->cached == path->cached && -+ linked->btree_id == path->btree_id && -+ linked->locks_want < new_locks_want) { -+ linked->locks_want = new_locks_want; -+ btree_path_get_locks(trans, linked, true, NULL); -+ } -+ -+ return false; -+} -+ -+void __bch2_btree_path_downgrade(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want) -+{ -+ unsigned l; -+ -+ if (trans->restarted) -+ return; -+ -+ EBUG_ON(path->locks_want < new_locks_want); -+ -+ path->locks_want = new_locks_want; -+ -+ while (path->nodes_locked && -+ (l = btree_path_highest_level_locked(path)) >= path->locks_want) { -+ if (l > path->level) { -+ btree_node_unlock(trans, path, l); -+ } else { -+ if (btree_node_intent_locked(path, l)) { -+ six_lock_downgrade(&path->l[l].b->c.lock); -+ mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED); -+ } -+ break; -+ } -+ } -+ -+ bch2_btree_path_verify_locks(path); -+ -+ path->downgrade_seq++; -+ trace_path_downgrade(trans, _RET_IP_, path); -+} -+ -+/* Btree transaction locking: */ -+ -+void bch2_trans_downgrade(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ if (trans->restarted) -+ return; -+ -+ trans_for_each_path(trans, path) -+ bch2_btree_path_downgrade(trans, path); -+} -+ -+int bch2_trans_relock(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ if (unlikely(trans->restarted)) -+ return -((int) trans->restarted); -+ -+ trans_for_each_path(trans, path) -+ if (path->should_be_locked && -+ !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { -+ trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); -+ } -+ return 0; -+} -+ -+int bch2_trans_relock_notrace(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ if (unlikely(trans->restarted)) -+ return -((int) trans->restarted); -+ -+ trans_for_each_path(trans, path) -+ if (path->should_be_locked && -+ !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); -+ } -+ return 0; -+} -+ -+void bch2_trans_unlock_noassert(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ __bch2_btree_path_unlock(trans, path); -+} -+ -+void bch2_trans_unlock(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ __bch2_btree_path_unlock(trans, path); -+} -+ -+void bch2_trans_unlock_long(struct btree_trans *trans) -+{ -+ bch2_trans_unlock(trans); -+ bch2_trans_srcu_unlock(trans); -+} -+ -+bool bch2_trans_locked(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ if (path->nodes_locked) -+ return true; -+ return false; -+} -+ -+int __bch2_trans_mutex_lock(struct btree_trans *trans, -+ struct mutex *lock) -+{ -+ int ret = drop_locks_do(trans, (mutex_lock(lock), 0)); -+ -+ if (ret) -+ mutex_unlock(lock); -+ return ret; -+} -+ -+/* Debug */ -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ -+void bch2_btree_path_verify_locks(struct btree_path *path) -+{ -+ unsigned l; -+ -+ if (!path->nodes_locked) { -+ BUG_ON(path->uptodate == BTREE_ITER_UPTODATE && -+ btree_path_node(path, path->level)); -+ return; -+ } -+ -+ for (l = 0; l < BTREE_MAX_DEPTH; l++) { -+ int want = btree_lock_want(path, l); -+ int have = btree_node_locked_type(path, l); -+ -+ BUG_ON(!is_btree_node(path, 
l) && have != BTREE_NODE_UNLOCKED); -+ -+ BUG_ON(is_btree_node(path, l) && -+ (want == BTREE_NODE_UNLOCKED || -+ have != BTREE_NODE_WRITE_LOCKED) && -+ want != have); -+ } -+} -+ -+void bch2_trans_verify_locks(struct btree_trans *trans) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ bch2_btree_path_verify_locks(path); -+} -+ -+#endif -diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h -new file mode 100644 -index 000000000000..11b0a2c8cd69 ---- /dev/null -+++ b/fs/bcachefs/btree_locking.h -@@ -0,0 +1,433 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_LOCKING_H -+#define _BCACHEFS_BTREE_LOCKING_H -+ -+/* -+ * Only for internal btree use: -+ * -+ * The btree iterator tracks what locks it wants to take, and what locks it -+ * currently has - here we have wrappers for locking/unlocking btree nodes and -+ * updating the iterator state -+ */ -+ -+#include "btree_iter.h" -+#include "six.h" -+ -+void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags); -+ -+#ifdef CONFIG_LOCKDEP -+void bch2_assert_btree_nodes_not_locked(void); -+#else -+static inline void bch2_assert_btree_nodes_not_locked(void) {} -+#endif -+ -+void bch2_trans_unlock_noassert(struct btree_trans *); -+ -+static inline bool is_btree_node(struct btree_path *path, unsigned l) -+{ -+ return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b); -+} -+ -+static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans) -+{ -+ return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats) -+ ? &trans->c->btree_transaction_stats[trans->fn_idx] -+ : NULL; -+} -+ -+/* matches six lock types */ -+enum btree_node_locked_type { -+ BTREE_NODE_UNLOCKED = -1, -+ BTREE_NODE_READ_LOCKED = SIX_LOCK_read, -+ BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, -+ BTREE_NODE_WRITE_LOCKED = SIX_LOCK_write, -+}; -+ -+static inline int btree_node_locked_type(struct btree_path *path, -+ unsigned level) -+{ -+ return BTREE_NODE_UNLOCKED + ((path->nodes_locked >> (level << 1)) & 3); -+} -+ -+static inline bool btree_node_write_locked(struct btree_path *path, unsigned l) -+{ -+ return btree_node_locked_type(path, l) == BTREE_NODE_WRITE_LOCKED; -+} -+ -+static inline bool btree_node_intent_locked(struct btree_path *path, unsigned l) -+{ -+ return btree_node_locked_type(path, l) == BTREE_NODE_INTENT_LOCKED; -+} -+ -+static inline bool btree_node_read_locked(struct btree_path *path, unsigned l) -+{ -+ return btree_node_locked_type(path, l) == BTREE_NODE_READ_LOCKED; -+} -+ -+static inline bool btree_node_locked(struct btree_path *path, unsigned level) -+{ -+ return btree_node_locked_type(path, level) != BTREE_NODE_UNLOCKED; -+} -+ -+static inline void mark_btree_node_locked_noreset(struct btree_path *path, -+ unsigned level, -+ enum btree_node_locked_type type) -+{ -+ /* relying on this to avoid a branch */ -+ BUILD_BUG_ON(SIX_LOCK_read != 0); -+ BUILD_BUG_ON(SIX_LOCK_intent != 1); -+ -+ path->nodes_locked &= ~(3U << (level << 1)); -+ path->nodes_locked |= (type + 1) << (level << 1); -+} -+ -+static inline void mark_btree_node_unlocked(struct btree_path *path, -+ unsigned level) -+{ -+ EBUG_ON(btree_node_write_locked(path, level)); -+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_UNLOCKED); -+} -+ -+static inline void mark_btree_node_locked(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned level, -+ enum btree_node_locked_type type) -+{ -+ mark_btree_node_locked_noreset(path, level, (enum btree_node_locked_type) 
type); -+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS -+ path->l[level].lock_taken_time = local_clock(); -+#endif -+} -+ -+static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level) -+{ -+ return level < path->locks_want -+ ? SIX_LOCK_intent -+ : SIX_LOCK_read; -+} -+ -+static inline enum btree_node_locked_type -+btree_lock_want(struct btree_path *path, int level) -+{ -+ if (level < path->level) -+ return BTREE_NODE_UNLOCKED; -+ if (level < path->locks_want) -+ return BTREE_NODE_INTENT_LOCKED; -+ if (level == path->level) -+ return BTREE_NODE_READ_LOCKED; -+ return BTREE_NODE_UNLOCKED; -+} -+ -+static void btree_trans_lock_hold_time_update(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS -+ struct btree_transaction_stats *s = btree_trans_stats(trans); -+ -+ if (s) -+ __bch2_time_stats_update(&s->lock_hold_times, -+ path->l[level].lock_taken_time, -+ local_clock()); -+#endif -+} -+ -+/* unlock: */ -+ -+static inline void btree_node_unlock(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ int lock_type = btree_node_locked_type(path, level); -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ -+ if (lock_type != BTREE_NODE_UNLOCKED) { -+ six_unlock_type(&path->l[level].b->c.lock, lock_type); -+ btree_trans_lock_hold_time_update(trans, path, level); -+ } -+ mark_btree_node_unlocked(path, level); -+} -+ -+static inline int btree_path_lowest_level_locked(struct btree_path *path) -+{ -+ return __ffs(path->nodes_locked) >> 1; -+} -+ -+static inline int btree_path_highest_level_locked(struct btree_path *path) -+{ -+ return __fls(path->nodes_locked) >> 1; -+} -+ -+static inline void __bch2_btree_path_unlock(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK); -+ -+ while (path->nodes_locked) -+ btree_node_unlock(trans, path, btree_path_lowest_level_locked(path)); -+} -+ -+/* -+ * Updates the saved lock sequence number, so that bch2_btree_node_relock() will -+ * succeed: -+ */ -+static inline void -+bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path, -+ struct btree *b) -+{ -+ struct btree_path *linked; -+ -+ EBUG_ON(path->l[b->c.level].b != b); -+ EBUG_ON(path->l[b->c.level].lock_seq != six_lock_seq(&b->c.lock)); -+ EBUG_ON(btree_node_locked_type(path, b->c.level) != SIX_LOCK_write); -+ -+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); -+ -+ trans_for_each_path_with_node(trans, b, linked) -+ linked->l[b->c.level].lock_seq++; -+ -+ six_unlock_write(&b->c.lock); -+} -+ -+void bch2_btree_node_unlock_write(struct btree_trans *, -+ struct btree_path *, struct btree *); -+ -+int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); -+ -+/* lock: */ -+ -+static inline int __btree_node_lock_nopath(struct btree_trans *trans, -+ struct btree_bkey_cached_common *b, -+ enum six_lock_type type, -+ bool lock_may_not_fail, -+ unsigned long ip) -+{ -+ int ret; -+ -+ trans->lock_may_not_fail = lock_may_not_fail; -+ trans->lock_must_abort = false; -+ trans->locking = b; -+ -+ ret = six_lock_ip_waiter(&b->lock, type, &trans->locking_wait, -+ bch2_six_check_for_deadlock, trans, ip); -+ WRITE_ONCE(trans->locking, NULL); -+ WRITE_ONCE(trans->locking_wait.start_time, 0); -+ return ret; -+} -+ -+static inline int __must_check -+btree_node_lock_nopath(struct btree_trans *trans, -+ struct btree_bkey_cached_common *b, -+ enum six_lock_type type, -+ unsigned long ip) -+{ -+ return 
__btree_node_lock_nopath(trans, b, type, false, ip); -+} -+ -+static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans, -+ struct btree_bkey_cached_common *b, -+ enum six_lock_type type) -+{ -+ int ret = __btree_node_lock_nopath(trans, b, type, true, _THIS_IP_); -+ -+ BUG_ON(ret); -+} -+ -+/* -+ * Lock a btree node if we already have it locked on one of our linked -+ * iterators: -+ */ -+static inline bool btree_node_lock_increment(struct btree_trans *trans, -+ struct btree_bkey_cached_common *b, -+ unsigned level, -+ enum btree_node_locked_type want) -+{ -+ struct btree_path *path; -+ -+ trans_for_each_path(trans, path) -+ if (&path->l[level].b->c == b && -+ btree_node_locked_type(path, level) >= want) { -+ six_lock_increment(&b->lock, (enum six_lock_type) want); -+ return true; -+ } -+ -+ return false; -+} -+ -+static inline int btree_node_lock(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_bkey_cached_common *b, -+ unsigned level, -+ enum six_lock_type type, -+ unsigned long ip) -+{ -+ int ret = 0; -+ -+ EBUG_ON(level >= BTREE_MAX_DEPTH); -+ EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); -+ -+ if (likely(six_trylock_type(&b->lock, type)) || -+ btree_node_lock_increment(trans, b, level, (enum btree_node_locked_type) type) || -+ !(ret = btree_node_lock_nopath(trans, b, type, btree_path_ip_allocated(path)))) { -+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS -+ path->l[b->level].lock_taken_time = local_clock(); -+#endif -+ } -+ -+ return ret; -+} -+ -+int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_path *, -+ struct btree_bkey_cached_common *b, bool); -+ -+static inline int __btree_node_lock_write(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_bkey_cached_common *b, -+ bool lock_may_not_fail) -+{ -+ EBUG_ON(&path->l[b->level].b->c != b); -+ EBUG_ON(path->l[b->level].lock_seq != six_lock_seq(&b->lock)); -+ EBUG_ON(!btree_node_intent_locked(path, b->level)); -+ -+ /* -+ * six locks are unfair, and read locks block while a thread wants a -+ * write lock: thus, we need to tell the cycle detector we have a write -+ * lock _before_ taking the lock: -+ */ -+ mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_WRITE_LOCKED); -+ -+ return likely(six_trylock_write(&b->lock)) -+ ? 0 -+ : __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail); -+} -+ -+static inline int __must_check -+bch2_btree_node_lock_write(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_bkey_cached_common *b) -+{ -+ return __btree_node_lock_write(trans, path, b, false); -+} -+ -+void bch2_btree_node_lock_write_nofail(struct btree_trans *, -+ struct btree_path *, -+ struct btree_bkey_cached_common *); -+ -+/* relock: */ -+ -+bool bch2_btree_path_relock_norestart(struct btree_trans *, -+ struct btree_path *, unsigned long); -+int __bch2_btree_path_relock(struct btree_trans *, -+ struct btree_path *, unsigned long); -+ -+static inline int bch2_btree_path_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned long trace_ip) -+{ -+ return btree_node_locked(path, path->level) -+ ? 
0 -+ : __bch2_btree_path_relock(trans, path, trace_ip); -+} -+ -+bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned, bool trace); -+ -+static inline bool bch2_btree_node_relock(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ EBUG_ON(btree_node_locked(path, level) && -+ !btree_node_write_locked(path, level) && -+ btree_node_locked_type(path, level) != __btree_lock_want(path, level)); -+ -+ return likely(btree_node_locked(path, level)) || -+ (!IS_ERR_OR_NULL(path->l[level].b) && -+ __bch2_btree_node_relock(trans, path, level, true)); -+} -+ -+static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans, -+ struct btree_path *path, unsigned level) -+{ -+ EBUG_ON(btree_node_locked(path, level) && -+ !btree_node_write_locked(path, level) && -+ btree_node_locked_type(path, level) != __btree_lock_want(path, level)); -+ -+ return likely(btree_node_locked(path, level)) || -+ (!IS_ERR_OR_NULL(path->l[level].b) && -+ __bch2_btree_node_relock(trans, path, level, false)); -+} -+ -+/* upgrade */ -+ -+ -+struct get_locks_fail { -+ unsigned l; -+ struct btree *b; -+}; -+ -+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, -+ struct btree_path *, unsigned, -+ struct get_locks_fail *); -+ -+bool __bch2_btree_path_upgrade(struct btree_trans *, -+ struct btree_path *, unsigned, -+ struct get_locks_fail *); -+ -+static inline int bch2_btree_path_upgrade(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned new_locks_want) -+{ -+ struct get_locks_fail f; -+ unsigned old_locks_want = path->locks_want; -+ -+ new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); -+ -+ if (path->locks_want < new_locks_want -+ ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f) -+ : path->uptodate == BTREE_ITER_UPTODATE) -+ return 0; -+ -+ trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path, -+ old_locks_want, new_locks_want, &f); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); -+} -+ -+/* misc: */ -+ -+static inline void btree_path_set_should_be_locked(struct btree_path *path) -+{ -+ EBUG_ON(!btree_node_locked(path, path->level)); -+ EBUG_ON(path->uptodate); -+ -+ path->should_be_locked = true; -+} -+ -+static inline void __btree_path_set_level_up(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned l) -+{ -+ btree_node_unlock(trans, path, l); -+ path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up); -+} -+ -+static inline void btree_path_set_level_up(struct btree_trans *trans, -+ struct btree_path *path) -+{ -+ __btree_path_set_level_up(trans, path, path->level++); -+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); -+} -+ -+/* debug */ -+ -+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *, -+ struct btree_path *, -+ struct btree_bkey_cached_common *b, -+ unsigned); -+ -+int bch2_check_for_deadlock(struct btree_trans *, struct printbuf *); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_btree_path_verify_locks(struct btree_path *); -+void bch2_trans_verify_locks(struct btree_trans *); -+#else -+static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} -+static inline void bch2_trans_verify_locks(struct btree_trans *trans) {} -+#endif -+ -+#endif /* _BCACHEFS_BTREE_LOCKING_H */ -diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c -new file mode 100644 -index 000000000000..decad7b66c59 ---- /dev/null -+++ b/fs/bcachefs/btree_trans_commit.c -@@ -0,0 +1,1145 @@ -+// SPDX-License-Identifier: 
GPL-2.0
-+
-+#include "bcachefs.h"
-+#include "btree_gc.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_journal_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_update_interior.h"
-+#include "btree_write_buffer.h"
-+#include "buckets.h"
-+#include "errcode.h"
-+#include "error.h"
-+#include "journal.h"
-+#include "journal_reclaim.h"
-+#include "replicas.h"
-+#include "snapshot.h"
-+
-+#include <linux/prefetch.h>
-+
-+static void verify_update_old_key(struct btree_trans *trans, struct btree_insert_entry *i)
-+{
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+	struct bch_fs *c = trans->c;
-+	struct bkey u;
-+	struct bkey_s_c k = bch2_btree_path_peek_slot_exact(i->path, &u);
-+
-+	if (unlikely(trans->journal_replay_not_finished)) {
-+		struct bkey_i *j_k =
-+			bch2_journal_keys_peek_slot(c, i->btree_id, i->level, i->k->k.p);
-+
-+		if (j_k)
-+			k = bkey_i_to_s_c(j_k);
-+	}
-+
-+	u = *k.k;
-+	u.needs_whiteout = i->old_k.needs_whiteout;
-+
-+	BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey)));
-+	BUG_ON(i->old_v != k.v);
-+#endif
-+}
-+
-+static inline struct btree_path_level *insert_l(struct btree_insert_entry *i)
-+{
-+	return i->path->l + i->level;
-+}
-+
-+static inline bool same_leaf_as_prev(struct btree_trans *trans,
-+				     struct btree_insert_entry *i)
-+{
-+	return i != trans->updates &&
-+		insert_l(&i[0])->b == insert_l(&i[-1])->b;
-+}
-+
-+static inline bool same_leaf_as_next(struct btree_trans *trans,
-+				     struct btree_insert_entry *i)
-+{
-+	return i + 1 < trans->updates + trans->nr_updates &&
-+		insert_l(&i[0])->b == insert_l(&i[1])->b;
-+}
-+
-+inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
-+					   struct btree_path *path,
-+					   struct btree *b)
-+{
-+	struct bch_fs *c = trans->c;
-+
-+	if (unlikely(btree_node_just_written(b)) &&
-+	    bch2_btree_post_write_cleanup(c, b))
-+		bch2_trans_node_reinit_iter(trans, b);
-+
-+	/*
-+	 * If the last bset has been written, or if it's gotten too big - start
-+	 * a new bset to insert into:
-+	 */
-+	if (want_new_bset(c, b))
-+		bch2_btree_init_next(trans, b);
-+}
-+
-+/* Inserting into a given leaf node (last stage of insert): */
-+
-+/* Handle overwrites and do insert, for non extents: */
-+bool bch2_btree_bset_insert_key(struct btree_trans *trans,
-+				struct btree_path *path,
-+				struct btree *b,
-+				struct btree_node_iter *node_iter,
-+				struct bkey_i *insert)
-+{
-+	struct bkey_packed *k;
-+	unsigned clobber_u64s = 0, new_u64s = 0;
-+
-+	EBUG_ON(btree_node_just_written(b));
-+	EBUG_ON(bset_written(b, btree_bset_last(b)));
-+	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
-+	EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
-+	EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
-+	EBUG_ON(insert->k.u64s >
-+		bch_btree_keys_u64s_remaining(trans->c, b));
-+	EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
-+
-+	k = bch2_btree_node_iter_peek_all(node_iter, b);
-+	if (k && bkey_cmp_left_packed(b, k, &insert->k.p))
-+		k = NULL;
-+
-+	/* @k is the key being overwritten/deleted, if any: */
-+	EBUG_ON(k && bkey_deleted(k));
-+
-+	/* Deleting, but not found?
nothing to do: */ -+ if (bkey_deleted(&insert->k) && !k) -+ return false; -+ -+ if (bkey_deleted(&insert->k)) { -+ /* Deleting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ if (k->needs_whiteout) -+ push_whiteout(trans->c, b, insert->k.p); -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ bch2_bset_delete(b, k, clobber_u64s); -+ goto fix_iter; -+ } else { -+ bch2_btree_path_fix_key_modified(trans, b, k); -+ } -+ -+ return true; -+ } -+ -+ if (k) { -+ /* Overwriting: */ -+ btree_account_key_drop(b, k); -+ k->type = KEY_TYPE_deleted; -+ -+ insert->k.needs_whiteout = k->needs_whiteout; -+ k->needs_whiteout = false; -+ -+ if (k >= btree_bset_last(b)->start) { -+ clobber_u64s = k->u64s; -+ goto overwrite; -+ } else { -+ bch2_btree_path_fix_key_modified(trans, b, k); -+ } -+ } -+ -+ k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b)); -+overwrite: -+ bch2_bset_insert(b, node_iter, k, insert, clobber_u64s); -+ new_u64s = k->u64s; -+fix_iter: -+ if (clobber_u64s != new_u64s) -+ bch2_btree_node_iter_fix(trans, path, b, node_iter, k, -+ clobber_u64s, new_u64s); -+ return true; -+} -+ -+static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, -+ unsigned i, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct btree_write *w = container_of(pin, struct btree_write, journal); -+ struct btree *b = container_of(w, struct btree, writes[i]); -+ struct btree_trans *trans = bch2_trans_get(c); -+ unsigned long old, new, v; -+ unsigned idx = w - b->writes; -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); -+ v = READ_ONCE(b->flags); -+ -+ do { -+ old = new = v; -+ -+ if (!(old & (1 << BTREE_NODE_dirty)) || -+ !!(old & (1 << BTREE_NODE_write_idx)) != idx || -+ w->journal.seq != seq) -+ break; -+ -+ new &= ~BTREE_WRITE_TYPE_MASK; -+ new |= BTREE_WRITE_journal_reclaim; -+ new |= 1 << BTREE_NODE_need_write; -+ } while ((v = cmpxchg(&b->flags, old, new)) != old); -+ -+ btree_node_write_if_need(c, b, SIX_LOCK_read); -+ six_unlock_read(&b->c.lock); -+ -+ bch2_trans_put(trans); -+ return 0; -+} -+ -+int bch2_btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 0, seq); -+} -+ -+int bch2_btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq) -+{ -+ return __btree_node_flush(j, pin, 1, seq); -+} -+ -+inline void bch2_btree_add_journal_pin(struct bch_fs *c, -+ struct btree *b, u64 seq) -+{ -+ struct btree_write *w = btree_current_write(b); -+ -+ bch2_journal_pin_add(&c->journal, seq, &w->journal, -+ btree_node_write_idx(b) == 0 -+ ? 
bch2_btree_node_flush0 -+ : bch2_btree_node_flush1); -+} -+ -+/** -+ * bch2_btree_insert_key_leaf() - insert a key one key into a leaf node -+ * @trans: btree transaction object -+ * @path: path pointing to @insert's pos -+ * @insert: key to insert -+ * @journal_seq: sequence number of journal reservation -+ */ -+inline void bch2_btree_insert_key_leaf(struct btree_trans *trans, -+ struct btree_path *path, -+ struct bkey_i *insert, -+ u64 journal_seq) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *b = path_l(path)->b; -+ struct bset_tree *t = bset_tree_last(b); -+ struct bset *i = bset(b, t); -+ int old_u64s = bset_u64s(t); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ -+ if (unlikely(!bch2_btree_bset_insert_key(trans, path, b, -+ &path_l(path)->iter, insert))) -+ return; -+ -+ i->journal_seq = cpu_to_le64(max(journal_seq, le64_to_cpu(i->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, journal_seq); -+ -+ if (unlikely(!btree_node_dirty(b))) { -+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); -+ set_btree_node_dirty_acct(c, b); -+ } -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) bset_u64s(t) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_trans_node_reinit_iter(trans, b); -+} -+ -+/* Cached btree updates: */ -+ -+/* Normal update interface: */ -+ -+static inline void btree_insert_entry_checks(struct btree_trans *trans, -+ struct btree_insert_entry *i) -+{ -+ BUG_ON(!bpos_eq(i->k->k.p, i->path->pos)); -+ BUG_ON(i->cached != i->path->cached); -+ BUG_ON(i->level != i->path->level); -+ BUG_ON(i->btree_id != i->path->btree_id); -+ EBUG_ON(!i->level && -+ btree_type_has_snapshots(i->btree_id) && -+ !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && -+ test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && -+ i->k->k.p.snapshot && -+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot)); -+} -+ -+static noinline int -+bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags, -+ unsigned long trace_ip) -+{ -+ return drop_locks_do(trans, -+ bch2_journal_preres_get(&trans->c->journal, -+ &trans->journal_preres, -+ trans->journal_preres_u64s, -+ (flags & BCH_WATERMARK_MASK))); -+} -+ -+static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, -+ unsigned flags) -+{ -+ return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, -+ trans->journal_u64s, flags); -+} -+ -+#define JSET_ENTRY_LOG_U64s 4 -+ -+static noinline void journal_transaction_name(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ struct jset_entry *entry = -+ bch2_journal_add_entry(j, &trans->journal_res, -+ BCH_JSET_ENTRY_log, 0, 0, -+ JSET_ENTRY_LOG_U64s); -+ struct jset_entry_log *l = -+ container_of(entry, struct jset_entry_log, entry); -+ -+ strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64)); -+} -+ -+static inline int btree_key_can_insert(struct btree_trans *trans, -+ struct btree *b, unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ -+ if (!bch2_btree_node_insert_fits(c, b, u64s)) -+ return -BCH_ERR_btree_insert_btree_node_full; -+ -+ return 0; -+} -+ -+static int btree_key_can_insert_cached(struct btree_trans 
*trans, unsigned flags, -+ struct btree_path *path, unsigned u64s) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_cached *ck = (void *) path->l[0].b; -+ struct btree_insert_entry *i; -+ unsigned new_u64s; -+ struct bkey_i *new_k; -+ -+ EBUG_ON(path->level); -+ -+ if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && -+ bch2_btree_key_cache_must_wait(c) && -+ !(flags & BTREE_INSERT_JOURNAL_RECLAIM)) -+ return -BCH_ERR_btree_insert_need_journal_reclaim; -+ -+ /* -+ * bch2_varint_decode can read past the end of the buffer by at most 7 -+ * bytes (it won't be used): -+ */ -+ u64s += 1; -+ -+ if (u64s <= ck->u64s) -+ return 0; -+ -+ new_u64s = roundup_pow_of_two(u64s); -+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); -+ if (!new_k) { -+ bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", -+ bch2_btree_id_str(path->btree_id), new_u64s); -+ return -BCH_ERR_ENOMEM_btree_key_cache_insert; -+ } -+ -+ trans_for_each_update(trans, i) -+ if (i->old_v == &ck->k->v) -+ i->old_v = &new_k->v; -+ -+ ck->u64s = new_u64s; -+ ck->k = new_k; -+ return 0; -+} -+ -+/* Triggers: */ -+ -+static int run_one_mem_trigger(struct btree_trans *trans, -+ struct btree_insert_entry *i, -+ unsigned flags) -+{ -+ struct bkey_s_c old = { &i->old_k, i->old_v }; -+ struct bkey_i *new = i->k; -+ const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); -+ const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); -+ int ret; -+ -+ verify_update_old_key(trans, i); -+ -+ if (unlikely(flags & BTREE_TRIGGER_NORUN)) -+ return 0; -+ -+ if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) -+ return 0; -+ -+ if (old_ops->atomic_trigger == new_ops->atomic_trigger) { -+ ret = bch2_mark_key(trans, i->btree_id, i->level, -+ old, bkey_i_to_s_c(new), -+ BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); -+ } else { -+ struct bkey _deleted = KEY(0, 0, 0); -+ struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; -+ -+ _deleted.p = i->path->pos; -+ -+ ret = bch2_mark_key(trans, i->btree_id, i->level, -+ deleted, bkey_i_to_s_c(new), -+ BTREE_TRIGGER_INSERT|flags) ?: -+ bch2_mark_key(trans, i->btree_id, i->level, -+ old, deleted, -+ BTREE_TRIGGER_OVERWRITE|flags); -+ } -+ -+ return ret; -+} -+ -+static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_entry *i, -+ bool overwrite) -+{ -+ /* -+ * Transactional triggers create new btree_insert_entries, so we can't -+ * pass them a pointer to a btree_insert_entry, that memory is going to -+ * move: -+ */ -+ struct bkey old_k = i->old_k; -+ struct bkey_s_c old = { &old_k, i->old_v }; -+ const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); -+ const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); -+ -+ verify_update_old_key(trans, i); -+ -+ if ((i->flags & BTREE_TRIGGER_NORUN) || -+ !(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type))) -+ return 0; -+ -+ if (!i->insert_trigger_run && -+ !i->overwrite_trigger_run && -+ old_ops->trans_trigger == new_ops->trans_trigger) { -+ i->overwrite_trigger_run = true; -+ i->insert_trigger_run = true; -+ return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, -+ BTREE_TRIGGER_INSERT| -+ BTREE_TRIGGER_OVERWRITE| -+ i->flags) ?: 1; -+ } else if (overwrite && !i->overwrite_trigger_run) { -+ i->overwrite_trigger_run = true; -+ return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1; -+ } else if (!overwrite && !i->insert_trigger_run) { -+ i->insert_trigger_run = true; -+ return bch2_trans_mark_new(trans, 
i->btree_id, i->level, i->k, i->flags) ?: 1; -+ } else { -+ return 0; -+ } -+} -+ -+static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, -+ struct btree_insert_entry *btree_id_start) -+{ -+ struct btree_insert_entry *i; -+ bool trans_trigger_run; -+ int ret, overwrite; -+ -+ for (overwrite = 1; overwrite >= 0; --overwrite) { -+ -+ /* -+ * Running triggers will append more updates to the list of updates as -+ * we're walking it: -+ */ -+ do { -+ trans_trigger_run = false; -+ -+ for (i = btree_id_start; -+ i < trans->updates + trans->nr_updates && i->btree_id <= btree_id; -+ i++) { -+ if (i->btree_id != btree_id) -+ continue; -+ -+ ret = run_one_trans_trigger(trans, i, overwrite); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ trans_trigger_run = true; -+ } -+ } while (trans_trigger_run); -+ } -+ -+ return 0; -+} -+ -+static int bch2_trans_commit_run_triggers(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i = NULL, *btree_id_start = trans->updates; -+ unsigned btree_id = 0; -+ int ret = 0; -+ -+ /* -+ * -+ * For a given btree, this algorithm runs insert triggers before -+ * overwrite triggers: this is so that when extents are being moved -+ * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before -+ * they are re-added. -+ */ -+ for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { -+ if (btree_id == BTREE_ID_alloc) -+ continue; -+ -+ while (btree_id_start < trans->updates + trans->nr_updates && -+ btree_id_start->btree_id < btree_id) -+ btree_id_start++; -+ -+ ret = run_btree_triggers(trans, btree_id, btree_id_start); -+ if (ret) -+ return ret; -+ } -+ -+ trans_for_each_update(trans, i) { -+ if (i->btree_id > BTREE_ID_alloc) -+ break; -+ if (i->btree_id == BTREE_ID_alloc) { -+ ret = run_btree_triggers(trans, BTREE_ID_alloc, i); -+ if (ret) -+ return ret; -+ break; -+ } -+ } -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans_for_each_update(trans, i) -+ BUG_ON(!(i->flags & BTREE_TRIGGER_NORUN) && -+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS & (1U << i->bkey_type)) && -+ (!i->insert_trigger_run || !i->overwrite_trigger_run)); -+#endif -+ return 0; -+} -+ -+static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ int ret = 0; -+ -+ trans_for_each_update(trans, i) { -+ /* -+ * XXX: synchronization of cached update triggers with gc -+ * XXX: synchronization of interior node updates with gc -+ */ -+ BUG_ON(i->cached || i->level); -+ -+ if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) { -+ ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); -+ if (ret) -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static inline int -+bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, -+ struct btree_insert_entry **stopped_at, -+ unsigned long trace_ip) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ struct btree_write_buffered_key *wb; -+ struct btree_trans_commit_hook *h; -+ unsigned u64s = 0; -+ int ret; -+ -+ if (race_fault()) { -+ trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); -+ return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); -+ } -+ -+ /* -+ * Check if the insert will fit in the leaf node with the write lock -+ * held, otherwise another thread could write the node changing the -+ * amount of space available: -+ */ -+ -+ prefetch(&trans->c->journal.flags); -+ -+ trans_for_each_update(trans, i) { -+ /* Multiple inserts might go to same leaf: */ -+ if 
(!same_leaf_as_prev(trans, i)) -+ u64s = 0; -+ -+ u64s += i->k->k.u64s; -+ ret = !i->cached -+ ? btree_key_can_insert(trans, insert_l(i)->b, u64s) -+ : btree_key_can_insert_cached(trans, flags, i->path, u64s); -+ if (ret) { -+ *stopped_at = i; -+ return ret; -+ } -+ } -+ -+ if (trans->nr_wb_updates && -+ trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size) -+ return -BCH_ERR_btree_insert_need_flush_buffer; -+ -+ /* -+ * Don't get journal reservation until after we know insert will -+ * succeed: -+ */ -+ if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ ret = bch2_trans_journal_res_get(trans, -+ (flags & BCH_WATERMARK_MASK)| -+ JOURNAL_RES_GET_NONBLOCK); -+ if (ret) -+ return ret; -+ -+ if (unlikely(trans->journal_transaction_names)) -+ journal_transaction_name(trans); -+ } else { -+ trans->journal_res.seq = c->journal.replay_journal_seq; -+ } -+ -+ /* -+ * Not allowed to fail after we've gotten our journal reservation - we -+ * have to use it: -+ */ -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && -+ !(flags & BTREE_INSERT_JOURNAL_REPLAY)) { -+ if (bch2_journal_seq_verify) -+ trans_for_each_update(trans, i) -+ i->k->k.version.lo = trans->journal_res.seq; -+ else if (bch2_inject_invalid_keys) -+ trans_for_each_update(trans, i) -+ i->k->k.version = MAX_VERSION; -+ } -+ -+ if (trans->fs_usage_deltas && -+ bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas)) -+ return -BCH_ERR_btree_insert_need_mark_replicas; -+ -+ if (trans->nr_wb_updates) { -+ EBUG_ON(flags & BTREE_INSERT_JOURNAL_REPLAY); -+ -+ ret = bch2_btree_insert_keys_write_buffer(trans); -+ if (ret) -+ goto revert_fs_usage; -+ } -+ -+ h = trans->hooks; -+ while (h) { -+ ret = h->fn(trans, h); -+ if (ret) -+ goto revert_fs_usage; -+ h = h->next; -+ } -+ -+ trans_for_each_update(trans, i) -+ if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) { -+ ret = run_one_mem_trigger(trans, i, i->flags); -+ if (ret) -+ goto fatal_err; -+ } -+ -+ if (unlikely(c->gc_pos.phase)) { -+ ret = bch2_trans_commit_run_gc_triggers(trans); -+ if (ret) -+ goto fatal_err; -+ } -+ -+ if (unlikely(trans->extra_journal_entries.nr)) { -+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), -+ trans->extra_journal_entries.data, -+ trans->extra_journal_entries.nr); -+ -+ trans->journal_res.offset += trans->extra_journal_entries.nr; -+ trans->journal_res.u64s -= trans->extra_journal_entries.nr; -+ } -+ -+ if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) { -+ struct journal *j = &c->journal; -+ struct jset_entry *entry; -+ -+ trans_for_each_update(trans, i) { -+ if (i->key_cache_already_flushed) -+ continue; -+ -+ if (i->flags & BTREE_UPDATE_NOJOURNAL) -+ continue; -+ -+ verify_update_old_key(trans, i); -+ -+ if (trans->journal_transaction_names) { -+ entry = bch2_journal_add_entry(j, &trans->journal_res, -+ BCH_JSET_ENTRY_overwrite, -+ i->btree_id, i->level, -+ i->old_k.u64s); -+ bkey_reassemble((struct bkey_i *) entry->start, -+ (struct bkey_s_c) { &i->old_k, i->old_v }); -+ } -+ -+ entry = bch2_journal_add_entry(j, &trans->journal_res, -+ BCH_JSET_ENTRY_btree_keys, -+ i->btree_id, i->level, -+ i->k->k.u64s); -+ bkey_copy((struct bkey_i *) entry->start, i->k); -+ } -+ -+ trans_for_each_wb_update(trans, wb) { -+ entry = bch2_journal_add_entry(j, &trans->journal_res, -+ BCH_JSET_ENTRY_btree_keys, -+ wb->btree, 0, -+ wb->k.k.u64s); -+ bkey_copy((struct bkey_i *) entry->start, &wb->k); -+ } -+ -+ if (trans->journal_seq) -+ *trans->journal_seq = trans->journal_res.seq; -+ } -+ -+ 
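(Editor's annotation; not part of the patch being removed.) The hunk above finishes phase one of the commit: every update, and with journal_transaction_names enabled every overwritten key as well, is copied into the journal reservation before the loop that follows touches a leaf node or the key cache. That is the write-ahead rule this code relies on: once the journal reservation has been filled, the apply phase is not allowed to fail. A minimal standalone sketch of the same two-phase shape, in plain C with hypothetical names (wal_append, tree_apply), not the bcachefs APIs:

/* Two-phase commit sketch; all names here are hypothetical. */
#include <stdio.h>

struct update { const char *key, *val; };

static char wal[4096];			/* stand-in for the journal buffer */
static unsigned long wal_len;

static int wal_append(const struct update *u)
{
	int n = snprintf(wal + wal_len, sizeof(wal) - wal_len,
			 "%s=%s;", u->key, u->val);

	if (n < 0 || (unsigned long) n >= sizeof(wal) - wal_len)
		return -1;	/* no space: fail while the tree is untouched */
	wal_len += n;
	return 0;
}

static void tree_apply(const struct update *u)
{
	printf("applied %s=%s\n", u->key, u->val);	/* the "btree" write */
}

int commit(const struct update *u, unsigned nr)
{
	unsigned i;

	for (i = 0; i < nr; i++)	/* phase 1: journal everything */
		if (wal_append(&u[i]))
			return -1;
	for (i = 0; i < nr; i++)	/* phase 2: apply, may not fail now */
		tree_apply(&u[i]);
	return 0;
}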
trans_for_each_update(trans, i) { -+ i->k->k.needs_whiteout = false; -+ -+ if (!i->cached) { -+ u64 seq = trans->journal_res.seq; -+ -+ if (i->flags & BTREE_UPDATE_PREJOURNAL) -+ seq = i->seq; -+ -+ bch2_btree_insert_key_leaf(trans, i->path, i->k, seq); -+ } else if (!i->key_cache_already_flushed) -+ bch2_btree_insert_key_cached(trans, flags, i); -+ else { -+ bch2_btree_key_cache_drop(trans, i->path); -+ btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE); -+ } -+ } -+ -+ return 0; -+fatal_err: -+ bch2_fatal_error(c); -+revert_fs_usage: -+ if (trans->fs_usage_deltas) -+ bch2_trans_fs_usage_revert(trans, trans->fs_usage_deltas); -+ return ret; -+} -+ -+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i) -+{ -+ while (--i >= trans->updates) { -+ if (same_leaf_as_prev(trans, i)) -+ continue; -+ -+ bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b); -+ } -+ -+ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); -+} -+ -+static inline int trans_lock_write(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) { -+ if (same_leaf_as_prev(trans, i)) -+ continue; -+ -+ if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c)) -+ return trans_lock_write_fail(trans, i); -+ -+ if (!i->cached) -+ bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); -+ } -+ -+ return 0; -+} -+ -+static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i; -+ struct btree_write_buffered_key *wb; -+ -+ trans_for_each_update(trans, i) -+ bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); -+ -+ trans_for_each_wb_update(trans, wb) -+ bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p); -+} -+ -+static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, -+ enum bkey_invalid_flags flags, -+ struct btree_insert_entry *i, -+ struct printbuf *err) -+{ -+ struct bch_fs *c = trans->c; -+ -+ printbuf_reset(err); -+ prt_printf(err, "invalid bkey on insert from %s -> %ps", -+ trans->fn, (void *) i->ip_allocated); -+ prt_newline(err); -+ printbuf_indent_add(err, 2); -+ -+ bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); -+ prt_newline(err); -+ -+ bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); -+ bch2_print_string_as_lines(KERN_ERR, err->buf); -+ -+ bch2_inconsistent_error(c); -+ bch2_dump_trans_updates(trans); -+ -+ return -EINVAL; -+} -+ -+/* -+ * Get journal reservation, take write locks, and attempt to do btree update(s): -+ */ -+static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags, -+ struct btree_insert_entry **stopped_at, -+ unsigned long trace_ip) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ int ret = 0, u64s_delta = 0; -+ -+ trans_for_each_update(trans, i) { -+ if (i->cached) -+ continue; -+ -+ u64s_delta += !bkey_deleted(&i->k->k) ? 
i->k->k.u64s : 0; -+ u64s_delta -= i->old_btree_u64s; -+ -+ if (!same_leaf_as_next(trans, i)) { -+ if (u64s_delta <= 0) { -+ ret = bch2_foreground_maybe_merge(trans, i->path, -+ i->level, flags); -+ if (unlikely(ret)) -+ return ret; -+ } -+ -+ u64s_delta = 0; -+ } -+ } -+ -+ ret = bch2_journal_preres_get(&c->journal, -+ &trans->journal_preres, trans->journal_preres_u64s, -+ (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK); -+ if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked)) -+ ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip); -+ if (unlikely(ret)) -+ return ret; -+ -+ ret = trans_lock_write(trans); -+ if (unlikely(ret)) -+ return ret; -+ -+ ret = bch2_trans_commit_write_locked(trans, flags, stopped_at, trace_ip); -+ -+ if (!ret && unlikely(trans->journal_replay_not_finished)) -+ bch2_drop_overwrites_from_journal(trans); -+ -+ trans_for_each_update(trans, i) -+ if (!same_leaf_as_prev(trans, i)) -+ bch2_btree_node_unlock_write_inlined(trans, i->path, -+ insert_l(i)->b); -+ -+ if (!ret && trans->journal_pin) -+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq, -+ trans->journal_pin, NULL); -+ -+ /* -+ * Drop journal reservation after dropping write locks, since dropping -+ * the journal reservation may kick off a journal write: -+ */ -+ bch2_journal_res_put(&c->journal, &trans->journal_res); -+ -+ return ret; -+} -+ -+static int journal_reclaim_wait_done(struct bch_fs *c) -+{ -+ int ret = bch2_journal_error(&c->journal) ?: -+ !bch2_btree_key_cache_must_wait(c); -+ -+ if (!ret) -+ journal_reclaim_kick(&c->journal); -+ return ret; -+} -+ -+static noinline -+int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, -+ struct btree_insert_entry *i, -+ int ret, unsigned long trace_ip) -+{ -+ struct bch_fs *c = trans->c; -+ -+ switch (ret) { -+ case -BCH_ERR_btree_insert_btree_node_full: -+ ret = bch2_btree_split_leaf(trans, i->path, flags); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path); -+ break; -+ case -BCH_ERR_btree_insert_need_mark_replicas: -+ ret = drop_locks_do(trans, -+ bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas)); -+ break; -+ case -BCH_ERR_journal_res_get_blocked: -+ /* -+ * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK -+ * flag -+ */ -+ if ((flags & BTREE_INSERT_JOURNAL_RECLAIM) && -+ (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) { -+ ret = -BCH_ERR_journal_reclaim_would_deadlock; -+ break; -+ } -+ -+ ret = drop_locks_do(trans, -+ bch2_trans_journal_res_get(trans, -+ (flags & BCH_WATERMARK_MASK)| -+ JOURNAL_RES_GET_CHECK)); -+ break; -+ case -BCH_ERR_btree_insert_need_journal_reclaim: -+ bch2_trans_unlock(trans); -+ -+ trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); -+ -+ wait_event_freezable(c->journal.reclaim_wait, -+ (ret = journal_reclaim_wait_done(c))); -+ if (ret < 0) -+ break; -+ -+ ret = bch2_trans_relock(trans); -+ break; -+ case -BCH_ERR_btree_insert_need_flush_buffer: { -+ struct btree_write_buffer *wb = &c->btree_write_buffer; -+ -+ ret = 0; -+ -+ if (wb->state.nr > wb->size * 3 / 4) { -+ bch2_trans_unlock(trans); -+ mutex_lock(&wb->flush_lock); -+ -+ if (wb->state.nr > wb->size * 3 / 4) { -+ bch2_trans_begin(trans); -+ ret = __bch2_btree_write_buffer_flush(trans, -+ flags|BTREE_INSERT_NOCHECK_RW, true); -+ if (!ret) { -+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); -+ ret = btree_trans_restart(trans, 
BCH_ERR_transaction_restart_write_buffer_flush); -+ } -+ } else { -+ mutex_unlock(&wb->flush_lock); -+ ret = bch2_trans_relock(trans); -+ } -+ } -+ break; -+ } -+ default: -+ BUG_ON(ret >= 0); -+ break; -+ } -+ -+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); -+ -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) && -+ !(flags & BTREE_INSERT_NOWAIT) && -+ (flags & BTREE_INSERT_NOFAIL), c, -+ "%s: incorrectly got %s\n", __func__, bch2_err_str(ret)); -+ -+ return ret; -+} -+ -+static noinline int -+bch2_trans_commit_get_rw_cold(struct btree_trans *trans, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ if (likely(!(flags & BTREE_INSERT_LAZY_RW)) || -+ test_bit(BCH_FS_STARTED, &c->flags)) -+ return -BCH_ERR_erofs_trans_commit; -+ -+ ret = drop_locks_do(trans, bch2_fs_read_write_early(c)); -+ if (ret) -+ return ret; -+ -+ bch2_write_ref_get(c, BCH_WRITE_REF_trans); -+ return 0; -+} -+ -+/* -+ * This is for updates done in the early part of fsck - btree_gc - before we've -+ * gone RW. we only add the new key to the list of keys for journal replay to -+ * do. -+ */ -+static noinline int -+do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i; -+ int ret = 0; -+ -+ trans_for_each_update(trans, i) { -+ ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i = NULL; -+ struct btree_write_buffered_key *wb; -+ unsigned u64s; -+ int ret = 0; -+ -+ if (!trans->nr_updates && -+ !trans->nr_wb_updates && -+ !trans->extra_journal_entries.nr) -+ goto out_reset; -+ -+ if (flags & BTREE_INSERT_GC_LOCK_HELD) -+ lockdep_assert_held(&c->gc_lock); -+ -+ ret = bch2_trans_commit_run_triggers(trans); -+ if (ret) -+ goto out_reset; -+ -+ trans_for_each_update(trans, i) { -+ struct printbuf buf = PRINTBUF; -+ enum bkey_invalid_flags invalid_flags = 0; -+ -+ if (!(flags & BTREE_INSERT_JOURNAL_REPLAY)) -+ invalid_flags |= BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT; -+ -+ if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), -+ i->bkey_type, invalid_flags, &buf))) -+ ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); -+ btree_insert_entry_checks(trans, i); -+ printbuf_exit(&buf); -+ -+ if (ret) -+ return ret; -+ } -+ -+ if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) { -+ ret = do_bch2_trans_commit_to_journal_replay(trans); -+ goto out_reset; -+ } -+ -+ if (!(flags & BTREE_INSERT_NOCHECK_RW) && -+ unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { -+ ret = bch2_trans_commit_get_rw_cold(trans, flags); -+ if (ret) -+ goto out_reset; -+ } -+ -+ if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 && -+ mutex_trylock(&c->btree_write_buffer.flush_lock)) { -+ bch2_trans_begin(trans); -+ bch2_trans_unlock(trans); -+ -+ ret = __bch2_btree_write_buffer_flush(trans, -+ flags|BTREE_INSERT_NOCHECK_RW, true); -+ if (!ret) { -+ trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_); -+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush); -+ } -+ goto out; -+ } -+ -+ EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags)); -+ -+ memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); -+ -+ trans->journal_u64s = trans->extra_journal_entries.nr; -+ trans->journal_preres_u64s = 0; -+ -+ 
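(Editor's annotation; not part of the patch being removed.) The accounting that follows sizes the whole journal reservation up front, in u64s, before any node locks are taken: each journaled key costs its own size plus a fixed jset entry header (which is what jset_u64s() adds), keys headed for the key cache also feed the pre-reservation, and with transaction names enabled both a log entry for the name (JSET_ENTRY_LOG_U64s, defined as 4 earlier in this file) and a copy of each overwritten key are journaled too. A sketch of that arithmetic as a pure function; the one-u64 header size here is an assumption, not taken from the patch:

/* Reservation sizing sketch; JSET_HEADER_U64S is an assumed value. */
#define JSET_HEADER_U64S	1	/* per-entry header, in u64s */
#define LOG_ENTRY_U64S		4	/* mirrors JSET_ENTRY_LOG_U64s above */

static unsigned jset_entry_u64s(unsigned key_u64s)
{
	return key_u64s + JSET_HEADER_U64S;
}

unsigned trans_journal_u64s(const unsigned *new_u64s,
			    const unsigned *old_u64s,
			    unsigned nr, int log_names)
{
	unsigned total = log_names ? jset_entry_u64s(LOG_ENTRY_U64S) : 0;
	unsigned i;

	for (i = 0; i < nr; i++) {
		total += jset_entry_u64s(new_u64s[i]);	/* the new key */
		if (log_names)		/* plus the overwritten key */
			total += jset_entry_u64s(old_u64s[i]);
	}
	return total;
}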
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-+
-+	if (trans->journal_transaction_names)
-+		trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
-+
-+	trans_for_each_update(trans, i) {
-+		EBUG_ON(!i->path->should_be_locked);
-+
-+		ret = bch2_btree_path_upgrade(trans, i->path, i->level + 1);
-+		if (unlikely(ret))
-+			goto out;
-+
-+		EBUG_ON(!btree_node_intent_locked(i->path, i->level));
-+
-+		if (i->key_cache_already_flushed)
-+			continue;
-+
-+		/* we're going to journal the key being updated: */
-+		u64s = jset_u64s(i->k->k.u64s);
-+		if (i->cached &&
-+		    likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
-+			trans->journal_preres_u64s += u64s;
-+
-+		if (i->flags & BTREE_UPDATE_NOJOURNAL)
-+			continue;
-+
-+		trans->journal_u64s += u64s;
-+
-+		/* and we're also going to log the overwrite: */
-+		if (trans->journal_transaction_names)
-+			trans->journal_u64s += jset_u64s(i->old_k.u64s);
-+	}
-+
-+	trans_for_each_wb_update(trans, wb)
-+		trans->journal_u64s += jset_u64s(wb->k.k.u64s);
-+
-+	if (trans->extra_journal_res) {
-+		ret = bch2_disk_reservation_add(c, trans->disk_res,
-+				trans->extra_journal_res,
-+				(flags & BTREE_INSERT_NOFAIL)
-+				? BCH_DISK_RESERVATION_NOFAIL : 0);
-+		if (ret)
-+			goto err;
-+	}
-+retry:
-+	bch2_trans_verify_not_in_restart(trans);
-+	memset(&trans->journal_res, 0, sizeof(trans->journal_res));
-+
-+	ret = do_bch2_trans_commit(trans, flags, &i, _RET_IP_);
-+
-+	/* make sure we didn't drop or screw up locks: */
-+	bch2_trans_verify_locks(trans);
-+
-+	if (ret)
-+		goto err;
-+
-+	trace_and_count(c, transaction_commit, trans, _RET_IP_);
-+out:
-+	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-+
-+	if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
-+		bch2_write_ref_put(c, BCH_WRITE_REF_trans);
-+out_reset:
-+	if (!ret)
-+		bch2_trans_downgrade(trans);
-+	bch2_trans_reset_updates(trans);
-+
-+	return ret;
-+err:
-+	ret = bch2_trans_commit_error(trans, flags, i, ret, _RET_IP_);
-+	if (ret)
-+		goto out;
-+
-+	goto retry;
-+}
-diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
-new file mode 100644
-index 000000000000..3ab773005484
---- /dev/null
-+++ b/fs/bcachefs/btree_types.h
-@@ -0,0 +1,756 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_BTREE_TYPES_H
-+#define _BCACHEFS_BTREE_TYPES_H
-+
-+#include <linux/list.h>
-+#include <linux/rhashtable.h>
-+
-+//#include "bkey_methods.h"
-+#include "buckets_types.h"
-+#include "darray.h"
-+#include "errcode.h"
-+#include "journal_types.h"
-+#include "replicas_types.h"
-+#include "six.h"
-+
-+struct open_bucket;
-+struct btree_update;
-+struct btree_trans;
-+
-+#define MAX_BSETS		3U
-+
-+struct btree_nr_keys {
-+
-+	/*
-+	 * Amount of live metadata (i.e.
size of node after a compaction) in -+ * units of u64s -+ */ -+ u16 live_u64s; -+ u16 bset_u64s[MAX_BSETS]; -+ -+ /* live keys only: */ -+ u16 packed_keys; -+ u16 unpacked_keys; -+}; -+ -+struct bset_tree { -+ /* -+ * We construct a binary tree in an array as if the array -+ * started at 1, so that things line up on the same cachelines -+ * better: see comments in bset.c at cacheline_to_bkey() for -+ * details -+ */ -+ -+ /* size of the binary tree and prev array */ -+ u16 size; -+ -+ /* function of size - precalculated for to_inorder() */ -+ u16 extra; -+ -+ u16 data_offset; -+ u16 aux_data_offset; -+ u16 end_offset; -+}; -+ -+struct btree_write { -+ struct journal_entry_pin journal; -+}; -+ -+struct btree_alloc { -+ struct open_buckets ob; -+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); -+}; -+ -+struct btree_bkey_cached_common { -+ struct six_lock lock; -+ u8 level; -+ u8 btree_id; -+ bool cached; -+}; -+ -+struct btree { -+ struct btree_bkey_cached_common c; -+ -+ struct rhash_head hash; -+ u64 hash_val; -+ -+ unsigned long flags; -+ u16 written; -+ u8 nsets; -+ u8 nr_key_bits; -+ u16 version_ondisk; -+ -+ struct bkey_format format; -+ -+ struct btree_node *data; -+ void *aux_data; -+ -+ /* -+ * Sets of sorted keys - the real btree node - plus a binary search tree -+ * -+ * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point -+ * to the memory we have allocated for this btree node. Additionally, -+ * set[0]->data points to the entire btree node as it exists on disk. -+ */ -+ struct bset_tree set[MAX_BSETS]; -+ -+ struct btree_nr_keys nr; -+ u16 sib_u64s[2]; -+ u16 whiteout_u64s; -+ u8 byte_order; -+ u8 unpack_fn_len; -+ -+ struct btree_write writes[2]; -+ -+ /* Key/pointer for this btree node */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+ -+ /* -+ * XXX: add a delete sequence number, so when bch2_btree_node_relock() -+ * fails because the lock sequence number has changed - i.e. the -+ * contents were modified - we can still relock the node if it's still -+ * the one we want, without redoing the traversal -+ */ -+ -+ /* -+ * For asynchronous splits/interior node updates: -+ * When we do a split, we allocate new child nodes and update the parent -+ * node to point to them: we update the parent in memory immediately, -+ * but then we must wait until the children have been written out before -+ * the update to the parent can be written - this is a list of the -+ * btree_updates that are blocking this node from being -+ * written: -+ */ -+ struct list_head write_blocked; -+ -+ /* -+ * Also for asynchronous splits/interior node updates: -+ * If a btree node isn't reachable yet, we don't want to kick off -+ * another write - because that write also won't yet be reachable and -+ * marking it as completed before it's reachable would be incorrect: -+ */ -+ unsigned long will_make_reachable; -+ -+ struct open_buckets ob; -+ -+ /* lru list */ -+ struct list_head list; -+}; -+ -+struct btree_cache { -+ struct rhashtable table; -+ bool table_init_done; -+ /* -+ * We never free a struct btree, except on shutdown - we just put it on -+ * the btree_cache_freed list and reuse it later. This simplifies the -+ * code, and it doesn't cost us much memory as the memory usage is -+ * dominated by buffers that hold the actual btree node data and those -+ * can be freed - and the number of struct btrees allocated is -+ * effectively bounded. 
-+ * -+ * btree_cache_freeable effectively is a small cache - we use it because -+ * high order page allocations can be rather expensive, and it's quite -+ * common to delete and allocate btree nodes in quick succession. It -+ * should never grow past ~2-3 nodes in practice. -+ */ -+ struct mutex lock; -+ struct list_head live; -+ struct list_head freeable; -+ struct list_head freed_pcpu; -+ struct list_head freed_nonpcpu; -+ -+ /* Number of elements in live + freeable lists */ -+ unsigned used; -+ unsigned reserve; -+ atomic_t dirty; -+ struct shrinker shrink; -+ -+ /* -+ * If we need to allocate memory for a new btree node and that -+ * allocation fails, we can cannibalize another node in the btree cache -+ * to satisfy the allocation - lock to guarantee only one thread does -+ * this at a time: -+ */ -+ struct task_struct *alloc_lock; -+ struct closure_waitlist alloc_wait; -+}; -+ -+struct btree_node_iter { -+ struct btree_node_iter_set { -+ u16 k, end; -+ } data[MAX_BSETS]; -+}; -+ -+/* -+ * Iterate over all possible positions, synthesizing deleted keys for holes: -+ */ -+static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0; -+static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1; -+/* -+ * Indicates that intent locks should be taken on leaf nodes, because we expect -+ * to be doing updates: -+ */ -+static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2; -+/* -+ * Causes the btree iterator code to prefetch additional btree nodes from disk: -+ */ -+static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3; -+/* -+ * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for -+ * @pos or the first key strictly greater than @pos -+ */ -+static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4; -+static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5; -+static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6; -+static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7; -+static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8; -+static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9; -+static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; -+static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11; -+static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12; -+static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13; -+static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14; -+static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15; -+#define __BTREE_ITER_FLAGS_END 16 -+ -+enum btree_path_uptodate { -+ BTREE_ITER_UPTODATE = 0, -+ BTREE_ITER_NEED_RELOCK = 1, -+ BTREE_ITER_NEED_TRAVERSE = 2, -+}; -+ -+#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG) -+#define TRACK_PATH_ALLOCATED -+#endif -+ -+struct btree_path { -+ u8 idx; -+ u8 sorted_idx; -+ u8 ref; -+ u8 intent_ref; -+ u32 alloc_seq; -+ u32 downgrade_seq; -+ -+ /* btree_iter_copy starts here: */ -+ struct bpos pos; -+ -+ enum btree_id btree_id:5; -+ bool cached:1; -+ bool preserve:1; -+ enum btree_path_uptodate uptodate:2; -+ /* -+ * When true, failing to relock this path will cause the transaction to -+ * restart: -+ */ -+ bool should_be_locked:1; -+ unsigned level:3, -+ locks_want:3; -+ u8 nodes_locked; -+ -+ struct btree_path_level { -+ struct btree *b; -+ struct btree_node_iter iter; -+ u32 lock_seq; -+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS -+ u64 lock_taken_time; -+#endif -+ } l[BTREE_MAX_DEPTH]; -+#ifdef TRACK_PATH_ALLOCATED -+ unsigned long 
ip_allocated; -+#endif -+}; -+ -+static inline struct btree_path_level *path_l(struct btree_path *path) -+{ -+ return path->l + path->level; -+} -+ -+static inline unsigned long btree_path_ip_allocated(struct btree_path *path) -+{ -+#ifdef TRACK_PATH_ALLOCATED -+ return path->ip_allocated; -+#else -+ return _THIS_IP_; -+#endif -+} -+ -+/* -+ * @pos - iterator's current position -+ * @level - current btree depth -+ * @locks_want - btree level below which we start taking intent locks -+ * @nodes_locked - bitmask indicating which nodes in @nodes are locked -+ * @nodes_intent_locked - bitmask indicating which locks are intent locks -+ */ -+struct btree_iter { -+ struct btree_trans *trans; -+ struct btree_path *path; -+ struct btree_path *update_path; -+ struct btree_path *key_cache_path; -+ -+ enum btree_id btree_id:8; -+ unsigned min_depth:3; -+ unsigned advanced:1; -+ -+ /* btree_iter_copy starts here: */ -+ u16 flags; -+ -+ /* When we're filtering by snapshot, the snapshot ID we're looking for: */ -+ unsigned snapshot; -+ -+ struct bpos pos; -+ /* -+ * Current unpacked key - so that bch2_btree_iter_next()/ -+ * bch2_btree_iter_next_slot() can correctly advance pos. -+ */ -+ struct bkey k; -+ -+ /* BTREE_ITER_WITH_JOURNAL: */ -+ size_t journal_idx; -+ struct bpos journal_pos; -+#ifdef TRACK_PATH_ALLOCATED -+ unsigned long ip_allocated; -+#endif -+}; -+ -+struct btree_key_cache_freelist { -+ struct bkey_cached *objs[16]; -+ unsigned nr; -+}; -+ -+struct btree_key_cache { -+ struct mutex lock; -+ struct rhashtable table; -+ bool table_init_done; -+ struct list_head freed_pcpu; -+ struct list_head freed_nonpcpu; -+ struct shrinker shrink; -+ unsigned shrink_iter; -+ struct btree_key_cache_freelist __percpu *pcpu_freed; -+ -+ atomic_long_t nr_freed; -+ atomic_long_t nr_keys; -+ atomic_long_t nr_dirty; -+}; -+ -+struct bkey_cached_key { -+ u32 btree_id; -+ struct bpos pos; -+} __packed __aligned(4); -+ -+#define BKEY_CACHED_ACCESSED 0 -+#define BKEY_CACHED_DIRTY 1 -+ -+struct bkey_cached { -+ struct btree_bkey_cached_common c; -+ -+ unsigned long flags; -+ u16 u64s; -+ bool valid; -+ u32 btree_trans_barrier_seq; -+ struct bkey_cached_key key; -+ -+ struct rhash_head hash; -+ struct list_head list; -+ -+ struct journal_preres res; -+ struct journal_entry_pin journal; -+ u64 seq; -+ -+ struct bkey_i *k; -+}; -+ -+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b) -+{ -+ return !b->cached -+ ? 
container_of(b, struct btree, c)->key.k.p -+ : container_of(b, struct bkey_cached, c)->key.pos; -+} -+ -+struct btree_insert_entry { -+ unsigned flags; -+ u8 bkey_type; -+ enum btree_id btree_id:8; -+ u8 level:4; -+ bool cached:1; -+ bool insert_trigger_run:1; -+ bool overwrite_trigger_run:1; -+ bool key_cache_already_flushed:1; -+ /* -+ * @old_k may be a key from the journal; @old_btree_u64s always refers -+ * to the size of the key being overwritten in the btree: -+ */ -+ u8 old_btree_u64s; -+ struct bkey_i *k; -+ struct btree_path *path; -+ u64 seq; -+ /* key being overwritten: */ -+ struct bkey old_k; -+ const struct bch_val *old_v; -+ unsigned long ip_allocated; -+}; -+ -+#ifndef CONFIG_LOCKDEP -+#define BTREE_ITER_MAX 64 -+#else -+#define BTREE_ITER_MAX 32 -+#endif -+ -+struct btree_trans_commit_hook; -+typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); -+ -+struct btree_trans_commit_hook { -+ btree_trans_commit_hook_fn *fn; -+ struct btree_trans_commit_hook *next; -+}; -+ -+#define BTREE_TRANS_MEM_MAX (1U << 16) -+ -+#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000 -+ -+struct btree_trans { -+ struct bch_fs *c; -+ const char *fn; -+ struct closure ref; -+ struct list_head list; -+ u64 last_begin_time; -+ -+ u8 lock_may_not_fail; -+ u8 lock_must_abort; -+ struct btree_bkey_cached_common *locking; -+ struct six_lock_waiter locking_wait; -+ -+ int srcu_idx; -+ -+ u8 fn_idx; -+ u8 nr_sorted; -+ u8 nr_updates; -+ u8 nr_wb_updates; -+ u8 wb_updates_size; -+ bool srcu_held:1; -+ bool used_mempool:1; -+ bool in_traverse_all:1; -+ bool paths_sorted:1; -+ bool memory_allocation_failure:1; -+ bool journal_transaction_names:1; -+ bool journal_replay_not_finished:1; -+ bool notrace_relock_fail:1; -+ enum bch_errcode restarted:16; -+ u32 restart_count; -+ unsigned long last_begin_ip; -+ unsigned long last_restarted_ip; -+ unsigned long srcu_lock_time; -+ -+ /* -+ * For when bch2_trans_update notices we'll be splitting a compressed -+ * extent: -+ */ -+ unsigned extra_journal_res; -+ unsigned nr_max_paths; -+ -+ u64 paths_allocated; -+ -+ unsigned mem_top; -+ unsigned mem_max; -+ unsigned mem_bytes; -+ void *mem; -+ -+ u8 sorted[BTREE_ITER_MAX + 8]; -+ struct btree_path paths[BTREE_ITER_MAX]; -+ struct btree_insert_entry updates[BTREE_ITER_MAX]; -+ struct btree_write_buffered_key *wb_updates; -+ -+ /* update path: */ -+ struct btree_trans_commit_hook *hooks; -+ darray_u64 extra_journal_entries; -+ struct journal_entry_pin *journal_pin; -+ -+ struct journal_res journal_res; -+ struct journal_preres journal_preres; -+ u64 *journal_seq; -+ struct disk_reservation *disk_res; -+ unsigned journal_u64s; -+ unsigned journal_preres_u64s; -+ struct replicas_delta_list *fs_usage_deltas; -+}; -+ -+#define BCH_BTREE_WRITE_TYPES() \ -+ x(initial, 0) \ -+ x(init_next_bset, 1) \ -+ x(cache_reclaim, 2) \ -+ x(journal_reclaim, 3) \ -+ x(interior, 4) -+ -+enum btree_write_type { -+#define x(t, n) BTREE_WRITE_##t, -+ BCH_BTREE_WRITE_TYPES() -+#undef x -+ BTREE_WRITE_TYPE_NR, -+}; -+ -+#define BTREE_WRITE_TYPE_MASK (roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1) -+#define BTREE_WRITE_TYPE_BITS ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR)) -+ -+#define BTREE_FLAGS() \ -+ x(read_in_flight) \ -+ x(read_error) \ -+ x(dirty) \ -+ x(need_write) \ -+ x(write_blocked) \ -+ x(will_make_reachable) \ -+ x(noevict) \ -+ x(write_idx) \ -+ x(accessed) \ -+ x(write_in_flight) \ -+ x(write_in_flight_inner) \ -+ x(just_written) \ -+ x(dying) \ -+ x(fake) \ -+ x(need_rewrite) \ -+ 
x(never_write) -+ -+enum btree_flags { -+ /* First bits for btree node write type */ -+ BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1, -+#define x(flag) BTREE_NODE_##flag, -+ BTREE_FLAGS() -+#undef x -+}; -+ -+#define x(flag) \ -+static inline bool btree_node_ ## flag(struct btree *b) \ -+{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void set_btree_node_ ## flag(struct btree *b) \ -+{ set_bit(BTREE_NODE_ ## flag, &b->flags); } \ -+ \ -+static inline void clear_btree_node_ ## flag(struct btree *b) \ -+{ clear_bit(BTREE_NODE_ ## flag, &b->flags); } -+ -+BTREE_FLAGS() -+#undef x -+ -+static inline struct btree_write *btree_current_write(struct btree *b) -+{ -+ return b->writes + btree_node_write_idx(b); -+} -+ -+static inline struct btree_write *btree_prev_write(struct btree *b) -+{ -+ return b->writes + (btree_node_write_idx(b) ^ 1); -+} -+ -+static inline struct bset_tree *bset_tree_last(struct btree *b) -+{ -+ EBUG_ON(!b->nsets); -+ return b->set + b->nsets - 1; -+} -+ -+static inline void * -+__btree_node_offset_to_ptr(const struct btree *b, u16 offset) -+{ -+ return (void *) ((u64 *) b->data + 1 + offset); -+} -+ -+static inline u16 -+__btree_node_ptr_to_offset(const struct btree *b, const void *p) -+{ -+ u16 ret = (u64 *) p - 1 - (u64 *) b->data; -+ -+ EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p); -+ return ret; -+} -+ -+static inline struct bset *bset(const struct btree *b, -+ const struct bset_tree *t) -+{ -+ return __btree_node_offset_to_ptr(b, t->data_offset); -+} -+ -+static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t) -+{ -+ t->end_offset = -+ __btree_node_ptr_to_offset(b, vstruct_last(bset(b, t))); -+} -+ -+static inline void set_btree_bset(struct btree *b, struct bset_tree *t, -+ const struct bset *i) -+{ -+ t->data_offset = __btree_node_ptr_to_offset(b, i); -+ set_btree_bset_end(b, t); -+} -+ -+static inline struct bset *btree_bset_first(struct btree *b) -+{ -+ return bset(b, b->set); -+} -+ -+static inline struct bset *btree_bset_last(struct btree *b) -+{ -+ return bset(b, bset_tree_last(b)); -+} -+ -+static inline u16 -+__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k) -+{ -+ return __btree_node_ptr_to_offset(b, k); -+} -+ -+static inline struct bkey_packed * -+__btree_node_offset_to_key(const struct btree *b, u16 k) -+{ -+ return __btree_node_offset_to_ptr(b, k); -+} -+ -+static inline unsigned btree_bkey_first_offset(const struct bset_tree *t) -+{ -+ return t->data_offset + offsetof(struct bset, _data) / sizeof(u64); -+} -+ -+#define btree_bkey_first(_b, _t) \ -+({ \ -+ EBUG_ON(bset(_b, _t)->start != \ -+ __btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\ -+ \ -+ bset(_b, _t)->start; \ -+}) -+ -+#define btree_bkey_last(_b, _t) \ -+({ \ -+ EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \ -+ vstruct_last(bset(_b, _t))); \ -+ \ -+ __btree_node_offset_to_key(_b, (_t)->end_offset); \ -+}) -+ -+static inline unsigned bset_u64s(struct bset_tree *t) -+{ -+ return t->end_offset - t->data_offset - -+ sizeof(struct bset) / sizeof(u64); -+} -+ -+static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t) -+{ -+ return bset_u64s(t) - b->nr.bset_u64s[t - b->set]; -+} -+ -+static inline unsigned bset_byte_offset(struct btree *b, void *i) -+{ -+ return i - (void *) b->data; -+} -+ -+enum btree_node_type { -+ BKEY_TYPE_btree, -+#define x(kwd, val, ...) 
BKEY_TYPE_##kwd = val + 1, -+ BCH_BTREE_IDS() -+#undef x -+ BKEY_TYPE_NR -+}; -+ -+/* Type of a key in btree @id at level @level: */ -+static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) -+{ -+ return level ? BKEY_TYPE_btree : (unsigned) id + 1; -+} -+ -+/* Type of keys @b contains: */ -+static inline enum btree_node_type btree_node_type(struct btree *b) -+{ -+ return __btree_node_type(b->c.level, b->c.btree_id); -+} -+ -+const char *bch2_btree_node_type_str(enum btree_node_type); -+ -+#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ -+ (BIT_ULL(BKEY_TYPE_extents)| \ -+ BIT_ULL(BKEY_TYPE_alloc)| \ -+ BIT_ULL(BKEY_TYPE_inodes)| \ -+ BIT_ULL(BKEY_TYPE_stripes)| \ -+ BIT_ULL(BKEY_TYPE_reflink)| \ -+ BIT_ULL(BKEY_TYPE_btree)) -+ -+#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ -+ (BIT_ULL(BKEY_TYPE_alloc)| \ -+ BIT_ULL(BKEY_TYPE_inodes)| \ -+ BIT_ULL(BKEY_TYPE_stripes)| \ -+ BIT_ULL(BKEY_TYPE_snapshots)) -+ -+#define BTREE_NODE_TYPE_HAS_TRIGGERS \ -+ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ -+ BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) -+ -+static inline bool btree_node_type_needs_gc(enum btree_node_type type) -+{ -+ return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type); -+} -+ -+static inline bool btree_node_type_is_extents(enum btree_node_type type) -+{ -+ const unsigned mask = 0 -+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) -+ BCH_BTREE_IDS() -+#undef x -+ ; -+ -+ return (1U << type) & mask; -+} -+ -+static inline bool btree_id_is_extents(enum btree_id btree) -+{ -+ return btree_node_type_is_extents(__btree_node_type(0, btree)); -+} -+ -+static inline bool btree_type_has_snapshots(enum btree_id id) -+{ -+ const unsigned mask = 0 -+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr) -+ BCH_BTREE_IDS() -+#undef x -+ ; -+ -+ return (1U << id) & mask; -+} -+ -+static inline bool btree_type_has_snapshot_field(enum btree_id id) -+{ -+ const unsigned mask = 0 -+#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) -+ BCH_BTREE_IDS() -+#undef x -+ ; -+ -+ return (1U << id) & mask; -+} -+ -+static inline bool btree_type_has_ptrs(enum btree_id id) -+{ -+ const unsigned mask = 0 -+#define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_ID_DATA)) << nr) -+ BCH_BTREE_IDS() -+#undef x -+ ; -+ -+ return (1U << id) & mask; -+} -+ -+struct btree_root { -+ struct btree *b; -+ -+ /* On disk root - see async splits: */ -+ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); -+ u8 level; -+ u8 alive; -+ s8 error; -+}; -+ -+enum btree_gc_coalesce_fail_reason { -+ BTREE_GC_COALESCE_FAIL_RESERVE_GET, -+ BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, -+ BTREE_GC_COALESCE_FAIL_FORMAT_FITS, -+}; -+ -+enum btree_node_sibling { -+ btree_prev_sib, -+ btree_next_sib, -+}; -+ -+#endif /* _BCACHEFS_BTREE_TYPES_H */ -diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c -new file mode 100644 -index 000000000000..324767c0ddcc ---- /dev/null -+++ b/fs/bcachefs/btree_update.c -@@ -0,0 +1,933 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "btree_iter.h" -+#include "btree_journal_iter.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "debug.h" -+#include "errcode.h" -+#include "error.h" -+#include "extents.h" -+#include "keylist.h" -+#include "snapshot.h" -+#include "trace.h" -+ -+static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, -+ const struct btree_insert_entry *r) -+{ -+ return cmp_int(l->btree_id, r->btree_id) ?: -+ cmp_int(l->cached, r->cached) ?: -+ -cmp_int(l->level, r->level) ?: -+ bpos_cmp(l->k->k.p, r->k->k.p); -+} -+ -+static int __must_check -+bch2_trans_update_by_path(struct btree_trans *, struct btree_path *, -+ struct bkey_i *, enum btree_update_flags, -+ unsigned long ip); -+ -+static noinline int extent_front_merge(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct bkey_i **insert, -+ enum btree_update_flags flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i *update; -+ int ret; -+ -+ update = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(update); -+ if (ret) -+ return ret; -+ -+ if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) -+ return 0; -+ -+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?: -+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ return 0; -+ -+ ret = bch2_btree_delete_at(trans, iter, flags); -+ if (ret) -+ return ret; -+ -+ *insert = update; -+ return 0; -+} -+ -+static noinline int extent_back_merge(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ int ret; -+ -+ ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?: -+ bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ return 0; -+ -+ bch2_bkey_merge(c, bkey_i_to_s(insert), k); -+ return 0; -+} -+ -+/* -+ * When deleting, check if we need to emit a whiteout (because we're overwriting -+ * something in an ancestor snapshot) -+ */ -+static int need_whiteout_for_snapshot(struct btree_trans *trans, -+ enum btree_id btree_id, struct bpos pos) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u32 snapshot = pos.snapshot; -+ int ret; -+ -+ if (!bch2_snapshot_parent(trans->c, pos.snapshot)) -+ return 0; -+ -+ pos.snapshot++; -+ -+ for_each_btree_key_norestart(trans, iter, btree_id, pos, -+ BTREE_ITER_ALL_SNAPSHOTS| -+ BTREE_ITER_NOPRESERVE, k, ret) { -+ if (!bkey_eq(k.k->p, pos)) -+ break; -+ -+ if (bch2_snapshot_is_ancestor(trans->c, snapshot, -+ k.k->p.snapshot)) { -+ ret = 
!bkey_whiteout(k.k); -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+} -+ -+int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, -+ enum btree_id id, -+ struct bpos old_pos, -+ struct bpos new_pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter old_iter, new_iter = { NULL }; -+ struct bkey_s_c old_k, new_k; -+ snapshot_id_list s; -+ struct bkey_i *update; -+ int ret = 0; -+ -+ if (!bch2_snapshot_has_children(c, old_pos.snapshot)) -+ return 0; -+ -+ darray_init(&s); -+ -+ bch2_trans_iter_init(trans, &old_iter, id, old_pos, -+ BTREE_ITER_NOT_EXTENTS| -+ BTREE_ITER_ALL_SNAPSHOTS); -+ while ((old_k = bch2_btree_iter_prev(&old_iter)).k && -+ !(ret = bkey_err(old_k)) && -+ bkey_eq(old_pos, old_k.k->p)) { -+ struct bpos whiteout_pos = -+ SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);; -+ -+ if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) || -+ snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot)) -+ continue; -+ -+ new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos, -+ BTREE_ITER_NOT_EXTENTS| -+ BTREE_ITER_INTENT); -+ ret = bkey_err(new_k); -+ if (ret) -+ break; -+ -+ if (new_k.k->type == KEY_TYPE_deleted) { -+ update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); -+ ret = PTR_ERR_OR_ZERO(update); -+ if (ret) -+ break; -+ -+ bkey_init(&update->k); -+ update->k.p = whiteout_pos; -+ update->k.type = KEY_TYPE_whiteout; -+ -+ ret = bch2_trans_update(trans, &new_iter, update, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ } -+ bch2_trans_iter_exit(trans, &new_iter); -+ -+ ret = snapshot_list_add(c, &s, old_k.k->p.snapshot); -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &new_iter); -+ bch2_trans_iter_exit(trans, &old_iter); -+ darray_exit(&s); -+ -+ return ret; -+} -+ -+int bch2_trans_update_extent_overwrite(struct btree_trans *trans, -+ struct btree_iter *iter, -+ enum btree_update_flags flags, -+ struct bkey_s_c old, -+ struct bkey_s_c new) -+{ -+ enum btree_id btree_id = iter->btree_id; -+ struct bkey_i *update; -+ struct bpos new_start = bkey_start_pos(new.k); -+ bool front_split = bkey_lt(bkey_start_pos(old.k), new_start); -+ bool back_split = bkey_gt(old.k->p, new.k->p); -+ int ret = 0, compressed_sectors; -+ -+ /* -+ * If we're going to be splitting a compressed extent, note it -+ * so that __bch2_trans_commit() can increase our disk -+ * reservation: -+ */ -+ if (((front_split && back_split) || -+ ((front_split || back_split) && old.k->p.snapshot != new.k->p.snapshot)) && -+ (compressed_sectors = bch2_bkey_sectors_compressed(old))) -+ trans->extra_journal_res += compressed_sectors; -+ -+ if (front_split) { -+ update = bch2_bkey_make_mut_noupdate(trans, old); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ return ret; -+ -+ bch2_cut_back(new_start, update); -+ -+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id, -+ old.k->p, update->k.p) ?: -+ bch2_btree_insert_nonextent(trans, btree_id, update, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); -+ if (ret) -+ return ret; -+ } -+ -+ /* If we're overwriting in a different snapshot - middle split: */ -+ if (old.k->p.snapshot != new.k->p.snapshot && -+ (front_split || back_split)) { -+ update = bch2_bkey_make_mut_noupdate(trans, old); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ return ret; -+ -+ bch2_cut_front(new_start, update); -+ bch2_cut_back(new.k->p, update); -+ -+ ret = bch2_insert_snapshot_whiteouts(trans, btree_id, -+ old.k->p, update->k.p) ?: -+ bch2_btree_insert_nonextent(trans, btree_id, update, -+ 
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); -+ if (ret) -+ return ret; -+ } -+ -+ if (bkey_le(old.k->p, new.k->p)) { -+ update = bch2_trans_kmalloc(trans, sizeof(*update)); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ return ret; -+ -+ bkey_init(&update->k); -+ update->k.p = old.k->p; -+ update->k.p.snapshot = new.k->p.snapshot; -+ -+ if (new.k->p.snapshot != old.k->p.snapshot) { -+ update->k.type = KEY_TYPE_whiteout; -+ } else if (btree_type_has_snapshots(btree_id)) { -+ ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p); -+ if (ret < 0) -+ return ret; -+ if (ret) -+ update->k.type = KEY_TYPE_whiteout; -+ } -+ -+ ret = bch2_btree_insert_nonextent(trans, btree_id, update, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); -+ if (ret) -+ return ret; -+ } -+ -+ if (back_split) { -+ update = bch2_bkey_make_mut_noupdate(trans, old); -+ if ((ret = PTR_ERR_OR_ZERO(update))) -+ return ret; -+ -+ bch2_cut_front(new.k->p, update); -+ -+ ret = bch2_trans_update_by_path(trans, iter->path, update, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| -+ flags, _RET_IP_); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int bch2_trans_update_extent(struct btree_trans *trans, -+ struct btree_iter *orig_iter, -+ struct bkey_i *insert, -+ enum btree_update_flags flags) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ enum btree_id btree_id = orig_iter->btree_id; -+ int ret = 0; -+ -+ bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k), -+ BTREE_ITER_INTENT| -+ BTREE_ITER_WITH_UPDATES| -+ BTREE_ITER_NOT_EXTENTS); -+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); -+ if ((ret = bkey_err(k))) -+ goto err; -+ if (!k.k) -+ goto out; -+ -+ if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) { -+ if (bch2_bkey_maybe_mergable(k.k, &insert->k)) { -+ ret = extent_front_merge(trans, &iter, k, &insert, flags); -+ if (ret) -+ goto err; -+ } -+ -+ goto next; -+ } -+ -+ while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) { -+ bool done = bkey_lt(insert->k.p, k.k->p); -+ -+ ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert)); -+ if (ret) -+ goto err; -+ -+ if (done) -+ goto out; -+next: -+ bch2_btree_iter_advance(&iter); -+ k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); -+ if ((ret = bkey_err(k))) -+ goto err; -+ if (!k.k) -+ goto out; -+ } -+ -+ if (bch2_bkey_maybe_mergable(&insert->k, k.k)) { -+ ret = extent_back_merge(trans, &iter, insert, k); -+ if (ret) -+ goto err; -+ } -+out: -+ if (!bkey_deleted(&insert->k)) -+ ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+} -+ -+static noinline int flush_new_cached_update(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree_insert_entry *i, -+ enum btree_update_flags flags, -+ unsigned long ip) -+{ -+ struct btree_path *btree_path; -+ struct bkey k; -+ int ret; -+ -+ btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0, -+ BTREE_ITER_INTENT, _THIS_IP_); -+ ret = bch2_btree_path_traverse(trans, btree_path, 0); -+ if (ret) -+ goto out; -+ -+ /* -+ * The old key in the insert entry might actually refer to an existing -+ * key in the btree that has been deleted from cache and not yet -+ * flushed. Check for this and skip the flush so we don't run triggers -+ * against a stale key. 
-+ */ -+ bch2_btree_path_peek_slot_exact(btree_path, &k); -+ if (!bkey_deleted(&k)) -+ goto out; -+ -+ i->key_cache_already_flushed = true; -+ i->flags |= BTREE_TRIGGER_NORUN; -+ -+ btree_path_set_should_be_locked(btree_path); -+ ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip); -+out: -+ bch2_path_put(trans, btree_path, true); -+ return ret; -+} -+ -+static int __must_check -+bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path, -+ struct bkey_i *k, enum btree_update_flags flags, -+ unsigned long ip) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_insert_entry *i, n; -+ u64 seq = 0; -+ int cmp; -+ -+ EBUG_ON(!path->should_be_locked); -+ EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX); -+ EBUG_ON(!bpos_eq(k->k.p, path->pos)); -+ -+ /* -+ * The transaction journal res hasn't been allocated at this point. -+ * That occurs at commit time. Reuse the seq field to pass in the seq -+ * of a prejournaled key. -+ */ -+ if (flags & BTREE_UPDATE_PREJOURNAL) -+ seq = trans->journal_res.seq; -+ -+ n = (struct btree_insert_entry) { -+ .flags = flags, -+ .bkey_type = __btree_node_type(path->level, path->btree_id), -+ .btree_id = path->btree_id, -+ .level = path->level, -+ .cached = path->cached, -+ .path = path, -+ .k = k, -+ .seq = seq, -+ .ip_allocated = ip, -+ }; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ trans_for_each_update(trans, i) -+ BUG_ON(i != trans->updates && -+ btree_insert_entry_cmp(i - 1, i) >= 0); -+#endif -+ -+ /* -+ * Pending updates are kept sorted: first, find position of new update, -+ * then delete/trim any updates the new update overwrites: -+ */ -+ trans_for_each_update(trans, i) { -+ cmp = btree_insert_entry_cmp(&n, i); -+ if (cmp <= 0) -+ break; -+ } -+ -+ if (!cmp && i < trans->updates + trans->nr_updates) { -+ EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run); -+ -+ bch2_path_put(trans, i->path, true); -+ i->flags = n.flags; -+ i->cached = n.cached; -+ i->k = n.k; -+ i->path = n.path; -+ i->seq = n.seq; -+ i->ip_allocated = n.ip_allocated; -+ } else { -+ array_insert_item(trans->updates, trans->nr_updates, -+ i - trans->updates, n); -+ -+ i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v; -+ i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0; -+ -+ if (unlikely(trans->journal_replay_not_finished)) { -+ struct bkey_i *j_k = -+ bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p); -+ -+ if (j_k) { -+ i->old_k = j_k->k; -+ i->old_v = &j_k->v; -+ } -+ } -+ } -+ -+ __btree_path_get(i->path, true); -+ -+ /* -+ * If a key is present in the key cache, it must also exist in the -+ * btree - this is necessary for cache coherency. 
When iterating over -+ * a btree that's cached in the key cache, the btree iter code checks -+ * the key cache - but the key has to exist in the btree for that to -+ * work: -+ */ -+ if (path->cached && bkey_deleted(&i->old_k)) -+ return flush_new_cached_update(trans, path, i, flags, ip); -+ -+ return 0; -+} -+ -+static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct btree_path *path) -+{ -+ if (!iter->key_cache_path || -+ !iter->key_cache_path->should_be_locked || -+ !bpos_eq(iter->key_cache_path->pos, iter->pos)) { -+ struct bkey_cached *ck; -+ int ret; -+ -+ if (!iter->key_cache_path) -+ iter->key_cache_path = -+ bch2_path_get(trans, path->btree_id, path->pos, 1, 0, -+ BTREE_ITER_INTENT| -+ BTREE_ITER_CACHED, _THIS_IP_); -+ -+ iter->key_cache_path = -+ bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos, -+ iter->flags & BTREE_ITER_INTENT, -+ _THIS_IP_); -+ -+ ret = bch2_btree_path_traverse(trans, iter->key_cache_path, -+ BTREE_ITER_CACHED); -+ if (unlikely(ret)) -+ return ret; -+ -+ ck = (void *) iter->key_cache_path->l[0].b; -+ -+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { -+ trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); -+ } -+ -+ btree_path_set_should_be_locked(iter->key_cache_path); -+ } -+ -+ return 0; -+} -+ -+int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_i *k, enum btree_update_flags flags) -+{ -+ struct btree_path *path = iter->update_path ?: iter->path; -+ int ret; -+ -+ if (iter->flags & BTREE_ITER_IS_EXTENTS) -+ return bch2_trans_update_extent(trans, iter, k, flags); -+ -+ if (bkey_deleted(&k->k) && -+ !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) && -+ (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) { -+ ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p); -+ if (unlikely(ret < 0)) -+ return ret; -+ -+ if (ret) -+ k->k.type = KEY_TYPE_whiteout; -+ } -+ -+ /* -+ * Ensure that updates to cached btrees go to the key cache: -+ */ -+ if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) && -+ !path->cached && -+ !path->level && -+ btree_id_cached(trans->c, path->btree_id)) { -+ ret = bch2_trans_update_get_key_cache(trans, iter, path); -+ if (ret) -+ return ret; -+ -+ path = iter->key_cache_path; -+ } -+ -+ return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_); -+} -+ -+/* -+ * Add a transaction update for a key that has already been journaled. 
-+ */ -+int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq, -+ struct btree_iter *iter, struct bkey_i *k, -+ enum btree_update_flags flags) -+{ -+ trans->journal_res.seq = seq; -+ return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL| -+ BTREE_UPDATE_PREJOURNAL); -+} -+ -+int __must_check bch2_trans_update_buffered(struct btree_trans *trans, -+ enum btree_id btree, -+ struct bkey_i *k) -+{ -+ struct btree_write_buffered_key *i; -+ int ret; -+ -+ EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size); -+ EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); -+ -+ trans_for_each_wb_update(trans, i) { -+ if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) { -+ bkey_copy(&i->k, k); -+ return 0; -+ } -+ } -+ -+ if (!trans->wb_updates || -+ trans->nr_wb_updates == trans->wb_updates_size) { -+ struct btree_write_buffered_key *u; -+ -+ if (trans->nr_wb_updates == trans->wb_updates_size) { -+ struct btree_transaction_stats *s = btree_trans_stats(trans); -+ -+ BUG_ON(trans->wb_updates_size > U8_MAX / 2); -+ trans->wb_updates_size = max(1, trans->wb_updates_size * 2); -+ if (s) -+ s->wb_updates_size = trans->wb_updates_size; -+ } -+ -+ u = bch2_trans_kmalloc_nomemzero(trans, -+ trans->wb_updates_size * -+ sizeof(struct btree_write_buffered_key)); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ if (trans->nr_wb_updates) -+ memcpy(u, trans->wb_updates, trans->nr_wb_updates * -+ sizeof(struct btree_write_buffered_key)); -+ trans->wb_updates = u; -+ } -+ -+ trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) { -+ .btree = btree, -+ }; -+ -+ bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k); -+ trans->nr_wb_updates++; -+ -+ return 0; -+} -+ -+int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, -+ enum btree_id btree, struct bpos end) -+{ -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT); -+ k = bch2_btree_iter_prev(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ bch2_btree_iter_advance(iter); -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ BUG_ON(k.k->type != KEY_TYPE_deleted); -+ -+ if (bkey_gt(k.k->p, end)) { -+ ret = -BCH_ERR_ENOSPC_btree_slot; -+ goto err; -+ } -+ -+ return 0; -+err: -+ bch2_trans_iter_exit(trans, iter); -+ return ret; -+} -+ -+void bch2_trans_commit_hook(struct btree_trans *trans, -+ struct btree_trans_commit_hook *h) -+{ -+ h->next = trans->hooks; -+ trans->hooks = h; -+} -+ -+int bch2_btree_insert_nonextent(struct btree_trans *trans, -+ enum btree_id btree, struct bkey_i *k, -+ enum btree_update_flags flags) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, btree, k->k.p, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_NOT_EXTENTS| -+ BTREE_ITER_INTENT); -+ ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update(trans, &iter, k, flags); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, -+ struct bkey_i *k, enum btree_update_flags flags) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), -+ BTREE_ITER_CACHED| -+ BTREE_ITER_INTENT); -+ ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update(trans, &iter, k, flags); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/** -+ * bch2_btree_insert - insert keys into the extent btree -+ * @c: pointer to struct bch_fs -+ * @id: 
btree to insert into -+ * @k: key to insert -+ * @disk_res: must be non-NULL whenever inserting or potentially -+ * splitting data extents -+ * @flags: transaction commit flags -+ * -+ * Returns: 0 on success, error code on failure -+ */ -+int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, -+ struct disk_reservation *disk_res, int flags) -+{ -+ return bch2_trans_do(c, disk_res, NULL, flags, -+ bch2_btree_insert_trans(trans, id, k, 0)); -+} -+ -+int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter, -+ unsigned len, unsigned update_flags) -+{ -+ struct bkey_i *k; -+ -+ k = bch2_trans_kmalloc(trans, sizeof(*k)); -+ if (IS_ERR(k)) -+ return PTR_ERR(k); -+ -+ bkey_init(&k->k); -+ k->k.p = iter->pos; -+ bch2_key_resize(&k->k, len); -+ return bch2_trans_update(trans, iter, k, update_flags); -+} -+ -+int bch2_btree_delete_at(struct btree_trans *trans, -+ struct btree_iter *iter, unsigned update_flags) -+{ -+ return bch2_btree_delete_extent_at(trans, iter, 0, update_flags); -+} -+ -+int bch2_btree_delete_at_buffered(struct btree_trans *trans, -+ enum btree_id btree, struct bpos pos) -+{ -+ struct bkey_i *k; -+ -+ k = bch2_trans_kmalloc(trans, sizeof(*k)); -+ if (IS_ERR(k)) -+ return PTR_ERR(k); -+ -+ bkey_init(&k->k); -+ k->k.p = pos; -+ return bch2_trans_update_buffered(trans, btree, k); -+} -+ -+int bch2_btree_delete(struct btree_trans *trans, -+ enum btree_id btree, struct bpos pos, -+ unsigned update_flags) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, btree, pos, -+ BTREE_ITER_CACHED| -+ BTREE_ITER_INTENT); -+ ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_delete_at(trans, &iter, update_flags); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+} -+ -+int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, -+ struct bpos start, struct bpos end, -+ unsigned update_flags, -+ u64 *journal_seq) -+{ -+ u32 restart_count = trans->restart_count; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT); -+ while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) { -+ struct disk_reservation disk_res = -+ bch2_disk_reservation_init(trans->c, 0); -+ struct bkey_i delete; -+ -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ bkey_init(&delete.k); -+ -+ /* -+ * This could probably be more efficient for extents: -+ */ -+ -+ /* -+ * For extents, iter.pos won't necessarily be the same as -+ * bkey_start_pos(k.k) (for non extents they always will be the -+ * same). It's important that we delete starting from iter.pos -+ * because the range we want to delete could start in the middle -+ * of k. -+ * -+ * (bch2_btree_iter_peek() does guarantee that iter.pos >= -+ * bkey_start_pos(k.k)). 
-+ */ -+ delete.k.p = iter.pos; -+ -+ if (iter.flags & BTREE_ITER_IS_EXTENTS) -+ bch2_key_resize(&delete.k, -+ bpos_min(end, k.k->p).offset - -+ iter.pos.offset); -+ -+ ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?: -+ bch2_trans_commit(trans, &disk_res, journal_seq, -+ BTREE_INSERT_NOFAIL); -+ bch2_disk_reservation_put(trans->c, &disk_res); -+err: -+ /* -+ * the bch2_trans_begin() call is in a weird place because we -+ * need to call it after every transaction commit, to avoid path -+ * overflow, but don't want to call it if the delete operation -+ * is a no-op and we have no work to do: -+ */ -+ bch2_trans_begin(trans); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ ret = 0; -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ -+/* -+ * bch2_btree_delete_range - delete everything within a given range -+ * -+ * Range is a half-open interval - [start, end) -+ */ -+int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, -+ struct bpos start, struct bpos end, -+ unsigned update_flags, -+ u64 *journal_seq) -+{ -+ int ret = bch2_trans_run(c, -+ bch2_btree_delete_range_trans(trans, id, start, end, -+ update_flags, journal_seq)); -+ if (ret == -BCH_ERR_transaction_restart_nested) -+ ret = 0; -+ return ret; -+} -+ -+int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, -+ struct bpos pos, bool set) -+{ -+ struct bkey_i *k; -+ int ret = 0; -+ -+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); -+ ret = PTR_ERR_OR_ZERO(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ bkey_init(&k->k); -+ k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted; -+ k->k.p = pos; -+ -+ return bch2_trans_update_buffered(trans, btree, k); -+} -+ -+__printf(2, 0) -+static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args) -+{ -+ struct printbuf buf = PRINTBUF; -+ struct jset_entry_log *l; -+ unsigned u64s; -+ int ret; -+ -+ prt_vprintf(&buf, fmt, args); -+ ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; -+ if (ret) -+ goto err; -+ -+ u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); -+ -+ ret = darray_make_room(entries, jset_u64s(u64s)); -+ if (ret) -+ goto err; -+ -+ l = (void *) &darray_top(*entries); -+ l->entry.u64s = cpu_to_le16(u64s); -+ l->entry.btree_id = 0; -+ l->entry.level = 1; -+ l->entry.type = BCH_JSET_ENTRY_log; -+ l->entry.pad[0] = 0; -+ l->entry.pad[1] = 0; -+ l->entry.pad[2] = 0; -+ memcpy(l->d, buf.buf, buf.pos); -+ while (buf.pos & 7) -+ l->d[buf.pos++] = '\0'; -+ -+ entries->nr += jset_u64s(u64s); -+err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+__printf(3, 0) -+static int -+__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, -+ va_list args) -+{ -+ int ret; -+ -+ if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) { -+ ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args); -+ } else { -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_LAZY_RW|commit_flags, -+ __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args)); -+ } -+ -+ return ret; -+} -+ -+__printf(2, 3) -+int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...) -+{ -+ va_list args; -+ int ret; -+ -+ va_start(args, fmt); -+ ret = __bch2_fs_log_msg(c, 0, fmt, args); -+ va_end(args); -+ return ret; -+} -+ -+/* -+ * Use for logging messages during recovery to enable reserved space and avoid -+ * blocking. -+ */ -+__printf(2, 3) -+int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
-+{ -+ va_list args; -+ int ret; -+ -+ va_start(args, fmt); -+ ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args); -+ va_end(args); -+ return ret; -+} -diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h -new file mode 100644 -index 000000000000..9816d2286540 ---- /dev/null -+++ b/fs/bcachefs/btree_update.h -@@ -0,0 +1,340 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_H -+#define _BCACHEFS_BTREE_UPDATE_H -+ -+#include "btree_iter.h" -+#include "journal.h" -+ -+struct bch_fs; -+struct btree; -+ -+void bch2_btree_node_prep_for_write(struct btree_trans *, -+ struct btree_path *, struct btree *); -+bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *, -+ struct btree *, struct btree_node_iter *, -+ struct bkey_i *); -+ -+int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64); -+int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64); -+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); -+ -+void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *, -+ struct bkey_i *, u64); -+ -+enum btree_insert_flags { -+ /* First bits for bch_watermark: */ -+ __BTREE_INSERT_NOFAIL = BCH_WATERMARK_BITS, -+ __BTREE_INSERT_NOCHECK_RW, -+ __BTREE_INSERT_LAZY_RW, -+ __BTREE_INSERT_JOURNAL_REPLAY, -+ __BTREE_INSERT_JOURNAL_RECLAIM, -+ __BTREE_INSERT_NOWAIT, -+ __BTREE_INSERT_GC_LOCK_HELD, -+ __BCH_HASH_SET_MUST_CREATE, -+ __BCH_HASH_SET_MUST_REPLACE, -+}; -+ -+/* Don't check for -ENOSPC: */ -+#define BTREE_INSERT_NOFAIL BIT(__BTREE_INSERT_NOFAIL) -+ -+#define BTREE_INSERT_NOCHECK_RW BIT(__BTREE_INSERT_NOCHECK_RW) -+#define BTREE_INSERT_LAZY_RW BIT(__BTREE_INSERT_LAZY_RW) -+ -+/* Insert is for journal replay - don't get journal reservations: */ -+#define BTREE_INSERT_JOURNAL_REPLAY BIT(__BTREE_INSERT_JOURNAL_REPLAY) -+ -+/* Insert is being called from journal reclaim path: */ -+#define BTREE_INSERT_JOURNAL_RECLAIM BIT(__BTREE_INSERT_JOURNAL_RECLAIM) -+ -+/* Don't block on allocation failure (for new btree nodes): */ -+#define BTREE_INSERT_NOWAIT BIT(__BTREE_INSERT_NOWAIT) -+#define BTREE_INSERT_GC_LOCK_HELD BIT(__BTREE_INSERT_GC_LOCK_HELD) -+ -+#define BCH_HASH_SET_MUST_CREATE BIT(__BCH_HASH_SET_MUST_CREATE) -+#define BCH_HASH_SET_MUST_REPLACE BIT(__BCH_HASH_SET_MUST_REPLACE) -+ -+int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, -+ unsigned, unsigned); -+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -+int bch2_btree_delete_at_buffered(struct btree_trans *, enum btree_id, struct bpos); -+int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); -+ -+int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, -+ struct bkey_i *, enum btree_update_flags); -+ -+int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *, -+ enum btree_update_flags); -+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, -+ struct disk_reservation *, int flags); -+ -+int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, -+ struct bpos, struct bpos, unsigned, u64 *); -+int bch2_btree_delete_range(struct bch_fs *, enum btree_id, -+ struct bpos, struct bpos, unsigned, u64 *); -+ -+int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); -+ -+int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, -+ struct bpos, struct bpos); -+ -+/* -+ * For use when splitting extents in existing snapshots: -+ *
-+ * If @old_pos is an interior snapshot node, iterate over descendent snapshot -+ * nodes: for every descendent snapshot in which @old_pos is overwritten and -+ * not visible, emit a whiteout at @new_pos. -+ */ -+static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, -+ enum btree_id btree, -+ struct bpos old_pos, -+ struct bpos new_pos) -+{ -+ if (!btree_type_has_snapshots(btree) || -+ bkey_eq(old_pos, new_pos)) -+ return 0; -+ -+ return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos); -+} -+ -+int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *, -+ enum btree_update_flags, -+ struct bkey_s_c, struct bkey_s_c); -+ -+int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, -+ enum btree_id, struct bpos); -+ -+int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, enum btree_update_flags); -+int __must_check bch2_trans_update_seq(struct btree_trans *, u64, struct btree_iter *, -+ struct bkey_i *, enum btree_update_flags); -+int __must_check bch2_trans_update_buffered(struct btree_trans *, -+ enum btree_id, struct bkey_i *); -+ -+void bch2_trans_commit_hook(struct btree_trans *, -+ struct btree_trans_commit_hook *); -+int __bch2_trans_commit(struct btree_trans *, unsigned); -+ -+__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); -+__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...); -+ -+/** -+ * bch2_trans_commit - insert keys at given iterator positions -+ * -+ * This is the main entry point for btree updates. -+ * -+ * Return values: -+ * -EROFS: filesystem read only -+ * -EIO: journal or btree node IO error -+ */ -+static inline int bch2_trans_commit(struct btree_trans *trans, -+ struct disk_reservation *disk_res, -+ u64 *journal_seq, -+ unsigned flags) -+{ -+ trans->disk_res = disk_res; -+ trans->journal_seq = journal_seq; -+ -+ return __bch2_trans_commit(trans, flags); -+} -+ -+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ -+ lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ -+ (_journal_seq), (_flags))) -+ -+#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ -+ nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ -+ (_journal_seq), (_flags))) -+ -+#define bch2_trans_run(_c, _do) \ -+({ \ -+ struct btree_trans *trans = bch2_trans_get(_c); \ -+ int _ret = (_do); \ -+ bch2_trans_put(trans); \ -+ _ret; \ -+}) -+ -+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ -+ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) -+ -+#define trans_for_each_update(_trans, _i) \ -+ for ((_i) = (_trans)->updates; \ -+ (_i) < (_trans)->updates + (_trans)->nr_updates; \ -+ (_i)++) -+ -+#define trans_for_each_wb_update(_trans, _i) \ -+ for ((_i) = (_trans)->wb_updates; \ -+ (_i) < (_trans)->wb_updates + (_trans)->nr_wb_updates; \ -+ (_i)++) -+ -+static inline void bch2_trans_reset_updates(struct btree_trans *trans) -+{ -+ struct btree_insert_entry *i; -+ -+ trans_for_each_update(trans, i) -+ bch2_path_put(trans, i->path, true); -+ -+ trans->extra_journal_res = 0; -+ trans->nr_updates = 0; -+ trans->nr_wb_updates = 0; -+ trans->wb_updates = NULL; -+ trans->hooks = NULL; -+ trans->extra_journal_entries.nr = 0; -+ -+ if (trans->fs_usage_deltas) { -+ trans->fs_usage_deltas->used = 0; -+ memset((void *) trans->fs_usage_deltas + -+ offsetof(struct replicas_delta_list, memset_start), 0, -+ (void *)
&trans->fs_usage_deltas->memset_end - -+ (void *) &trans->fs_usage_deltas->memset_start); -+ } -+} -+ -+static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k, -+ unsigned type, unsigned min_bytes) -+{ -+ unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k)); -+ struct bkey_i *mut; -+ -+ if (type && k.k->type != type) -+ return ERR_PTR(-ENOENT); -+ -+ mut = bch2_trans_kmalloc_nomemzero(trans, bytes); -+ if (!IS_ERR(mut)) { -+ bkey_reassemble(mut, k); -+ -+ if (unlikely(bytes > bkey_bytes(k.k))) { -+ memset((void *) mut + bkey_bytes(k.k), 0, -+ bytes - bkey_bytes(k.k)); -+ mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64)); -+ } -+ } -+ return mut; -+} -+ -+static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0); -+} -+ -+#define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type) \ -+ bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k, \ -+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) -+ -+static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c *k, unsigned flags, -+ unsigned type, unsigned min_bytes) -+{ -+ struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes); -+ int ret; -+ -+ if (IS_ERR(mut)) -+ return mut; -+ -+ ret = bch2_trans_update(trans, iter, mut, flags); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ *k = bkey_i_to_s_c(mut); -+ return mut; -+} -+ -+static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c *k, unsigned flags) -+{ -+ return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0); -+} -+ -+#define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type) \ -+ bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\ -+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) -+ -+static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags, unsigned type, unsigned min_bytes) -+{ -+ struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter, -+ btree_id, pos, flags|BTREE_ITER_INTENT, type); -+ struct bkey_i *ret = IS_ERR(k.k) -+ ? 
ERR_CAST(k.k) -+ : __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes); -+ if (IS_ERR(ret)) -+ bch2_trans_iter_exit(trans, iter); -+ return ret; -+} -+ -+static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0); -+} -+ -+static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags, unsigned type, unsigned min_bytes) -+{ -+ struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter, -+ btree_id, pos, flags|BTREE_ITER_INTENT, type, min_bytes); -+ int ret; -+ -+ if (IS_ERR(mut)) -+ return mut; -+ -+ ret = bch2_trans_update(trans, iter, mut, flags); -+ if (ret) { -+ bch2_trans_iter_exit(trans, iter); -+ return ERR_PTR(ret); -+ } -+ -+ return mut; -+} -+ -+static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags, unsigned min_bytes) -+{ -+ return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes); -+} -+ -+static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, -+ struct btree_iter *iter, -+ unsigned btree_id, struct bpos pos, -+ unsigned flags) -+{ -+ return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0); -+} -+ -+#define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\ -+ bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter, \ -+ _btree_id, _pos, _flags, \ -+ KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) -+ -+static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter, -+ unsigned flags, unsigned type, unsigned val_size) -+{ -+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size); -+ int ret; -+ -+ if (IS_ERR(k)) -+ return k; -+ -+ bkey_init(&k->k); -+ k->k.p = iter->pos; -+ k->k.type = type; -+ set_bkey_val_bytes(&k->k, val_size); -+ -+ ret = bch2_trans_update(trans, iter, k, flags); -+ if (unlikely(ret)) -+ return ERR_PTR(ret); -+ return k; -+} -+ -+#define bch2_bkey_alloc(_trans, _iter, _flags, _type) \ -+ bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags, \ -+ KEY_TYPE_##_type, sizeof(struct bch_##_type))) -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_H */ -diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c -new file mode 100644 -index 000000000000..39c2db68123b ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.c -@@ -0,0 +1,2474 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_journal_iter.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "buckets.h" -+#include "clock.h" -+#include "error.h" -+#include "extents.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "trace.h" -+ -+#include <linux/random.h> -+ -+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, -+ struct btree_path *, struct btree *, -+ struct keylist *, unsigned); -+static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); -+ -+static struct btree_path *get_unlocked_mut_path(struct
btree_trans *trans, -+ enum btree_id btree_id, -+ unsigned level, -+ struct bpos pos) -+{ -+ struct btree_path *path; -+ -+ path = bch2_path_get(trans, btree_id, pos, level + 1, level, -+ BTREE_ITER_NOPRESERVE| -+ BTREE_ITER_INTENT, _RET_IP_); -+ path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_); -+ bch2_btree_path_downgrade(trans, path); -+ __bch2_btree_path_unlock(trans, path); -+ return path; -+} -+ -+/* Debug code: */ -+ -+/* -+ * Verify that child nodes correctly span parent node's range: -+ */ -+static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ struct bpos next_node = b->data->min_key; -+ struct btree_node_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_btree_ptr_v2 bp; -+ struct bkey unpacked; -+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; -+ -+ BUG_ON(!b->c.level); -+ -+ if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) -+ return; -+ -+ bch2_btree_node_iter_init_from_start(&iter, b); -+ -+ while (1) { -+ k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked); -+ if (k.k->type != KEY_TYPE_btree_ptr_v2) -+ break; -+ bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ if (!bpos_eq(next_node, bp.v->min_key)) { -+ bch2_dump_btree_node(c, b); -+ bch2_bpos_to_text(&buf1, next_node); -+ bch2_bpos_to_text(&buf2, bp.v->min_key); -+ panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf); -+ } -+ -+ bch2_btree_node_iter_advance(&iter, b); -+ -+ if (bch2_btree_node_iter_end(&iter)) { -+ if (!bpos_eq(k.k->p, b->key.k.p)) { -+ bch2_dump_btree_node(c, b); -+ bch2_bpos_to_text(&buf1, b->key.k.p); -+ bch2_bpos_to_text(&buf2, k.k->p); -+ panic("expected end %s got %s\n", buf1.buf, buf2.buf); -+ } -+ break; -+ } -+ -+ next_node = bpos_successor(k.k->p); -+ } -+#endif -+} -+ -+/* Calculate ideal packed bkey format for new btree nodes: */ -+ -+void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) -+{ -+ struct bkey_packed *k; -+ struct bset_tree *t; -+ struct bkey uk; -+ -+ for_each_bset(b, t) -+ bset_tree_for_each_key(b, t, k) -+ if (!bkey_deleted(k)) { -+ uk = bkey_unpack_key(b, k); -+ bch2_bkey_format_add_key(s, &uk); -+ } -+} -+ -+static struct bkey_format bch2_btree_calc_format(struct btree *b) -+{ -+ struct bkey_format_state s; -+ -+ bch2_bkey_format_init(&s); -+ bch2_bkey_format_add_pos(&s, b->data->min_key); -+ bch2_bkey_format_add_pos(&s, b->data->max_key); -+ __bch2_btree_calc_format(&s, b); -+ -+ return bch2_bkey_format_done(&s); -+} -+ -+static size_t btree_node_u64s_with_format(struct btree *b, -+ struct bkey_format *new_f) -+{ -+ struct bkey_format *old_f = &b->format; -+ -+ /* stupid integer promotion rules */ -+ ssize_t delta = -+ (((int) new_f->key_u64s - old_f->key_u64s) * -+ (int) b->nr.packed_keys) + -+ (((int) new_f->key_u64s - BKEY_U64s) * -+ (int) b->nr.unpacked_keys); -+ -+ BUG_ON(delta + b->nr.live_u64s < 0); -+ -+ return b->nr.live_u64s + delta; -+} -+ -+/** -+ * bch2_btree_node_format_fits - check if we could rewrite node with a new format -+ * -+ * @c: filesystem handle -+ * @b: btree node to rewrite -+ * @new_f: bkey format to translate keys to -+ * -+ * Returns: true if all re-packed keys will be able to fit in a new node. -+ * -+ * Assumes all keys will successfully pack with the new format. 
-+ */ -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, -+ struct bkey_format *new_f) -+{ -+ size_t u64s = btree_node_u64s_with_format(b, new_f); -+ -+ return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); -+} -+ -+/* Btree node freeing/allocation: */ -+ -+static void __btree_node_free(struct bch_fs *c, struct btree *b) -+{ -+ trace_and_count(c, btree_node_free, c, b); -+ -+ BUG_ON(btree_node_write_blocked(b)); -+ BUG_ON(btree_node_dirty(b)); -+ BUG_ON(btree_node_need_write(b)); -+ BUG_ON(b == btree_node_root(c, b)); -+ BUG_ON(b->ob.nr); -+ BUG_ON(!list_empty(&b->write_blocked)); -+ BUG_ON(b->will_make_reachable); -+ -+ clear_btree_node_noevict(b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&b->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+} -+ -+static void bch2_btree_node_free_inmem(struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned level = b->c.level; -+ -+ bch2_btree_node_lock_write_nofail(trans, path, &b->c); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED); -+ -+ trans_for_each_path(trans, path) -+ if (path->l[level].b == b) { -+ btree_node_unlock(trans, path, level); -+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); -+ } -+} -+ -+static void bch2_btree_node_free_never_used(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL]; -+ struct btree_path *path; -+ unsigned level = b->c.level; -+ -+ BUG_ON(!list_empty(&b->write_blocked)); -+ BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as)); -+ -+ b->will_make_reachable = 0; -+ closure_put(&as->cl); -+ -+ clear_btree_node_will_make_reachable(b); -+ clear_btree_node_accessed(b); -+ clear_btree_node_dirty_acct(c, b); -+ clear_btree_node_need_write(b); -+ -+ mutex_lock(&c->btree_cache.lock); -+ list_del_init(&b->list); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ BUG_ON(p->nr >= ARRAY_SIZE(p->b)); -+ p->b[p->nr++] = b; -+ -+ six_unlock_intent(&b->c.lock); -+ -+ trans_for_each_path(trans, path) -+ if (path->l[level].b == b) { -+ btree_node_unlock(trans, path, level); -+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init); -+ } -+} -+ -+static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, -+ struct disk_reservation *res, -+ struct closure *cl, -+ bool interior_node, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct write_point *wp; -+ struct btree *b; -+ BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; -+ struct open_buckets obs = { .nr = 0 }; -+ struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; -+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; -+ unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim -+ ? 
BTREE_NODE_RESERVE -+ : 0; -+ int ret; -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ if (c->btree_reserve_cache_nr > nr_reserve) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; -+ -+ obs = a->ob; -+ bkey_copy(&tmp.k, &a->k); -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ goto mem_alloc; -+ } -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+retry: -+ ret = bch2_alloc_sectors_start_trans(trans, -+ c->opts.metadata_target ?: -+ c->opts.foreground_target, -+ 0, -+ writepoint_ptr(&c->btree_write_point), -+ &devs_have, -+ res->nr_replicas, -+ c->opts.metadata_replicas_required, -+ watermark, 0, cl, &wp); -+ if (unlikely(ret)) -+ return ERR_PTR(ret); -+ -+ if (wp->sectors_free < btree_sectors(c)) { -+ struct open_bucket *ob; -+ unsigned i; -+ -+ open_bucket_for_each(c, &wp->ptrs, ob, i) -+ if (ob->sectors_free < btree_sectors(c)) -+ ob->sectors_free = 0; -+ -+ bch2_alloc_sectors_done(c, wp); -+ goto retry; -+ } -+ -+ bkey_btree_ptr_v2_init(&tmp.k); -+ bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false); -+ -+ bch2_open_bucket_get(c, wp, &obs); -+ bch2_alloc_sectors_done(c, wp); -+mem_alloc: -+ b = bch2_btree_node_mem_alloc(trans, interior_node); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ -+ /* we hold cannibalize_lock: */ -+ BUG_ON(IS_ERR(b)); -+ BUG_ON(b->ob.nr); -+ -+ bkey_copy(&b->key, &tmp.k); -+ b->ob = obs; -+ -+ return b; -+} -+ -+static struct btree *bch2_btree_node_alloc(struct btree_update *as, -+ struct btree_trans *trans, -+ unsigned level) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ struct prealloc_nodes *p = &as->prealloc_nodes[!!level]; -+ int ret; -+ -+ BUG_ON(level >= BTREE_MAX_DEPTH); -+ BUG_ON(!p->nr); -+ -+ b = p->b[--p->nr]; -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); -+ -+ set_btree_node_accessed(b); -+ set_btree_node_dirty_acct(c, b); -+ set_btree_node_need_write(b); -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ b->c.level = level; -+ b->c.btree_id = as->btree_id; -+ b->version_ondisk = c->sb.version; -+ -+ memset(&b->nr, 0, sizeof(b->nr)); -+ b->data->magic = cpu_to_le64(bset_magic(c)); -+ memset(&b->data->_ptr, 0, sizeof(b->data->_ptr)); -+ b->data->flags = 0; -+ SET_BTREE_NODE_ID(b->data, as->btree_id); -+ SET_BTREE_NODE_LEVEL(b->data, level); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key); -+ -+ bp->v.mem_ptr = 0; -+ bp->v.seq = b->data->keys.seq; -+ bp->v.sectors_written = 0; -+ } -+ -+ SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true); -+ -+ bch2_btree_build_aux_trees(b); -+ -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id); -+ BUG_ON(ret); -+ -+ trace_and_count(c, btree_node_alloc, c, b); -+ bch2_increment_clock(c, btree_sectors(c), WRITE); -+ return b; -+} -+ -+static void btree_set_min(struct btree *b, struct bpos pos) -+{ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) -+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos; -+ b->data->min_key = pos; -+} -+ -+static void btree_set_max(struct btree *b, struct bpos pos) -+{ -+ b->key.k.p = pos; -+ b->data->max_key = pos; -+} -+ -+static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree *b) -+{ -+ struct btree *n = bch2_btree_node_alloc(as, trans, b->c.level); -+ struct bkey_format format = bch2_btree_calc_format(b); -+ -+ /* -+ * The keys might expand with 
the new format - if they wouldn't fit in -+ * the btree node anymore, use the old format for now: -+ */ -+ if (!bch2_btree_node_format_fits(as->c, b, &format)) -+ format = b->format; -+ -+ SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); -+ -+ btree_set_min(n, b->data->min_key); -+ btree_set_max(n, b->data->max_key); -+ -+ n->data->format = format; -+ btree_node_set_format(n, format); -+ -+ bch2_btree_sort_into(as->c, n, b); -+ -+ btree_node_reset_sib_u64s(n); -+ return n; -+} -+ -+static struct btree *__btree_root_alloc(struct btree_update *as, -+ struct btree_trans *trans, unsigned level) -+{ -+ struct btree *b = bch2_btree_node_alloc(as, trans, level); -+ -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, SPOS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ -+ btree_node_set_format(b, b->data->format); -+ bch2_btree_build_aux_trees(b); -+ -+ return b; -+} -+ -+static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans) -+{ -+ struct bch_fs *c = as->c; -+ struct prealloc_nodes *p; -+ -+ for (p = as->prealloc_nodes; -+ p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes); -+ p++) { -+ while (p->nr) { -+ struct btree *b = p->b[--p->nr]; -+ -+ mutex_lock(&c->btree_reserve_cache_lock); -+ -+ if (c->btree_reserve_cache_nr < -+ ARRAY_SIZE(c->btree_reserve_cache)) { -+ struct btree_alloc *a = -+ &c->btree_reserve_cache[c->btree_reserve_cache_nr++]; -+ -+ a->ob = b->ob; -+ b->ob.nr = 0; -+ bkey_copy(&a->k, &b->key); -+ } else { -+ bch2_open_buckets_put(c, &b->ob); -+ } -+ -+ mutex_unlock(&c->btree_reserve_cache_lock); -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); -+ __btree_node_free(c, b); -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ } -+ } -+} -+ -+static int bch2_btree_reserve_get(struct btree_trans *trans, -+ struct btree_update *as, -+ unsigned nr_nodes[2], -+ unsigned flags, -+ struct closure *cl) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ unsigned interior; -+ int ret = 0; -+ -+ BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX); -+ -+ /* -+ * Protects reaping from the btree node cache and using the btree node -+ * open bucket reserve: -+ * -+ * BTREE_INSERT_NOWAIT only applies to btree node allocation, not -+ * blocking on this lock: -+ */ -+ ret = bch2_btree_cache_cannibalize_lock(c, cl); -+ if (ret) -+ return ret; -+ -+ for (interior = 0; interior < 2; interior++) { -+ struct prealloc_nodes *p = as->prealloc_nodes + interior; -+ -+ while (p->nr < nr_nodes[interior]) { -+ b = __bch2_btree_node_alloc(trans, &as->disk_res, -+ flags & BTREE_INSERT_NOWAIT ? 
NULL : cl, -+ interior, flags); -+ if (IS_ERR(b)) { -+ ret = PTR_ERR(b); -+ goto err; -+ } -+ -+ p->b[p->nr++] = b; -+ } -+ } -+err: -+ bch2_btree_cache_cannibalize_unlock(c); -+ return ret; -+} -+ -+/* Asynchronous interior node update machinery */ -+ -+static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans) -+{ -+ struct bch_fs *c = as->c; -+ -+ if (as->took_gc_lock) -+ up_read(&c->gc_lock); -+ as->took_gc_lock = false; -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ bch2_journal_pin_flush(&c->journal, &as->journal); -+ bch2_disk_reservation_put(c, &as->disk_res); -+ bch2_btree_reserve_put(as, trans); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], -+ as->start_time); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_del(&as->unwritten_list); -+ list_del(&as->list); -+ -+ closure_debug_destroy(&as->cl); -+ mempool_free(as, &c->btree_interior_update_pool); -+ -+ /* -+ * Have to do the wakeup with btree_interior_update_lock still held, -+ * since being on btree_interior_update_list is our ref on @c: -+ */ -+ closure_wake_up(&c->btree_interior_update_wait); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+static void btree_update_add_key(struct btree_update *as, -+ struct keylist *keys, struct btree *b) -+{ -+ struct bkey_i *k = &b->key; -+ -+ BUG_ON(bch2_keylist_u64s(keys) + k->k.u64s > -+ ARRAY_SIZE(as->_old_keys)); -+ -+ bkey_copy(keys->top, k); -+ bkey_i_to_btree_ptr_v2(keys->top)->v.mem_ptr = b->c.level + 1; -+ -+ bch2_keylist_push(keys); -+} -+ -+/* -+ * The transactional part of an interior btree node update, where we journal the -+ * update we did to the interior node and update alloc info: -+ */ -+static int btree_update_nodes_written_trans(struct btree_trans *trans, -+ struct btree_update *as) -+{ -+ struct bkey_i *k; -+ int ret; -+ -+ ret = darray_make_room(&trans->extra_journal_entries, as->journal_u64s); -+ if (ret) -+ return ret; -+ -+ memcpy(&darray_top(trans->extra_journal_entries), -+ as->journal_entries, -+ as->journal_u64s * sizeof(u64)); -+ trans->extra_journal_entries.nr += as->journal_u64s; -+ -+ trans->journal_pin = &as->journal; -+ -+ for_each_keylist_key(&as->old_keys, k) { -+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; -+ -+ ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_keylist_key(&as->new_keys, k) { -+ unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; -+ -+ ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void btree_update_nodes_written(struct btree_update *as) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *b; -+ struct btree_trans *trans = bch2_trans_get(c); -+ u64 journal_seq = 0; -+ unsigned i; -+ int ret; -+ -+ /* -+ * If we're already in an error state, it might be because a btree node -+ * was never written, and we might be trying to free that same btree -+ * node here, but it won't have been marked as allocated and we'll see -+ * spurious disk usage inconsistencies in the transactional part below -+ * if we don't skip it: -+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ goto err; -+ -+ /* -+ * Wait for any in flight writes to finish before we free the old nodes -+ * on disk: -+ */ -+ for (i = 0; i < as->nr_old_nodes; i++) { -+ __le64 seq; -+ -+ b = as->old_nodes[i]; -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, 
SIX_LOCK_read); -+ seq = b->data ? b->data->keys.seq : 0; -+ six_unlock_read(&b->c.lock); -+ -+ if (seq == as->old_nodes_seq[i]) -+ wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner, -+ TASK_UNINTERRUPTIBLE); -+ } -+ -+ /* -+ * We did an update to a parent node where the pointers we added pointed -+ * to child nodes that weren't written yet: now, the child nodes have -+ * been written so we can write out the update to the interior node. -+ */ -+ -+ /* -+ * We can't call into journal reclaim here: we'd block on the journal -+ * reclaim lock, but we may need to release the open buckets we have -+ * pinned in order for other btree updates to make forward progress, and -+ * journal reclaim does btree updates when flushing bkey_cached entries, -+ * which may require allocations as well. -+ */ -+ ret = commit_do(trans, &as->disk_res, &journal_seq, -+ BCH_WATERMARK_reclaim| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_JOURNAL_RECLAIM, -+ btree_update_nodes_written_trans(trans, as)); -+ bch2_trans_unlock(trans); -+ -+ bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, -+ "%s(): error %s", __func__, bch2_err_str(ret)); -+err: -+ if (as->b) { -+ struct btree_path *path; -+ -+ b = as->b; -+ path = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p); -+ /* -+ * @b is the node we did the final insert into: -+ * -+ * On failure to get a journal reservation, we still have to -+ * unblock the write and allow most of the write path to happen -+ * so that shutdown works, but the i->journal_seq mechanism -+ * won't work to prevent the btree write from being visible (we -+ * didn't get a journal sequence number) - instead -+ * __bch2_btree_node_write() doesn't do the actual write if -+ * we're in journal error state: -+ */ -+ -+ /* -+ * Ensure transaction is unlocked before using -+ * btree_node_lock_nopath() (the use of which is always suspect, -+ * we need to work on removing this in the future) -+ * -+ * It should be, but get_unlocked_mut_path() -> bch2_path_get() -+ * calls bch2_path_upgrade(), before we call path_make_mut(), so -+ * we may rarely end up with a locked path besides the one we -+ * have here: -+ */ -+ bch2_trans_unlock(trans); -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); -+ mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED); -+ path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock); -+ path->l[b->c.level].b = b; -+ -+ bch2_btree_node_lock_write_nofail(trans, path, &b->c); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ list_del(&as->write_blocked_list); -+ if (list_empty(&b->write_blocked)) -+ clear_btree_node_write_blocked(b); -+ -+ /* -+ * Node might have been freed, recheck under -+ * btree_interior_update_lock: -+ */ -+ if (as->b == b) { -+ BUG_ON(!b->c.level); -+ BUG_ON(!btree_node_dirty(b)); -+ -+ if (!ret) { -+ struct bset *last = btree_bset_last(b); -+ -+ last->journal_seq = cpu_to_le64( -+ max(journal_seq, -+ le64_to_cpu(last->journal_seq))); -+ -+ bch2_btree_add_journal_pin(c, b, journal_seq); -+ } else { -+ /* -+ * If we didn't get a journal sequence number we -+ * can't write this btree node, because recovery -+ * won't know to ignore this write: -+ */ -+ set_btree_node_never_write(b); -+ } -+ } -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); -+ six_unlock_write(&b->c.lock); -+ -+ btree_node_write_if_need(c, b, SIX_LOCK_intent); -+ btree_node_unlock(trans, path, b->c.level); -+ 
bch2_path_put(trans, path, true); -+ } -+ -+ bch2_journal_pin_drop(&c->journal, &as->journal); -+ -+ bch2_journal_preres_put(&c->journal, &as->journal_preres); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ BUG_ON(b->will_make_reachable != (unsigned long) as); -+ b->will_make_reachable = 0; -+ clear_btree_node_will_make_reachable(b); -+ } -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ for (i = 0; i < as->nr_new_nodes; i++) { -+ b = as->new_nodes[i]; -+ -+ btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); -+ btree_node_write_if_need(c, b, SIX_LOCK_read); -+ six_unlock_read(&b->c.lock); -+ } -+ -+ for (i = 0; i < as->nr_open_buckets; i++) -+ bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); -+ -+ bch2_btree_update_free(as, trans); -+ bch2_trans_put(trans); -+} -+ -+static void btree_interior_update_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, btree_interior_update_work); -+ struct btree_update *as; -+ -+ while (1) { -+ mutex_lock(&c->btree_interior_update_lock); -+ as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, -+ struct btree_update, unwritten_list); -+ if (as && !as->nodes_written) -+ as = NULL; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (!as) -+ break; -+ -+ btree_update_nodes_written(as); -+ } -+} -+ -+static void btree_update_set_nodes_written(struct closure *cl) -+{ -+ struct btree_update *as = container_of(cl, struct btree_update, cl); -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ as->nodes_written = true; -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); -+} -+ -+/* -+ * We're updating @b with pointers to nodes that haven't finished writing yet: -+ * block @b from being written until @as completes -+ */ -+static void btree_update_updated_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ BUG_ON(!btree_node_dirty(b)); -+ BUG_ON(!b->c.level); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_NODE; -+ as->b = b; -+ -+ set_btree_node_write_blocked(b); -+ list_add(&as->write_blocked_list, &b->write_blocked); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+static void btree_update_reparent(struct btree_update *as, -+ struct btree_update *child) -+{ -+ struct bch_fs *c = as->c; -+ -+ lockdep_assert_held(&c->btree_interior_update_lock); -+ -+ child->b = NULL; -+ child->mode = BTREE_INTERIOR_UPDATING_AS; -+ -+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); -+} -+ -+static void btree_update_updated_root(struct btree_update *as, struct btree *b) -+{ -+ struct bkey_i *insert = &b->key; -+ struct bch_fs *c = as->c; -+ -+ BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_root, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); -+ -+ as->mode = BTREE_INTERIOR_UPDATING_ROOT; -+ 
mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+/* -+ * bch2_btree_update_add_new_node: -+ * -+ * This causes @as to wait on @b to be written, before it gets to -+ * bch2_btree_update_nodes_written -+ * -+ * Additionally, it sets b->will_make_reachable to prevent any additional writes -+ * to @b from happening besides the first until @b is reachable on disk -+ * -+ * And it adds @b to the list of @as's new nodes, so that we can update sector -+ * counts in bch2_btree_update_nodes_written: -+ */ -+static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ -+ closure_get(&as->cl); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); -+ BUG_ON(b->will_make_reachable); -+ -+ as->new_nodes[as->nr_new_nodes++] = b; -+ b->will_make_reachable = 1UL|(unsigned long) as; -+ set_btree_node_will_make_reachable(b); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ btree_update_add_key(as, &as->new_keys, b); -+ -+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { -+ unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data; -+ unsigned sectors = round_up(bytes, block_bytes(c)) >> 9; -+ -+ bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written = -+ cpu_to_le16(sectors); -+ } -+} -+ -+/* -+ * If @b was a new node, drop it from its btree_update's list of new nodes: -+ */ -+static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) -+{ -+ struct btree_update *as; -+ unsigned long v; -+ unsigned i; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ /* -+ * When b->will_make_reachable != 0, it owns a ref on as->cl that's -+ * dropped when it gets written by bch2_btree_complete_write - the -+ * xchg() is for synchronization with bch2_btree_complete_write: -+ */ -+ v = xchg(&b->will_make_reachable, 0); -+ clear_btree_node_will_make_reachable(b); -+ as = (struct btree_update *) (v & ~1UL); -+ -+ if (!as) { -+ mutex_unlock(&c->btree_interior_update_lock); -+ return; -+ } -+ -+ for (i = 0; i < as->nr_new_nodes; i++) -+ if (as->new_nodes[i] == b) -+ goto found; -+ -+ BUG(); -+found: -+ array_remove_item(as->new_nodes, as->nr_new_nodes, i); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ if (v & 1) -+ closure_put(&as->cl); -+} -+ -+static void bch2_btree_update_get_open_buckets(struct btree_update *as, struct btree *b) -+{ -+ while (b->ob.nr) -+ as->open_buckets[as->nr_open_buckets++] = -+ b->ob.v[--b->ob.nr]; -+} -+ -+/* -+ * @b is being split/rewritten: it may have pointers to not-yet-written btree -+ * nodes and thus outstanding btree_updates - redirect @b's -+ * btree_updates to point to this btree_update: -+ */ -+static void bch2_btree_interior_update_will_free_node(struct btree_update *as, -+ struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ struct btree_update *p, *n; -+ struct btree_write *w; -+ -+ set_btree_node_dying(b); -+ -+ if (btree_node_fake(b)) -+ return; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ -+ /* -+ * Does this node have any btree_update operations preventing -+ * it from being written?
-+ * -+ * If so, redirect them to point to this btree_update: we can -+ * write out our new nodes, but we won't make them visible until those -+ * operations complete -+ */ -+ list_for_each_entry_safe(p, n, &b->write_blocked, write_blocked_list) { -+ list_del_init(&p->write_blocked_list); -+ btree_update_reparent(as, p); -+ -+ /* -+ * for flush_held_btree_writes() waiting on updates to flush or -+ * nodes to be writeable: -+ */ -+ closure_wake_up(&c->btree_interior_update_wait); -+ } -+ -+ clear_btree_node_dirty_acct(c, b); -+ clear_btree_node_need_write(b); -+ clear_btree_node_write_blocked(b); -+ -+ /* -+ * Does this node have unwritten data that has a pin on the journal? -+ * -+ * If so, transfer that pin to the btree_update operation - -+ * note that if we're freeing multiple nodes, we only need to keep the -+ * oldest pin of any of the nodes we're freeing. We'll release the pin -+ * when the new nodes are persistent and reachable on disk: -+ */ -+ w = btree_current_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ w = btree_prev_write(b); -+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); -+ bch2_journal_pin_drop(&c->journal, &w->journal); -+ -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ /* -+ * Is this a node that isn't reachable on disk yet? -+ * -+ * Nodes that aren't reachable yet have writes blocked until they're -+ * reachable - now that we've cancelled any pending writes and moved -+ * things waiting on that write to wait on this update, we can drop this -+ * node from the list of nodes that the other update is making -+ * reachable, prior to freeing it: -+ */ -+ btree_update_drop_new_node(c, b); -+ -+ btree_update_add_key(as, &as->old_keys, b); -+ -+ as->old_nodes[as->nr_old_nodes] = b; -+ as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq; -+ as->nr_old_nodes++; -+} -+ -+static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans) -+{ -+ struct bch_fs *c = as->c; -+ u64 start_time = as->start_time; -+ -+ BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); -+ -+ if (as->took_gc_lock) -+ up_read(&as->c->gc_lock); -+ as->took_gc_lock = false; -+ -+ bch2_btree_reserve_put(as, trans); -+ -+ continue_at(&as->cl, btree_update_set_nodes_written, -+ as->c->btree_interior_update_worker); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_foreground], -+ start_time); -+} -+ -+static struct btree_update * -+bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, -+ unsigned level, bool split, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_update *as; -+ u64 start_time = local_clock(); -+ int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) -+ ? 
BCH_DISK_RESERVATION_NOFAIL : 0; -+ unsigned nr_nodes[2] = { 0, 0 }; -+ unsigned update_level = level; -+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; -+ unsigned journal_flags = 0; -+ int ret = 0; -+ u32 restart_count = trans->restart_count; -+ -+ BUG_ON(!path->should_be_locked); -+ -+ if (watermark == BCH_WATERMARK_copygc) -+ watermark = BCH_WATERMARK_btree_copygc; -+ if (watermark < BCH_WATERMARK_btree) -+ watermark = BCH_WATERMARK_btree; -+ -+ flags &= ~BCH_WATERMARK_MASK; -+ flags |= watermark; -+ -+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM) -+ journal_flags |= JOURNAL_RES_GET_NONBLOCK; -+ journal_flags |= watermark; -+ -+ while (1) { -+ nr_nodes[!!update_level] += 1 + split; -+ update_level++; -+ -+ ret = bch2_btree_path_upgrade(trans, path, update_level + 1); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (!btree_path_node(path, update_level)) { -+ /* Allocating new root? */ -+ nr_nodes[1] += split; -+ update_level = BTREE_MAX_DEPTH; -+ break; -+ } -+ -+ if (bch2_btree_node_insert_fits(c, path->l[update_level].b, -+ BKEY_BTREE_PTR_U64s_MAX * (1 + split))) -+ break; -+ -+ split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); -+ } -+ -+ if (flags & BTREE_INSERT_GC_LOCK_HELD) -+ lockdep_assert_held(&c->gc_lock); -+ else if (!down_read_trylock(&c->gc_lock)) { -+ ret = drop_locks_do(trans, (down_read(&c->gc_lock), 0)); -+ if (ret) { -+ up_read(&c->gc_lock); -+ return ERR_PTR(ret); -+ } -+ } -+ -+ as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS); -+ memset(as, 0, sizeof(*as)); -+ closure_init(&as->cl, NULL); -+ as->c = c; -+ as->start_time = start_time; -+ as->mode = BTREE_INTERIOR_NO_UPDATE; -+ as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); -+ as->btree_id = path->btree_id; -+ as->update_level = update_level; -+ INIT_LIST_HEAD(&as->list); -+ INIT_LIST_HEAD(&as->unwritten_list); -+ INIT_LIST_HEAD(&as->write_blocked_list); -+ bch2_keylist_init(&as->old_keys, as->_old_keys); -+ bch2_keylist_init(&as->new_keys, as->_new_keys); -+ bch2_keylist_init(&as->parent_keys, as->inline_keys); -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_add_tail(&as->list, &c->btree_interior_update_list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ /* -+ * We don't want to allocate if we're in an error state, that can cause -+ * deadlock on emergency shutdown due to open buckets getting stuck in -+ * the btree_reserve_cache after allocator shutdown has cleared it out. -+ * This check needs to come after adding us to the btree_interior_update -+ * list but before calling bch2_btree_reserve_get, to synchronize with -+ * __bch2_fs_read_only(). 
-+ */ -+ ret = bch2_journal_error(&c->journal); -+ if (ret) -+ goto err; -+ -+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags|JOURNAL_RES_GET_NONBLOCK); -+ if (ret) { -+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM) { -+ ret = -BCH_ERR_journal_reclaim_would_deadlock; -+ goto err; -+ } -+ -+ ret = drop_locks_do(trans, -+ bch2_journal_preres_get(&c->journal, &as->journal_preres, -+ BTREE_UPDATE_JOURNAL_RES, -+ journal_flags)); -+ if (ret == -BCH_ERR_journal_preres_get_blocked) { -+ trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags); -+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); -+ } -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_disk_reservation_get(c, &as->disk_res, -+ (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c), -+ c->opts.metadata_replicas, -+ disk_res_flags); -+ if (ret) -+ goto err; -+ -+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL); -+ if (bch2_err_matches(ret, ENOSPC) || -+ bch2_err_matches(ret, ENOMEM)) { -+ struct closure cl; -+ -+ /* -+ * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK -+ * flag -+ */ -+ if (bch2_err_matches(ret, ENOSPC) && -+ (flags & BTREE_INSERT_JOURNAL_RECLAIM) && -+ watermark != BCH_WATERMARK_reclaim) { -+ ret = -BCH_ERR_journal_reclaim_would_deadlock; -+ goto err; -+ } -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); -+ -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); -+ } -+ -+ if (ret) { -+ trace_and_count(c, btree_reserve_get_fail, trans->fn, -+ _RET_IP_, nr_nodes[0] + nr_nodes[1], ret); -+ goto err; -+ } -+ -+ ret = bch2_trans_relock(trans); -+ if (ret) -+ goto err; -+ -+ bch2_trans_verify_not_restarted(trans, restart_count); -+ return as; -+err: -+ bch2_btree_update_free(as, trans); -+ return ERR_PTR(ret); -+} -+ -+/* Btree root updates: */ -+ -+static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) -+{ -+ /* Root nodes cannot be reaped */ -+ mutex_lock(&c->btree_cache.lock); -+ list_del_init(&b->list); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ mutex_lock(&c->btree_root_lock); -+ BUG_ON(btree_node_root(c, b) && -+ (b->c.level < btree_node_root(c, b)->c.level || -+ !btree_node_dying(btree_node_root(c, b)))); -+ -+ bch2_btree_id_root(c, b->c.btree_id)->b = b; -+ mutex_unlock(&c->btree_root_lock); -+ -+ bch2_recalc_btree_reserve(c); -+} -+ -+static void bch2_btree_set_root(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *old; -+ -+ trace_and_count(c, btree_node_set_root, c, b); -+ -+ old = btree_node_root(c, b); -+ -+ /* -+ * Ensure no one is using the old root while we switch to the -+ * new root: -+ */ -+ bch2_btree_node_lock_write_nofail(trans, path, &old->c); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ btree_update_updated_root(as, b); -+ -+ /* -+ * Unlock old root after new root is visible: -+ * -+ * The new root isn't persistent, but that's ok: we still have -+ * an intent lock on the new root, and any updates that would -+ * depend on the new root would have to update the new root. 
-+ */ -+ bch2_btree_node_unlock_write(trans, path, old); -+} -+ -+/* Interior node updates: */ -+ -+static void bch2_insert_fixup_btree_ptr(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, -+ struct btree_node_iter *node_iter, -+ struct bkey_i *insert) -+{ -+ struct bch_fs *c = as->c; -+ struct bkey_packed *k; -+ struct printbuf buf = PRINTBUF; -+ unsigned long old, new, v; -+ -+ BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 && -+ !btree_ptr_sectors_written(insert)); -+ -+ if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) -+ bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); -+ -+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), -+ btree_node_type(b), WRITE, &buf) ?: -+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { -+ printbuf_reset(&buf); -+ prt_printf(&buf, "inserting invalid bkey\n "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); -+ prt_printf(&buf, "\n "); -+ bch2_bkey_invalid(c, bkey_i_to_s_c(insert), -+ btree_node_type(b), WRITE, &buf); -+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); -+ -+ bch2_fs_inconsistent(c, "%s", buf.buf); -+ dump_stack(); -+ } -+ -+ BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > -+ ARRAY_SIZE(as->journal_entries)); -+ -+ as->journal_u64s += -+ journal_entry_set((void *) &as->journal_entries[as->journal_u64s], -+ BCH_JSET_ENTRY_btree_keys, -+ b->c.btree_id, b->c.level, -+ insert, insert->k.u64s); -+ -+ while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && -+ bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) -+ bch2_btree_node_iter_advance(node_iter, b); -+ -+ bch2_btree_bset_insert_key(trans, path, b, node_iter, insert); -+ set_btree_node_dirty_acct(c, b); -+ -+ v = READ_ONCE(b->flags); -+ do { -+ old = new = v; -+ -+ new &= ~BTREE_WRITE_TYPE_MASK; -+ new |= BTREE_WRITE_interior; -+ new |= 1 << BTREE_NODE_need_write; -+ } while ((v = cmpxchg(&b->flags, old, new)) != old); -+ -+ printbuf_exit(&buf); -+} -+ -+static void -+__bch2_btree_insert_keys_interior(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, -+ struct btree_node_iter node_iter, -+ struct keylist *keys) -+{ -+ struct bkey_i *insert = bch2_keylist_front(keys); -+ struct bkey_packed *k; -+ -+ BUG_ON(btree_node_type(b) != BKEY_TYPE_btree); -+ -+ while ((k = bch2_btree_node_iter_prev_all(&node_iter, b)) && -+ (bkey_cmp_left_packed(b, k, &insert->k.p) >= 0)) -+ ; -+ -+ while (!bch2_keylist_empty(keys)) { -+ insert = bch2_keylist_front(keys); -+ -+ if (bpos_gt(insert->k.p, b->key.k.p)) -+ break; -+ -+ bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); -+ bch2_keylist_pop_front(keys); -+ } -+} -+ -+/* -+ * Move keys from n1 (original replacement node, now lower node) to n2 (higher -+ * node) -+ */ -+static void __btree_split_node(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree *b, -+ struct btree *n[2]) -+{ -+ struct bkey_packed *k; -+ struct bpos n1_pos = POS_MIN; -+ struct btree_node_iter iter; -+ struct bset *bsets[2]; -+ struct bkey_format_state format[2]; -+ struct bkey_packed *out[2]; -+ struct bkey uk; -+ unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5; -+ int i; -+ -+ for (i = 0; i < 2; i++) { -+ BUG_ON(n[i]->nsets != 1); -+ -+ bsets[i] = btree_bset_first(n[i]); -+ out[i] = bsets[i]->start; -+ -+ SET_BTREE_NODE_SEQ(n[i]->data, BTREE_NODE_SEQ(b->data) + 1); -+ bch2_bkey_format_init(&format[i]); -+ } -+ -+ u64s = 0; -+ for_each_btree_node_key(b, k, &iter) { -+ if 
(bkey_deleted(k)) -+ continue; -+ -+ i = u64s >= n1_u64s; -+ u64s += k->u64s; -+ uk = bkey_unpack_key(b, k); -+ if (!i) -+ n1_pos = uk.p; -+ bch2_bkey_format_add_key(&format[i], &uk); -+ } -+ -+ btree_set_min(n[0], b->data->min_key); -+ btree_set_max(n[0], n1_pos); -+ btree_set_min(n[1], bpos_successor(n1_pos)); -+ btree_set_max(n[1], b->data->max_key); -+ -+ for (i = 0; i < 2; i++) { -+ bch2_bkey_format_add_pos(&format[i], n[i]->data->min_key); -+ bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key); -+ -+ n[i]->data->format = bch2_bkey_format_done(&format[i]); -+ btree_node_set_format(n[i], n[i]->data->format); -+ } -+ -+ u64s = 0; -+ for_each_btree_node_key(b, k, &iter) { -+ if (bkey_deleted(k)) -+ continue; -+ -+ i = u64s >= n1_u64s; -+ u64s += k->u64s; -+ -+ if (bch2_bkey_transform(&n[i]->format, out[i], bkey_packed(k) -+ ? &b->format: &bch2_bkey_format_current, k)) -+ out[i]->format = KEY_FORMAT_LOCAL_BTREE; -+ else -+ bch2_bkey_unpack(b, (void *) out[i], k); -+ -+ out[i]->needs_whiteout = false; -+ -+ btree_keys_account_key_add(&n[i]->nr, 0, out[i]); -+ out[i] = bkey_p_next(out[i]); -+ } -+ -+ for (i = 0; i < 2; i++) { -+ bsets[i]->u64s = cpu_to_le16((u64 *) out[i] - bsets[i]->_data); -+ -+ BUG_ON(!bsets[i]->u64s); -+ -+ set_btree_bset_end(n[i], n[i]->set); -+ -+ btree_node_reset_sib_u64s(n[i]); -+ -+ bch2_verify_btree_nr_keys(n[i]); -+ -+ if (b->c.level) -+ btree_node_interior_verify(as->c, n[i]); -+ } -+} -+ -+/* -+ * For updates to interior nodes, we've got to do the insert before we split -+ * because the stuff we're inserting has to be inserted atomically. Post split, -+ * the keys might have to go in different nodes and the split would no longer be -+ * atomic. -+ * -+ * Worse, if the insert is from btree node coalescing, if we do the insert after -+ * we do the split (and pick the pivot) - the pivot we pick might be between -+ * nodes that were coalesced, and thus in the middle of a child node post -+ * coalescing: -+ */ -+static void btree_split_insert_keys(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, -+ struct keylist *keys) -+{ -+ if (!bch2_keylist_empty(keys) && -+ bpos_le(bch2_keylist_front(keys)->k.p, b->data->max_key)) { -+ struct btree_node_iter node_iter; -+ -+ bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); -+ -+ __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); -+ -+ btree_node_interior_verify(as->c, b); -+ } -+} -+ -+static int btree_split(struct btree_update *as, struct btree_trans *trans, -+ struct btree_path *path, struct btree *b, -+ struct keylist *keys, unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ struct btree *parent = btree_node_parent(path, b); -+ struct btree *n1, *n2 = NULL, *n3 = NULL; -+ struct btree_path *path1 = NULL, *path2 = NULL; -+ u64 start_time = local_clock(); -+ int ret = 0; -+ -+ BUG_ON(!parent && (b != btree_node_root(c, b))); -+ BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1)); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ -+ if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) { -+ struct btree *n[2]; -+ -+ trace_and_count(c, btree_node_split, c, b); -+ -+ n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level); -+ n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level); -+ -+ __btree_split_node(as, trans, b, n); -+ -+ if (keys) { -+ btree_split_insert_keys(as, trans, path, n1, keys); -+ btree_split_insert_keys(as, trans, path, n2, keys); -+ BUG_ON(!bch2_keylist_empty(keys)); -+ } -+ -+ 
bch2_btree_build_aux_trees(n2); -+ bch2_btree_build_aux_trees(n1); -+ -+ bch2_btree_update_add_new_node(as, n1); -+ bch2_btree_update_add_new_node(as, n2); -+ six_unlock_write(&n2->c.lock); -+ six_unlock_write(&n1->c.lock); -+ -+ path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p); -+ six_lock_increment(&n1->c.lock, SIX_LOCK_intent); -+ mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); -+ bch2_btree_path_level_init(trans, path1, n1); -+ -+ path2 = get_unlocked_mut_path(trans, path->btree_id, n2->c.level, n2->key.k.p); -+ six_lock_increment(&n2->c.lock, SIX_LOCK_intent); -+ mark_btree_node_locked(trans, path2, n2->c.level, BTREE_NODE_INTENT_LOCKED); -+ bch2_btree_path_level_init(trans, path2, n2); -+ -+ /* -+ * Note that on recursive parent_keys == keys, so we -+ * can't start adding new keys to parent_keys before emptying it -+ * out (which we did with btree_split_insert_keys() above) -+ */ -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ bch2_keylist_add(&as->parent_keys, &n2->key); -+ -+ if (!parent) { -+ /* Depth increases, make a new root */ -+ n3 = __btree_root_alloc(as, trans, b->c.level + 1); -+ -+ bch2_btree_update_add_new_node(as, n3); -+ six_unlock_write(&n3->c.lock); -+ -+ path2->locks_want++; -+ BUG_ON(btree_node_locked(path2, n3->c.level)); -+ six_lock_increment(&n3->c.lock, SIX_LOCK_intent); -+ mark_btree_node_locked(trans, path2, n3->c.level, BTREE_NODE_INTENT_LOCKED); -+ bch2_btree_path_level_init(trans, path2, n3); -+ -+ n3->sib_u64s[0] = U16_MAX; -+ n3->sib_u64s[1] = U16_MAX; -+ -+ btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); -+ } -+ } else { -+ trace_and_count(c, btree_node_compact, c, b); -+ -+ n1 = bch2_btree_node_alloc_replacement(as, trans, b); -+ -+ if (keys) { -+ btree_split_insert_keys(as, trans, path, n1, keys); -+ BUG_ON(!bch2_keylist_empty(keys)); -+ } -+ -+ bch2_btree_build_aux_trees(n1); -+ bch2_btree_update_add_new_node(as, n1); -+ six_unlock_write(&n1->c.lock); -+ -+ path1 = get_unlocked_mut_path(trans, path->btree_id, n1->c.level, n1->key.k.p); -+ six_lock_increment(&n1->c.lock, SIX_LOCK_intent); -+ mark_btree_node_locked(trans, path1, n1->c.level, BTREE_NODE_INTENT_LOCKED); -+ bch2_btree_path_level_init(trans, path1, n1); -+ -+ if (parent) -+ bch2_keylist_add(&as->parent_keys, &n1->key); -+ } -+ -+ /* New nodes all written, now make them visible: */ -+ -+ if (parent) { -+ /* Split a non root node */ -+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); -+ if (ret) -+ goto err; -+ } else if (n3) { -+ bch2_btree_set_root(as, trans, path, n3); -+ } else { -+ /* Root filled up but didn't need to be split */ -+ bch2_btree_set_root(as, trans, path, n1); -+ } -+ -+ if (n3) { -+ bch2_btree_update_get_open_buckets(as, n3); -+ bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); -+ } -+ if (n2) { -+ bch2_btree_update_get_open_buckets(as, n2); -+ bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0); -+ } -+ bch2_btree_update_get_open_buckets(as, n1); -+ bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); -+ -+ /* -+ * The old node must be freed (in memory) _before_ unlocking the new -+ * nodes - else another thread could re-acquire a read lock on the old -+ * node after another thread has locked and updated the new node, thus -+ * seeing stale data: -+ */ -+ bch2_btree_node_free_inmem(trans, path, b); -+ -+ if (n3) -+ bch2_trans_node_add(trans, n3); -+ if (n2) -+ bch2_trans_node_add(trans, n2); -+ bch2_trans_node_add(trans, n1); -+ -+ if (n3) -+ 
six_unlock_intent(&n3->c.lock); -+ if (n2) -+ six_unlock_intent(&n2->c.lock); -+ six_unlock_intent(&n1->c.lock); -+out: -+ if (path2) { -+ __bch2_btree_path_unlock(trans, path2); -+ bch2_path_put(trans, path2, true); -+ } -+ if (path1) { -+ __bch2_btree_path_unlock(trans, path1); -+ bch2_path_put(trans, path1, true); -+ } -+ -+ bch2_trans_verify_locks(trans); -+ -+ bch2_time_stats_update(&c->times[n2 -+ ? BCH_TIME_btree_node_split -+ : BCH_TIME_btree_node_compact], -+ start_time); -+ return ret; -+err: -+ if (n3) -+ bch2_btree_node_free_never_used(as, trans, n3); -+ if (n2) -+ bch2_btree_node_free_never_used(as, trans, n2); -+ bch2_btree_node_free_never_used(as, trans, n1); -+ goto out; -+} -+ -+static void -+bch2_btree_insert_keys_interior(struct btree_update *as, -+ struct btree_trans *trans, -+ struct btree_path *path, -+ struct btree *b, -+ struct keylist *keys) -+{ -+ struct btree_path *linked; -+ -+ __bch2_btree_insert_keys_interior(as, trans, path, b, -+ path->l[b->c.level].iter, keys); -+ -+ btree_update_updated_node(as, b); -+ -+ trans_for_each_path_with_node(trans, b, linked) -+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); -+ -+ bch2_trans_verify_paths(trans); -+} -+ -+/** -+ * bch2_btree_insert_node - insert bkeys into a given btree node -+ * -+ * @as: btree_update object -+ * @trans: btree_trans object -+ * @path: path that points to current node -+ * @b: node to insert keys into -+ * @keys: list of keys to insert -+ * @flags: transaction commit flags -+ * -+ * Returns: 0 on success, typically transaction restart error on failure -+ * -+ * Inserts as many keys as it can into a given btree node, splitting it if full. -+ * If a split occurred, this function will return early. This can only happen -+ * for leaf nodes -- inserts into interior nodes have to be atomic. 
-+ */ -+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans, -+ struct btree_path *path, struct btree *b, -+ struct keylist *keys, unsigned flags) -+{ -+ struct bch_fs *c = as->c; -+ int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); -+ int old_live_u64s = b->nr.live_u64s; -+ int live_u64s_added, u64s_added; -+ int ret; -+ -+ lockdep_assert_held(&c->gc_lock); -+ BUG_ON(!btree_node_intent_locked(path, b->c.level)); -+ BUG_ON(!b->c.level); -+ BUG_ON(!as || as->b); -+ bch2_verify_keylist_sorted(keys); -+ -+ ret = bch2_btree_node_lock_write(trans, path, &b->c); -+ if (ret) -+ return ret; -+ -+ bch2_btree_node_prep_for_write(trans, path, b); -+ -+ if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { -+ bch2_btree_node_unlock_write(trans, path, b); -+ goto split; -+ } -+ -+ btree_node_interior_verify(c, b); -+ -+ bch2_btree_insert_keys_interior(as, trans, path, b, keys); -+ -+ live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; -+ u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; -+ -+ if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added); -+ if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0) -+ b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added); -+ -+ if (u64s_added > live_u64s_added && -+ bch2_maybe_compact_whiteouts(c, b)) -+ bch2_trans_node_reinit_iter(trans, b); -+ -+ bch2_btree_node_unlock_write(trans, path, b); -+ -+ btree_node_interior_verify(c, b); -+ return 0; -+split: -+ /* -+ * We could attempt to avoid the transaction restart, by calling -+ * bch2_btree_path_upgrade() and allocating more nodes: -+ */ -+ if (b->c.level >= as->update_level) { -+ trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b); -+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race); -+ } -+ -+ return btree_split(as, trans, path, b, keys, flags); -+} -+ -+int bch2_btree_split_leaf(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned flags) -+{ -+ struct btree *b = path_l(path)->b; -+ struct btree_update *as; -+ unsigned l; -+ int ret = 0; -+ -+ as = bch2_btree_update_start(trans, path, path->level, -+ true, flags); -+ if (IS_ERR(as)) -+ return PTR_ERR(as); -+ -+ ret = btree_split(as, trans, path, b, NULL, flags); -+ if (ret) { -+ bch2_btree_update_free(as, trans); -+ return ret; -+ } -+ -+ bch2_btree_update_done(as, trans); -+ -+ for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++) -+ ret = bch2_foreground_maybe_merge(trans, path, l, flags); -+ -+ return ret; -+} -+ -+int __bch2_foreground_maybe_merge(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned level, -+ unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_path *sib_path = NULL, *new_path = NULL; -+ struct btree_update *as; -+ struct bkey_format_state new_s; -+ struct bkey_format new_f; -+ struct bkey_i delete; -+ struct btree *b, *m, *n, *prev, *next, *parent; -+ struct bpos sib_pos; -+ size_t sib_u64s; -+ u64 start_time = local_clock(); -+ int ret = 0; -+ -+ BUG_ON(!path->should_be_locked); -+ BUG_ON(!btree_node_locked(path, level)); -+ -+ b = path->l[level].b; -+ -+ if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) || -+ (sib == btree_next_sib && bpos_eq(b->data->max_key, SPOS_MAX))) { -+ b->sib_u64s[sib] = U16_MAX; -+ return 0; -+ } -+ -+ sib_pos = sib == btree_prev_sib -+ ? 
bpos_predecessor(b->data->min_key) -+ : bpos_successor(b->data->max_key); -+ -+ sib_path = bch2_path_get(trans, path->btree_id, sib_pos, -+ U8_MAX, level, BTREE_ITER_INTENT, _THIS_IP_); -+ ret = bch2_btree_path_traverse(trans, sib_path, false); -+ if (ret) -+ goto err; -+ -+ btree_path_set_should_be_locked(sib_path); -+ -+ m = sib_path->l[level].b; -+ -+ if (btree_node_parent(path, b) != -+ btree_node_parent(sib_path, m)) { -+ b->sib_u64s[sib] = U16_MAX; -+ goto out; -+ } -+ -+ if (sib == btree_prev_sib) { -+ prev = m; -+ next = b; -+ } else { -+ prev = b; -+ next = m; -+ } -+ -+ if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { -+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; -+ -+ bch2_bpos_to_text(&buf1, prev->data->max_key); -+ bch2_bpos_to_text(&buf2, next->data->min_key); -+ bch_err(c, -+ "%s(): btree topology error:\n" -+ " prev ends at %s\n" -+ " next starts at %s", -+ __func__, buf1.buf, buf2.buf); -+ printbuf_exit(&buf1); -+ printbuf_exit(&buf2); -+ bch2_topology_error(c); -+ ret = -EIO; -+ goto err; -+ } -+ -+ bch2_bkey_format_init(&new_s); -+ bch2_bkey_format_add_pos(&new_s, prev->data->min_key); -+ __bch2_btree_calc_format(&new_s, prev); -+ __bch2_btree_calc_format(&new_s, next); -+ bch2_bkey_format_add_pos(&new_s, next->data->max_key); -+ new_f = bch2_bkey_format_done(&new_s); -+ -+ sib_u64s = btree_node_u64s_with_format(b, &new_f) + -+ btree_node_u64s_with_format(m, &new_f); -+ -+ if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { -+ sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ sib_u64s /= 2; -+ sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c); -+ } -+ -+ sib_u64s = min(sib_u64s, btree_max_u64s(c)); -+ sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1); -+ b->sib_u64s[sib] = sib_u64s; -+ -+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) -+ goto out; -+ -+ parent = btree_node_parent(path, b); -+ as = bch2_btree_update_start(trans, path, level, false, -+ BTREE_INSERT_NOFAIL|flags); -+ ret = PTR_ERR_OR_ZERO(as); -+ if (ret) -+ goto err; -+ -+ trace_and_count(c, btree_node_merge, c, b); -+ -+ bch2_btree_interior_update_will_free_node(as, b); -+ bch2_btree_interior_update_will_free_node(as, m); -+ -+ n = bch2_btree_node_alloc(as, trans, b->c.level); -+ -+ SET_BTREE_NODE_SEQ(n->data, -+ max(BTREE_NODE_SEQ(b->data), -+ BTREE_NODE_SEQ(m->data)) + 1); -+ -+ btree_set_min(n, prev->data->min_key); -+ btree_set_max(n, next->data->max_key); -+ -+ n->data->format = new_f; -+ btree_node_set_format(n, new_f); -+ -+ bch2_btree_sort_into(c, n, prev); -+ bch2_btree_sort_into(c, n, next); -+ -+ bch2_btree_build_aux_trees(n); -+ bch2_btree_update_add_new_node(as, n); -+ six_unlock_write(&n->c.lock); -+ -+ new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p); -+ six_lock_increment(&n->c.lock, SIX_LOCK_intent); -+ mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED); -+ bch2_btree_path_level_init(trans, new_path, n); -+ -+ bkey_init(&delete.k); -+ delete.k.p = prev->key.k.p; -+ bch2_keylist_add(&as->parent_keys, &delete); -+ bch2_keylist_add(&as->parent_keys, &n->key); -+ -+ bch2_trans_verify_paths(trans); -+ -+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); -+ if (ret) -+ goto err_free_update; -+ -+ bch2_trans_verify_paths(trans); -+ -+ bch2_btree_update_get_open_buckets(as, n); -+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); -+ -+ bch2_btree_node_free_inmem(trans, path, b); -+ bch2_btree_node_free_inmem(trans, sib_path, m); -+ -+ bch2_trans_node_add(trans, n); 
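/*
 * The sib_u64s clamp above damps merge decisions: only half of whatever
 * exceeds BTREE_FOREGROUND_MERGE_HYSTERESIS(c) is counted, so two siblings
 * hovering near the threshold do not merge and then immediately re-split.
 * A minimal standalone sketch of that damping, assuming an illustrative
 * threshold value (not the patch's):
 */
#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-in for BTREE_FOREGROUND_MERGE_HYSTERESIS(c). */
#define MERGE_HYSTERESIS 1000

/* Count only half of the size in excess of the hysteresis threshold,
 * mirroring the sib_u64s adjustment in __bch2_foreground_maybe_merge(). */
static size_t damp_sib_u64s(size_t sib_u64s)
{
	if (sib_u64s > MERGE_HYSTERESIS) {
		sib_u64s -= MERGE_HYSTERESIS;
		sib_u64s /= 2;
		sib_u64s += MERGE_HYSTERESIS;
	}
	return sib_u64s;
}

int main(void)
{
	/* 1600 u64s over a 1000-u64 threshold counts as 1300: the 600-u64
	 * excess is halved before comparing against the merge threshold. */
	printf("%zu -> %zu\n", (size_t)1600, damp_sib_u64s(1600));
	printf("%zu -> %zu\n", (size_t)800, damp_sib_u64s(800));
	return 0;
}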
-+
-+ bch2_trans_verify_paths(trans);
-+
-+ six_unlock_intent(&n->c.lock);
-+
-+ bch2_btree_update_done(as, trans);
-+
-+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
-+out:
-+err:
-+ if (new_path)
-+ bch2_path_put(trans, new_path, true);
-+ bch2_path_put(trans, sib_path, true);
-+ bch2_trans_verify_locks(trans);
-+ return ret;
-+err_free_update:
-+ bch2_btree_node_free_never_used(as, trans, n);
-+ bch2_btree_update_free(as, trans);
-+ goto out;
-+}
-+
-+int bch2_btree_node_rewrite(struct btree_trans *trans,
-+ struct btree_iter *iter,
-+ struct btree *b,
-+ unsigned flags)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_path *new_path = NULL;
-+ struct btree *n, *parent;
-+ struct btree_update *as;
-+ int ret;
-+
-+ flags |= BTREE_INSERT_NOFAIL;
-+
-+ parent = btree_node_parent(iter->path, b);
-+ as = bch2_btree_update_start(trans, iter->path, b->c.level,
-+ false, flags);
-+ ret = PTR_ERR_OR_ZERO(as);
-+ if (ret)
-+ goto out;
-+
-+ bch2_btree_interior_update_will_free_node(as, b);
-+
-+ n = bch2_btree_node_alloc_replacement(as, trans, b);
-+
-+ bch2_btree_build_aux_trees(n);
-+ bch2_btree_update_add_new_node(as, n);
-+ six_unlock_write(&n->c.lock);
-+
-+ new_path = get_unlocked_mut_path(trans, iter->btree_id, n->c.level, n->key.k.p);
-+ six_lock_increment(&n->c.lock, SIX_LOCK_intent);
-+ mark_btree_node_locked(trans, new_path, n->c.level, BTREE_NODE_INTENT_LOCKED);
-+ bch2_btree_path_level_init(trans, new_path, n);
-+
-+ trace_and_count(c, btree_node_rewrite, c, b);
-+
-+ if (parent) {
-+ bch2_keylist_add(&as->parent_keys, &n->key);
-+ ret = bch2_btree_insert_node(as, trans, iter->path, parent,
-+ &as->parent_keys, flags);
-+ if (ret)
-+ goto err;
-+ } else {
-+ bch2_btree_set_root(as, trans, iter->path, n);
-+ }
-+
-+ bch2_btree_update_get_open_buckets(as, n);
-+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-+
-+ bch2_btree_node_free_inmem(trans, iter->path, b);
-+
-+ bch2_trans_node_add(trans, n);
-+ six_unlock_intent(&n->c.lock);
-+
-+ bch2_btree_update_done(as, trans);
-+out:
-+ if (new_path)
-+ bch2_path_put(trans, new_path, true);
-+ bch2_trans_downgrade(trans);
-+ return ret;
-+err:
-+ bch2_btree_node_free_never_used(as, trans, n);
-+ bch2_btree_update_free(as, trans);
-+ goto out;
-+}
-+
-+struct async_btree_rewrite {
-+ struct bch_fs *c;
-+ struct work_struct work;
-+ struct list_head list;
-+ enum btree_id btree_id;
-+ unsigned level;
-+ struct bpos pos;
-+ __le64 seq;
-+};
-+
-+static int async_btree_node_rewrite_trans(struct btree_trans *trans,
-+ struct async_btree_rewrite *a)
-+{
-+ struct bch_fs *c = trans->c;
-+ struct btree_iter iter;
-+ struct btree *b;
-+ int ret;
-+
-+ bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos,
-+ BTREE_MAX_DEPTH, a->level, 0);
-+ b = bch2_btree_iter_peek_node(&iter);
-+ ret = PTR_ERR_OR_ZERO(b);
-+ if (ret)
-+ goto out;
-+
-+ if (!b || b->data->keys.seq != a->seq) {
-+ struct printbuf buf = PRINTBUF;
-+
-+ if (b)
-+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
-+ else
-+ prt_str(&buf, "(null)");
-+ bch_info(c, "%s: node to rewrite not found, searching for seq %llu, got\n%s",
-+ __func__, a->seq, buf.buf);
-+ printbuf_exit(&buf);
-+ goto out;
-+ }
-+
-+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
-+out:
-+ bch2_trans_iter_exit(trans, &iter);
-+
-+ return ret;
-+}
-+
-+static void async_btree_node_rewrite_work(struct work_struct *work)
-+{
-+ struct async_btree_rewrite *a =
-+ container_of(work, struct async_btree_rewrite, work);
-+ struct bch_fs *c = a->c;
-+ int ret;
-+
-+ ret =
bch2_trans_do(c, NULL, NULL, 0, -+ async_btree_node_rewrite_trans(trans, a)); -+ if (ret) -+ bch_err_fn(c, ret); -+ bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); -+ kfree(a); -+} -+ -+void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) -+{ -+ struct async_btree_rewrite *a; -+ int ret; -+ -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (!a) { -+ bch_err(c, "%s: error allocating memory", __func__); -+ return; -+ } -+ -+ a->c = c; -+ a->btree_id = b->c.btree_id; -+ a->level = b->c.level; -+ a->pos = b->key.k.p; -+ a->seq = b->data->keys.seq; -+ INIT_WORK(&a->work, async_btree_node_rewrite_work); -+ -+ if (unlikely(!test_bit(BCH_FS_MAY_GO_RW, &c->flags))) { -+ mutex_lock(&c->pending_node_rewrites_lock); -+ list_add(&a->list, &c->pending_node_rewrites); -+ mutex_unlock(&c->pending_node_rewrites_lock); -+ return; -+ } -+ -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { -+ if (test_bit(BCH_FS_STARTED, &c->flags)) { -+ bch_err(c, "%s: error getting c->writes ref", __func__); -+ kfree(a); -+ return; -+ } -+ -+ ret = bch2_fs_read_write_early(c); -+ if (ret) { -+ bch_err_msg(c, ret, "going read-write"); -+ kfree(a); -+ return; -+ } -+ -+ bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); -+ } -+ -+ queue_work(c->btree_interior_update_worker, &a->work); -+} -+ -+void bch2_do_pending_node_rewrites(struct bch_fs *c) -+{ -+ struct async_btree_rewrite *a, *n; -+ -+ mutex_lock(&c->pending_node_rewrites_lock); -+ list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { -+ list_del(&a->list); -+ -+ bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); -+ queue_work(c->btree_interior_update_worker, &a->work); -+ } -+ mutex_unlock(&c->pending_node_rewrites_lock); -+} -+ -+void bch2_free_pending_node_rewrites(struct bch_fs *c) -+{ -+ struct async_btree_rewrite *a, *n; -+ -+ mutex_lock(&c->pending_node_rewrites_lock); -+ list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { -+ list_del(&a->list); -+ -+ kfree(a); -+ } -+ mutex_unlock(&c->pending_node_rewrites_lock); -+} -+ -+static int __bch2_btree_node_update_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct btree *b, struct btree *new_hash, -+ struct bkey_i *new_key, -+ unsigned commit_flags, -+ bool skip_triggers) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter2 = { NULL }; -+ struct btree *parent; -+ int ret; -+ -+ if (!skip_triggers) { -+ ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1, -+ bkey_i_to_s_c(&b->key), 0); -+ if (ret) -+ return ret; -+ -+ ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1, -+ new_key, 0); -+ if (ret) -+ return ret; -+ } -+ -+ if (new_hash) { -+ bkey_copy(&new_hash->key, new_key); -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, -+ new_hash, b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ } -+ -+ parent = btree_node_parent(iter->path, b); -+ if (parent) { -+ bch2_trans_copy_iter(&iter2, iter); -+ -+ iter2.path = bch2_btree_path_make_mut(trans, iter2.path, -+ iter2.flags & BTREE_ITER_INTENT, -+ _THIS_IP_); -+ -+ BUG_ON(iter2.path->level != b->c.level); -+ BUG_ON(!bpos_eq(iter2.path->pos, new_key->k.p)); -+ -+ btree_path_set_level_up(trans, iter2.path); -+ -+ trans->paths_sorted = false; -+ -+ ret = bch2_btree_iter_traverse(&iter2) ?: -+ bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN); -+ if (ret) -+ goto err; -+ } else { -+ BUG_ON(btree_node_root(c, b) != b); -+ -+ ret = darray_make_room(&trans->extra_journal_entries, -+ jset_u64s(new_key->k.u64s)); -+ if (ret) -+ return ret; -+ -+ 
journal_entry_set((void *) &darray_top(trans->extra_journal_entries), -+ BCH_JSET_ENTRY_btree_root, -+ b->c.btree_id, b->c.level, -+ new_key, new_key->k.u64s); -+ trans->extra_journal_entries.nr += jset_u64s(new_key->k.u64s); -+ } -+ -+ ret = bch2_trans_commit(trans, NULL, NULL, commit_flags); -+ if (ret) -+ goto err; -+ -+ bch2_btree_node_lock_write_nofail(trans, iter->path, &b->c); -+ -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, new_hash); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ -+ bkey_copy(&b->key, new_key); -+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); -+ BUG_ON(ret); -+ mutex_unlock(&c->btree_cache.lock); -+ } else { -+ bkey_copy(&b->key, new_key); -+ } -+ -+ bch2_btree_node_unlock_write(trans, iter->path, b); -+out: -+ bch2_trans_iter_exit(trans, &iter2); -+ return ret; -+err: -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ bch2_btree_node_hash_remove(&c->btree_cache, b); -+ mutex_unlock(&c->btree_cache.lock); -+ } -+ goto out; -+} -+ -+int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *iter, -+ struct btree *b, struct bkey_i *new_key, -+ unsigned commit_flags, bool skip_triggers) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree *new_hash = NULL; -+ struct btree_path *path = iter->path; -+ struct closure cl; -+ int ret = 0; -+ -+ ret = bch2_btree_path_upgrade(trans, path, b->c.level + 1); -+ if (ret) -+ return ret; -+ -+ closure_init_stack(&cl); -+ -+ /* -+ * check btree_ptr_hash_val() after @b is locked by -+ * btree_iter_traverse(): -+ */ -+ if (btree_ptr_hash_val(new_key) != b->hash_val) { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ if (ret) { -+ ret = drop_locks_do(trans, (closure_sync(&cl), 0)); -+ if (ret) -+ return ret; -+ } -+ -+ new_hash = bch2_btree_node_mem_alloc(trans, false); -+ } -+ -+ path->intent_ref++; -+ ret = __bch2_btree_node_update_key(trans, iter, b, new_hash, new_key, -+ commit_flags, skip_triggers); -+ --path->intent_ref; -+ -+ if (new_hash) { -+ mutex_lock(&c->btree_cache.lock); -+ list_move(&new_hash->list, &c->btree_cache.freeable); -+ mutex_unlock(&c->btree_cache.lock); -+ -+ six_unlock_write(&new_hash->c.lock); -+ six_unlock_intent(&new_hash->c.lock); -+ } -+ closure_sync(&cl); -+ bch2_btree_cache_cannibalize_unlock(c); -+ return ret; -+} -+ -+int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, -+ struct btree *b, struct bkey_i *new_key, -+ unsigned commit_flags, bool skip_triggers) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p, -+ BTREE_MAX_DEPTH, b->c.level, -+ BTREE_ITER_INTENT); -+ ret = bch2_btree_iter_traverse(&iter); -+ if (ret) -+ goto out; -+ -+ /* has node been freed? 
*/ -+ if (iter.path->l[b->c.level].b != b) { -+ /* node has been freed: */ -+ BUG_ON(!btree_node_dying(b)); -+ goto out; -+ } -+ -+ BUG_ON(!btree_node_hashed(b)); -+ -+ ret = bch2_btree_node_update_key(trans, &iter, b, new_key, -+ commit_flags, skip_triggers); -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* Init code: */ -+ -+/* -+ * Only for filesystem bringup, when first reading the btree roots or allocating -+ * btree roots when initializing a new filesystem: -+ */ -+void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b) -+{ -+ BUG_ON(btree_node_root(c, b)); -+ -+ bch2_btree_set_root_inmem(c, b); -+} -+ -+static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) -+{ -+ struct bch_fs *c = trans->c; -+ struct closure cl; -+ struct btree *b; -+ int ret; -+ -+ closure_init_stack(&cl); -+ -+ do { -+ ret = bch2_btree_cache_cannibalize_lock(c, &cl); -+ closure_sync(&cl); -+ } while (ret); -+ -+ b = bch2_btree_node_mem_alloc(trans, false); -+ bch2_btree_cache_cannibalize_unlock(c); -+ -+ set_btree_node_fake(b); -+ set_btree_node_need_rewrite(b); -+ b->c.level = 0; -+ b->c.btree_id = id; -+ -+ bkey_btree_ptr_init(&b->key); -+ b->key.k.p = SPOS_MAX; -+ *((u64 *) bkey_i_to_btree_ptr(&b->key)->v.start) = U64_MAX - id; -+ -+ bch2_bset_init_first(b, &b->data->keys); -+ bch2_btree_build_aux_trees(b); -+ -+ b->data->flags = 0; -+ btree_set_min(b, POS_MIN); -+ btree_set_max(b, SPOS_MAX); -+ b->data->format = bch2_btree_calc_format(b); -+ btree_node_set_format(b, b->data->format); -+ -+ ret = bch2_btree_node_hash_insert(&c->btree_cache, b, -+ b->c.level, b->c.btree_id); -+ BUG_ON(ret); -+ -+ bch2_btree_set_root_inmem(c, b); -+ -+ six_unlock_write(&b->c.lock); -+ six_unlock_intent(&b->c.lock); -+ return 0; -+} -+ -+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) -+{ -+ bch2_trans_run(c, __bch2_btree_root_alloc(trans, id)); -+} -+ -+void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct btree_update *as; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ list_for_each_entry(as, &c->btree_interior_update_list, list) -+ prt_printf(out, "%p m %u w %u r %u j %llu\n", -+ as, -+ as->mode, -+ as->nodes_written, -+ closure_nr_remaining(&as->cl), -+ as->journal.seq); -+ mutex_unlock(&c->btree_interior_update_lock); -+} -+ -+static bool bch2_btree_interior_updates_pending(struct bch_fs *c) -+{ -+ bool ret; -+ -+ mutex_lock(&c->btree_interior_update_lock); -+ ret = !list_empty(&c->btree_interior_update_list); -+ mutex_unlock(&c->btree_interior_update_lock); -+ -+ return ret; -+} -+ -+bool bch2_btree_interior_updates_flush(struct bch_fs *c) -+{ -+ bool ret = bch2_btree_interior_updates_pending(c); -+ -+ if (ret) -+ closure_wait_event(&c->btree_interior_update_wait, -+ !bch2_btree_interior_updates_pending(c)); -+ return ret; -+} -+ -+void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry) -+{ -+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ r->level = entry->level; -+ r->alive = true; -+ bkey_copy(&r->key, (struct bkey_i *) entry->start); -+ -+ mutex_unlock(&c->btree_root_lock); -+} -+ -+struct jset_entry * -+bch2_btree_roots_to_journal_entries(struct bch_fs *c, -+ struct jset_entry *end, -+ unsigned long skip) -+{ -+ unsigned i; -+ -+ mutex_lock(&c->btree_root_lock); -+ -+ for (i = 0; i < btree_id_nr_alive(c); i++) { -+ struct btree_root *r = bch2_btree_id_root(c, i); -+ -+ if (r->alive && !test_bit(i, &skip)) { -+ 
journal_entry_set(end, BCH_JSET_ENTRY_btree_root, -+ i, r->level, &r->key, r->key.k.u64s); -+ end = vstruct_next(end); -+ } -+ } -+ -+ mutex_unlock(&c->btree_root_lock); -+ -+ return end; -+} -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *c) -+{ -+ if (c->btree_interior_update_worker) -+ destroy_workqueue(c->btree_interior_update_worker); -+ mempool_exit(&c->btree_interior_update_pool); -+} -+ -+void bch2_fs_btree_interior_update_init_early(struct bch_fs *c) -+{ -+ mutex_init(&c->btree_reserve_cache_lock); -+ INIT_LIST_HEAD(&c->btree_interior_update_list); -+ INIT_LIST_HEAD(&c->btree_interior_updates_unwritten); -+ mutex_init(&c->btree_interior_update_lock); -+ INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); -+ -+ INIT_LIST_HEAD(&c->pending_node_rewrites); -+ mutex_init(&c->pending_node_rewrites_lock); -+} -+ -+int bch2_fs_btree_interior_update_init(struct bch_fs *c) -+{ -+ c->btree_interior_update_worker = -+ alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1); -+ if (!c->btree_interior_update_worker) -+ return -BCH_ERR_ENOMEM_btree_interior_update_worker_init; -+ -+ if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, -+ sizeof(struct btree_update))) -+ return -BCH_ERR_ENOMEM_btree_interior_update_pool_init; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h -new file mode 100644 -index 000000000000..4df21512d640 ---- /dev/null -+++ b/fs/bcachefs/btree_update_interior.h -@@ -0,0 +1,337 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+#define _BCACHEFS_BTREE_UPDATE_INTERIOR_H -+ -+#include "btree_cache.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+ -+void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *); -+bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *, -+ struct bkey_format *); -+ -+#define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) -+ -+#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) -+ -+/* -+ * Tracks an in progress split/rewrite of a btree node and the update to the -+ * parent node: -+ * -+ * When we split/rewrite a node, we do all the updates in memory without -+ * waiting for any writes to complete - we allocate the new node(s) and update -+ * the parent node, possibly recursively up to the root. -+ * -+ * The end result is that we have one or more new nodes being written - -+ * possibly several, if there were multiple splits - and then a write (updating -+ * an interior node) which will make all these new nodes visible. -+ * -+ * Additionally, as we split/rewrite nodes we free the old nodes - but the old -+ * nodes can't be freed (their space on disk can't be reclaimed) until the -+ * update to the interior node that makes the new node visible completes - -+ * until then, the old nodes are still reachable on disk. -+ * -+ */ -+struct btree_update { -+ struct closure cl; -+ struct bch_fs *c; -+ u64 start_time; -+ -+ struct list_head list; -+ struct list_head unwritten_list; -+ -+ /* What kind of update are we doing? 
*/ -+ enum { -+ BTREE_INTERIOR_NO_UPDATE, -+ BTREE_INTERIOR_UPDATING_NODE, -+ BTREE_INTERIOR_UPDATING_ROOT, -+ BTREE_INTERIOR_UPDATING_AS, -+ } mode; -+ -+ unsigned nodes_written:1; -+ unsigned took_gc_lock:1; -+ -+ enum btree_id btree_id; -+ unsigned update_level; -+ -+ struct disk_reservation disk_res; -+ struct journal_preres journal_preres; -+ -+ /* -+ * BTREE_INTERIOR_UPDATING_NODE: -+ * The update that made the new nodes visible was a regular update to an -+ * existing interior node - @b. We can't write out the update to @b -+ * until the new nodes we created are finished writing, so we block @b -+ * from writing by putting this btree_interior update on the -+ * @b->write_blocked list with @write_blocked_list: -+ */ -+ struct btree *b; -+ struct list_head write_blocked_list; -+ -+ /* -+ * We may be freeing nodes that were dirty, and thus had journal entries -+ * pinned: we need to transfer the oldest of those pins to the -+ * btree_update operation, and release it when the new node(s) -+ * are all persistent and reachable: -+ */ -+ struct journal_entry_pin journal; -+ -+ /* Preallocated nodes we reserve when we start the update: */ -+ struct prealloc_nodes { -+ struct btree *b[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr; -+ } prealloc_nodes[2]; -+ -+ /* Nodes being freed: */ -+ struct keylist old_keys; -+ u64 _old_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_U64s_MAX]; -+ -+ /* Nodes being added: */ -+ struct keylist new_keys; -+ u64 _new_keys[BTREE_UPDATE_NODES_MAX * -+ BKEY_BTREE_PTR_U64s_MAX]; -+ -+ /* New nodes, that will be made reachable by this update: */ -+ struct btree *new_nodes[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_new_nodes; -+ -+ struct btree *old_nodes[BTREE_UPDATE_NODES_MAX]; -+ __le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX]; -+ unsigned nr_old_nodes; -+ -+ open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX * -+ BCH_REPLICAS_MAX]; -+ open_bucket_idx_t nr_open_buckets; -+ -+ unsigned journal_u64s; -+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES]; -+ -+ /* Only here to reduce stack usage on recursive splits: */ -+ struct keylist parent_keys; -+ /* -+ * Enough room for btree_split's keys without realloc - btree node -+ * pointers never have crc/compression info, so we only need to acount -+ * for the pointers for three keys -+ */ -+ u64 inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3]; -+}; -+ -+struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, -+ struct btree_trans *, -+ struct btree *, -+ struct bkey_format); -+ -+int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned); -+ -+int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *, -+ unsigned, unsigned, enum btree_node_sibling); -+ -+static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned level, unsigned flags, -+ enum btree_node_sibling sib) -+{ -+ struct btree *b; -+ -+ EBUG_ON(!btree_node_locked(path, level)); -+ -+ b = path->l[level].b; -+ if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold) -+ return 0; -+ -+ return __bch2_foreground_maybe_merge(trans, path, level, flags, sib); -+} -+ -+static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, -+ struct btree_path *path, -+ unsigned level, -+ unsigned flags) -+{ -+ return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, -+ btree_prev_sib) ?: -+ bch2_foreground_maybe_merge_sibling(trans, path, level, flags, -+ btree_next_sib); -+} -+ -+int bch2_btree_node_rewrite(struct btree_trans *, struct 
btree_iter *, -+ struct btree *, unsigned); -+void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); -+int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, -+ struct btree *, struct bkey_i *, -+ unsigned, bool); -+int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *, -+ struct bkey_i *, unsigned, bool); -+ -+void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *); -+void bch2_btree_root_alloc(struct bch_fs *, enum btree_id); -+ -+static inline unsigned btree_update_reserve_required(struct bch_fs *c, -+ struct btree *b) -+{ -+ unsigned depth = btree_node_root(c, b)->c.level + 1; -+ -+ /* -+ * Number of nodes we might have to allocate in a worst case btree -+ * split operation - we split all the way up to the root, then allocate -+ * a new root, unless we're already at max depth: -+ */ -+ if (depth < BTREE_MAX_DEPTH) -+ return (depth - b->c.level) * 2 + 1; -+ else -+ return (depth - b->c.level) * 2 - 1; -+} -+ -+static inline void btree_node_reset_sib_u64s(struct btree *b) -+{ -+ b->sib_u64s[0] = b->nr.live_u64s; -+ b->sib_u64s[1] = b->nr.live_u64s; -+} -+ -+static inline void *btree_data_end(struct bch_fs *c, struct btree *b) -+{ -+ return (void *) b->data + btree_bytes(c); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, -+ struct btree *b) -+{ -+ return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); -+} -+ -+static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c, -+ struct btree *b) -+{ -+ return btree_data_end(c, b); -+} -+ -+static inline void *write_block(struct btree *b) -+{ -+ return (void *) b->data + (b->written << 9); -+} -+ -+static inline bool __btree_addr_written(struct btree *b, void *p) -+{ -+ return p < write_block(b); -+} -+ -+static inline bool bset_written(struct btree *b, struct bset *i) -+{ -+ return __btree_addr_written(b, i); -+} -+ -+static inline bool bkey_written(struct btree *b, struct bkey_packed *k) -+{ -+ return __btree_addr_written(b, k); -+} -+ -+static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, -+ struct btree *b, -+ void *end) -+{ -+ ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + -+ b->whiteout_u64s; -+ ssize_t total = c->opts.btree_node_size >> 3; -+ -+ /* Always leave one extra u64 for bch2_varint_decode: */ -+ used++; -+ -+ return total - used; -+} -+ -+static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, -+ struct btree *b) -+{ -+ ssize_t remaining = __bch_btree_u64s_remaining(c, b, -+ btree_bkey_last(b, bset_tree_last(b))); -+ -+ BUG_ON(remaining < 0); -+ -+ if (bset_written(b, btree_bset_last(b))) -+ return 0; -+ -+ return remaining; -+} -+ -+#define BTREE_WRITE_SET_U64s_BITS 9 -+ -+static inline unsigned btree_write_set_buffer(struct btree *b) -+{ -+ /* -+ * Could buffer up larger amounts of keys for btrees with larger keys, -+ * pending benchmarking: -+ */ -+ return 8 << BTREE_WRITE_SET_U64s_BITS; -+} -+ -+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, -+ struct btree *b) -+{ -+ struct bset_tree *t = bset_tree_last(b); -+ struct btree_node_entry *bne = max(write_block(b), -+ (void *) btree_bkey_last(b, bset_tree_last(b))); -+ ssize_t remaining_space = -+ __bch_btree_u64s_remaining(c, b, bne->keys.start); -+ -+ if (unlikely(bset_written(b, bset(b, t)))) { -+ if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) -+ return bne; -+ } else { -+ if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) && -+ remaining_space > (ssize_t) 
(btree_write_set_buffer(b) >> 3)) -+ return bne; -+ } -+ -+ return NULL; -+} -+ -+static inline void push_whiteout(struct bch_fs *c, struct btree *b, -+ struct bpos pos) -+{ -+ struct bkey_packed k; -+ -+ BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s); -+ EBUG_ON(btree_node_just_written(b)); -+ -+ if (!bkey_pack_pos(&k, pos, b)) { -+ struct bkey *u = (void *) &k; -+ -+ bkey_init(u); -+ u->p = pos; -+ } -+ -+ k.needs_whiteout = true; -+ -+ b->whiteout_u64s += k.u64s; -+ bkey_p_copy(unwritten_whiteouts_start(c, b), &k); -+} -+ -+/* -+ * write lock must be held on @b (else the dirty bset that we were going to -+ * insert into could be written out from under us) -+ */ -+static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, -+ struct btree *b, unsigned u64s) -+{ -+ if (unlikely(btree_node_need_rewrite(b))) -+ return false; -+ -+ return u64s <= bch_btree_keys_u64s_remaining(c, b); -+} -+ -+void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *); -+ -+bool bch2_btree_interior_updates_flush(struct bch_fs *); -+ -+void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); -+struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, -+ struct jset_entry *, unsigned long); -+ -+void bch2_do_pending_node_rewrites(struct bch_fs *); -+void bch2_free_pending_node_rewrites(struct bch_fs *); -+ -+void bch2_fs_btree_interior_update_exit(struct bch_fs *); -+void bch2_fs_btree_interior_update_init_early(struct bch_fs *); -+int bch2_fs_btree_interior_update_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_UPDATE_INTERIOR_H */ -diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c -new file mode 100644 -index 000000000000..4e6241db518b ---- /dev/null -+++ b/fs/bcachefs/btree_write_buffer.c -@@ -0,0 +1,375 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_write_buffer.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+ -+#include -+ -+static int btree_write_buffered_key_cmp(const void *_l, const void *_r) -+{ -+ const struct btree_write_buffered_key *l = _l; -+ const struct btree_write_buffered_key *r = _r; -+ -+ return cmp_int(l->btree, r->btree) ?: -+ bpos_cmp(l->k.k.p, r->k.k.p) ?: -+ cmp_int(l->journal_seq, r->journal_seq) ?: -+ cmp_int(l->journal_offset, r->journal_offset); -+} -+ -+static int btree_write_buffered_journal_cmp(const void *_l, const void *_r) -+{ -+ const struct btree_write_buffered_key *l = _l; -+ const struct btree_write_buffered_key *r = _r; -+ -+ return cmp_int(l->journal_seq, r->journal_seq); -+} -+ -+static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct btree_write_buffered_key *wb, -+ unsigned commit_flags, -+ bool *write_locked, -+ size_t *fast) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_path *path; -+ int ret; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ path = iter->path; -+ -+ if (!*write_locked) { -+ ret = bch2_btree_node_lock_write(trans, path, &path->l[0].b->c); -+ if (ret) -+ return ret; -+ -+ bch2_btree_node_prep_for_write(trans, path, path->l[0].b); -+ *write_locked = true; -+ } -+ -+ if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) { -+ bch2_btree_node_unlock_write(trans, path, path->l[0].b); -+ *write_locked = false; -+ goto trans_commit; -+ } -+ -+ bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq); 
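/*
 * The flush path here depends on btree_write_buffered_key_cmp() defined
 * earlier in this hunk: buffered keys are sorted by (btree, position,
 * journal seq), so for any position only the newest buffered update needs
 * to be applied and older duplicates can be skipped. A self-contained
 * sketch of that sort-then-skip pattern, with an illustrative key layout
 * (not the patch's struct btree_write_buffered_key):
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct wb_key {
	unsigned btree;        /* which btree the update targets */
	uint64_t pos;          /* key position within that btree */
	uint64_t journal_seq;  /* journal sequence that produced it */
};

static int cmp_u64(uint64_t l, uint64_t r)
{
	return (l > r) - (l < r);
}

/* Sort by (btree, pos), breaking ties oldest-journal-seq first, like
 * btree_write_buffered_key_cmp() in the hunk above. */
static int wb_key_cmp(const void *_l, const void *_r)
{
	const struct wb_key *l = _l, *r = _r;
	int cmp = (l->btree > r->btree) - (l->btree < r->btree);

	if (!cmp)
		cmp = cmp_u64(l->pos, r->pos);
	if (!cmp)
		cmp = cmp_u64(l->journal_seq, r->journal_seq);
	return cmp;
}

int main(void)
{
	struct wb_key keys[] = {
		{ .btree = 0, .pos = 10, .journal_seq = 9 }, /* newer dup */
		{ .btree = 0, .pos = 11, .journal_seq = 8 },
		{ .btree = 0, .pos = 10, .journal_seq = 7 },
	};
	size_t nr = sizeof(keys) / sizeof(keys[0]), i;

	qsort(keys, nr, sizeof(keys[0]), wb_key_cmp);

	/* After sorting, duplicates of a position are adjacent with the
	 * newest last: skip a key whenever its successor rewrites it. */
	for (i = 0; i < nr; i++) {
		int dup = i + 1 < nr &&
			  keys[i].btree == keys[i + 1].btree &&
			  keys[i].pos == keys[i + 1].pos;

		printf("%s pos %llu seq %llu\n", dup ? "skip " : "flush",
		       (unsigned long long)keys[i].pos,
		       (unsigned long long)keys[i].journal_seq);
	}
	return 0;
}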
-+ (*fast)++; -+ -+ if (path->ref > 1) { -+ /* -+ * We can't clone a path that has write locks: if the path is -+ * shared, unlock before set_pos(), traverse(): -+ */ -+ bch2_btree_node_unlock_write(trans, path, path->l[0].b); -+ *write_locked = false; -+ } -+ return 0; -+trans_commit: -+ return bch2_trans_update_seq(trans, wb->journal_seq, iter, &wb->k, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ commit_flags| -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_JOURNAL_RECLAIM); -+} -+ -+static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb) -+{ -+ union btree_write_buffer_state old, new; -+ u64 v = READ_ONCE(wb->state.v); -+ -+ do { -+ old.v = new.v = v; -+ -+ new.nr = 0; -+ new.idx++; -+ } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v); -+ -+ while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1) -+ cpu_relax(); -+ -+ smp_mb(); -+ -+ return old; -+} -+ -+/* -+ * Update a btree with a write buffered key using the journal seq of the -+ * original write buffer insert. -+ * -+ * It is not safe to rejournal the key once it has been inserted into the write -+ * buffer because that may break recovery ordering. For example, the key may -+ * have already been modified in the active write buffer in a seq that comes -+ * before the current transaction. If we were to journal this key again and -+ * crash, recovery would process updates in the wrong order. -+ */ -+static int -+btree_write_buffered_insert(struct btree_trans *trans, -+ struct btree_write_buffered_key *wb) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, wb->btree, bkey_start_pos(&wb->k.k), -+ BTREE_ITER_CACHED|BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update_seq(trans, wb->journal_seq, &iter, &wb->k, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int __bch2_btree_write_buffer_flush(struct btree_trans *trans, unsigned commit_flags, -+ bool locked) -+{ -+ struct bch_fs *c = trans->c; -+ struct journal *j = &c->journal; -+ struct btree_write_buffer *wb = &c->btree_write_buffer; -+ struct journal_entry_pin pin; -+ struct btree_write_buffered_key *i, *keys; -+ struct btree_iter iter = { NULL }; -+ size_t nr = 0, skipped = 0, fast = 0, slowpath = 0; -+ bool write_locked = false; -+ union btree_write_buffer_state s; -+ int ret = 0; -+ -+ memset(&pin, 0, sizeof(pin)); -+ -+ if (!locked && !mutex_trylock(&wb->flush_lock)) -+ return 0; -+ -+ bch2_journal_pin_copy(j, &pin, &wb->journal_pin, NULL); -+ bch2_journal_pin_drop(j, &wb->journal_pin); -+ -+ s = btree_write_buffer_switch(wb); -+ keys = wb->keys[s.idx]; -+ nr = s.nr; -+ -+ if (race_fault()) -+ goto slowpath; -+ -+ /* -+ * We first sort so that we can detect and skip redundant updates, and -+ * then we attempt to flush in sorted btree order, as this is most -+ * efficient. -+ * -+ * However, since we're not flushing in the order they appear in the -+ * journal we won't be able to drop our journal pin until everything is -+ * flushed - which means this could deadlock the journal if we weren't -+ * passing BTREE_INSERT_JOURNAL_RECLAIM. This causes the update to fail -+ * if it would block taking a journal reservation. -+ * -+ * If that happens, simply skip the key so we can optimistically insert -+ * as many keys as possible in the fast path. 
-+	 */
-+	sort(keys, nr, sizeof(keys[0]),
-+	     btree_write_buffered_key_cmp, NULL);
-+
-+	for (i = keys; i < keys + nr; i++) {
-+		if (i + 1 < keys + nr &&
-+		    i[0].btree == i[1].btree &&
-+		    bpos_eq(i[0].k.k.p, i[1].k.k.p)) {
-+			skipped++;
-+			i->journal_seq = 0;
-+			continue;
-+		}
-+
-+		if (write_locked &&
-+		    (iter.path->btree_id != i->btree ||
-+		     bpos_gt(i->k.k.p, iter.path->l[0].b->key.k.p))) {
-+			bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
-+			write_locked = false;
-+		}
-+
-+		if (!iter.path || iter.path->btree_id != i->btree) {
-+			bch2_trans_iter_exit(trans, &iter);
-+			bch2_trans_iter_init(trans, &iter, i->btree, i->k.k.p,
-+					     BTREE_ITER_INTENT|BTREE_ITER_ALL_SNAPSHOTS);
-+		}
-+
-+		bch2_btree_iter_set_pos(&iter, i->k.k.p);
-+		iter.path->preserve = false;
-+
-+		do {
-+			ret = bch2_btree_write_buffer_flush_one(trans, &iter, i,
-+					commit_flags, &write_locked, &fast);
-+			if (!write_locked)
-+				bch2_trans_begin(trans);
-+		} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
-+
-+		if (ret == -BCH_ERR_journal_reclaim_would_deadlock) {
-+			slowpath++;
-+			continue;
-+		}
-+		if (ret)
-+			break;
-+
-+		i->journal_seq = 0;
-+	}
-+
-+	if (write_locked)
-+		bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b);
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	trace_write_buffer_flush(trans, nr, skipped, fast, wb->size);
-+
-+	if (slowpath)
-+		goto slowpath;
-+
-+	bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
-+out:
-+	bch2_journal_pin_drop(j, &pin);
-+	mutex_unlock(&wb->flush_lock);
-+	return ret;
-+slowpath:
-+	trace_write_buffer_flush_slowpath(trans, i - keys, nr);
-+
-+	/*
-+	 * Now sort the rest by journal seq and bump the journal pin as we go.
-+	 * The fast path above zeroed the seq of keys that were successfully
-+	 * flushed, so we can skip those here.
-+ */ -+ sort(keys, nr, sizeof(keys[0]), -+ btree_write_buffered_journal_cmp, -+ NULL); -+ -+ commit_flags &= ~BCH_WATERMARK_MASK; -+ commit_flags |= BCH_WATERMARK_reclaim; -+ -+ for (i = keys; i < keys + nr; i++) { -+ if (!i->journal_seq) -+ continue; -+ -+ if (i->journal_seq > pin.seq) { -+ struct journal_entry_pin pin2; -+ -+ memset(&pin2, 0, sizeof(pin2)); -+ -+ bch2_journal_pin_add(j, i->journal_seq, &pin2, NULL); -+ bch2_journal_pin_drop(j, &pin); -+ bch2_journal_pin_copy(j, &pin, &pin2, NULL); -+ bch2_journal_pin_drop(j, &pin2); -+ } -+ -+ ret = commit_do(trans, NULL, NULL, -+ commit_flags| -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_JOURNAL_RECLAIM, -+ btree_write_buffered_insert(trans, i)); -+ if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret))) -+ break; -+ } -+ -+ goto out; -+} -+ -+int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) -+{ -+ bch2_trans_unlock(trans); -+ mutex_lock(&trans->c->btree_write_buffer.flush_lock); -+ return __bch2_btree_write_buffer_flush(trans, 0, true); -+} -+ -+int bch2_btree_write_buffer_flush(struct btree_trans *trans) -+{ -+ return __bch2_btree_write_buffer_flush(trans, 0, false); -+} -+ -+static int bch2_btree_write_buffer_journal_flush(struct journal *j, -+ struct journal_entry_pin *_pin, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct btree_write_buffer *wb = &c->btree_write_buffer; -+ -+ mutex_lock(&wb->flush_lock); -+ -+ return bch2_trans_run(c, -+ __bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true)); -+} -+ -+static inline u64 btree_write_buffer_ref(int idx) -+{ -+ return ((union btree_write_buffer_state) { -+ .ref0 = idx == 0, -+ .ref1 = idx == 1, -+ }).v; -+} -+ -+int bch2_btree_insert_keys_write_buffer(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_write_buffer *wb = &c->btree_write_buffer; -+ struct btree_write_buffered_key *i; -+ union btree_write_buffer_state old, new; -+ int ret = 0; -+ u64 v; -+ -+ trans_for_each_wb_update(trans, i) { -+ EBUG_ON(i->k.k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); -+ -+ i->journal_seq = trans->journal_res.seq; -+ i->journal_offset = trans->journal_res.offset; -+ } -+ -+ preempt_disable(); -+ v = READ_ONCE(wb->state.v); -+ do { -+ old.v = new.v = v; -+ -+ new.v += btree_write_buffer_ref(new.idx); -+ new.nr += trans->nr_wb_updates; -+ if (new.nr > wb->size) { -+ ret = -BCH_ERR_btree_insert_need_flush_buffer; -+ goto out; -+ } -+ } while ((v = atomic64_cmpxchg_acquire(&wb->state.counter, old.v, new.v)) != old.v); -+ -+ memcpy(wb->keys[new.idx] + old.nr, -+ trans->wb_updates, -+ sizeof(trans->wb_updates[0]) * trans->nr_wb_updates); -+ -+ bch2_journal_pin_add(&c->journal, trans->journal_res.seq, &wb->journal_pin, -+ bch2_btree_write_buffer_journal_flush); -+ -+ atomic64_sub_return_release(btree_write_buffer_ref(new.idx), &wb->state.counter); -+out: -+ preempt_enable(); -+ return ret; -+} -+ -+void bch2_fs_btree_write_buffer_exit(struct bch_fs *c) -+{ -+ struct btree_write_buffer *wb = &c->btree_write_buffer; -+ -+ BUG_ON(wb->state.nr && !bch2_journal_error(&c->journal)); -+ -+ kvfree(wb->keys[1]); -+ kvfree(wb->keys[0]); -+} -+ -+int bch2_fs_btree_write_buffer_init(struct bch_fs *c) -+{ -+ struct btree_write_buffer *wb = &c->btree_write_buffer; -+ -+ mutex_init(&wb->flush_lock); -+ wb->size = c->opts.btree_write_buffer_size; -+ -+ wb->keys[0] = kvmalloc_array(wb->size, sizeof(*wb->keys[0]), GFP_KERNEL); -+ wb->keys[1] = kvmalloc_array(wb->size, sizeof(*wb->keys[1]), GFP_KERNEL); -+ if 
(!wb->keys[0] || !wb->keys[1]) -+ return -BCH_ERR_ENOMEM_fs_btree_write_buffer_init; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h -new file mode 100644 -index 000000000000..322df1c8304e ---- /dev/null -+++ b/fs/bcachefs/btree_write_buffer.h -@@ -0,0 +1,14 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_H -+#define _BCACHEFS_BTREE_WRITE_BUFFER_H -+ -+int __bch2_btree_write_buffer_flush(struct btree_trans *, unsigned, bool); -+int bch2_btree_write_buffer_flush_sync(struct btree_trans *); -+int bch2_btree_write_buffer_flush(struct btree_trans *); -+ -+int bch2_btree_insert_keys_write_buffer(struct btree_trans *); -+ -+void bch2_fs_btree_write_buffer_exit(struct bch_fs *); -+int bch2_fs_btree_write_buffer_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */ -diff --git a/fs/bcachefs/btree_write_buffer_types.h b/fs/bcachefs/btree_write_buffer_types.h -new file mode 100644 -index 000000000000..99993ba77aea ---- /dev/null -+++ b/fs/bcachefs/btree_write_buffer_types.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H -+#define _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H -+ -+#include "journal_types.h" -+ -+#define BTREE_WRITE_BUFERED_VAL_U64s_MAX 4 -+#define BTREE_WRITE_BUFERED_U64s_MAX (BKEY_U64s + BTREE_WRITE_BUFERED_VAL_U64s_MAX) -+ -+struct btree_write_buffered_key { -+ u64 journal_seq; -+ unsigned journal_offset; -+ enum btree_id btree; -+ __BKEY_PADDED(k, BTREE_WRITE_BUFERED_VAL_U64s_MAX); -+}; -+ -+union btree_write_buffer_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u64 nr:23; -+ u64 idx:1; -+ u64 ref0:20; -+ u64 ref1:20; -+ }; -+}; -+ -+struct btree_write_buffer { -+ struct mutex flush_lock; -+ struct journal_entry_pin journal_pin; -+ -+ union btree_write_buffer_state state; -+ size_t size; -+ -+ struct btree_write_buffered_key *keys[2]; -+}; -+ -+#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_TYPES_H */ -diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c -new file mode 100644 -index 000000000000..58d8c6ffd955 ---- /dev/null -+++ b/fs/bcachefs/buckets.c -@@ -0,0 +1,2168 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for manipulating bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. 
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "backpointers.h"
-+#include "bset.h"
-+#include "btree_gc.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "buckets_waiting_for_journal.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "inode.h"
-+#include "movinggc.h"
-+#include "recovery.h"
-+#include "reflink.h"
-+#include "replicas.h"
-+#include "subvolume.h"
-+#include "trace.h"
-+
-+#include <linux/preempt.h>
-+
-+static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
-+					      enum bch_data_type data_type,
-+					      s64 sectors)
-+{
-+	switch (data_type) {
-+	case BCH_DATA_btree:
-+		fs_usage->btree += sectors;
-+		break;
-+	case BCH_DATA_user:
-+	case BCH_DATA_parity:
-+		fs_usage->data += sectors;
-+		break;
-+	case BCH_DATA_cached:
-+		fs_usage->cached += sectors;
-+		break;
-+	default:
-+		break;
-+	}
-+}
-+
-+void bch2_fs_usage_initialize(struct bch_fs *c)
-+{
-+	struct bch_fs_usage *usage;
-+	struct bch_dev *ca;
-+	unsigned i;
-+
-+	percpu_down_write(&c->mark_lock);
-+	usage = c->usage_base;
-+
-+	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
-+		bch2_fs_usage_acc_to_base(c, i);
-+
-+	for (i = 0; i < BCH_REPLICAS_MAX; i++)
-+		usage->reserved += usage->persistent_reserved[i];
-+
-+	for (i = 0; i < c->replicas.nr; i++) {
-+		struct bch_replicas_entry *e =
-+			cpu_replicas_entry(&c->replicas, i);
-+
-+		fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
-+	}
-+
-+	for_each_member_device(ca, c, i) {
-+		struct bch_dev_usage dev = bch2_dev_usage_read(ca);
-+
-+		usage->hidden += (dev.d[BCH_DATA_sb].buckets +
-+				  dev.d[BCH_DATA_journal].buckets) *
-+			ca->mi.bucket_size;
-+	}
-+
-+	percpu_up_write(&c->mark_lock);
-+}
-+
-+static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
-+						  unsigned journal_seq,
-+						  bool gc)
-+{
-+	BUG_ON(!gc && !journal_seq);
-+
-+	return this_cpu_ptr(gc
-+			    ?
ca->usage_gc -+ : ca->usage[journal_seq & JOURNAL_BUF_MASK]); -+} -+ -+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) -+{ -+ struct bch_fs *c = ca->fs; -+ unsigned seq, i, u64s = dev_usage_u64s(); -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ memcpy(usage, ca->usage_base, u64s * sizeof(u64)); -+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++) -+ acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+} -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *c, u64 *v) -+{ -+ ssize_t offset = v - (u64 *) c->usage_base; -+ unsigned i, seq; -+ u64 ret; -+ -+ BUG_ON(offset < 0 || offset >= fs_usage_u64s(c)); -+ percpu_rwsem_assert_held(&c->mark_lock); -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ ret = *v; -+ -+ for (i = 0; i < ARRAY_SIZE(c->usage); i++) -+ ret += percpu_u64_get((u64 __percpu *) c->usage[i] + offset); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *c) -+{ -+ struct bch_fs_usage_online *ret; -+ unsigned nr_replicas = READ_ONCE(c->replicas.nr); -+ unsigned seq, i; -+retry: -+ ret = kmalloc(__fs_usage_online_u64s(nr_replicas) * sizeof(u64), GFP_KERNEL); -+ if (unlikely(!ret)) -+ return NULL; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ if (nr_replicas != c->replicas.nr) { -+ nr_replicas = c->replicas.nr; -+ percpu_up_read(&c->mark_lock); -+ kfree(ret); -+ goto retry; -+ } -+ -+ ret->online_reserved = percpu_u64_get(c->online_reserved); -+ -+ do { -+ seq = read_seqcount_begin(&c->usage_lock); -+ unsafe_memcpy(&ret->u, c->usage_base, -+ __fs_usage_u64s(nr_replicas) * sizeof(u64), -+ "embedded variable length struct"); -+ for (i = 0; i < ARRAY_SIZE(c->usage); i++) -+ acc_u64s_percpu((u64 *) &ret->u, (u64 __percpu *) c->usage[i], -+ __fs_usage_u64s(nr_replicas)); -+ } while (read_seqcount_retry(&c->usage_lock, seq)); -+ -+ return ret; -+} -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx) -+{ -+ struct bch_dev *ca; -+ unsigned i, u64s = fs_usage_u64s(c); -+ -+ BUG_ON(idx >= ARRAY_SIZE(c->usage)); -+ -+ preempt_disable(); -+ write_seqcount_begin(&c->usage_lock); -+ -+ acc_u64s_percpu((u64 *) c->usage_base, -+ (u64 __percpu *) c->usage[idx], u64s); -+ percpu_memset(c->usage[idx], 0, u64s * sizeof(u64)); -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, NULL) { -+ u64s = dev_usage_u64s(); -+ -+ acc_u64s_percpu((u64 *) ca->usage_base, -+ (u64 __percpu *) ca->usage[idx], u64s); -+ percpu_memset(ca->usage[idx], 0, u64s * sizeof(u64)); -+ } -+ rcu_read_unlock(); -+ -+ write_seqcount_end(&c->usage_lock); -+ preempt_enable(); -+} -+ -+void bch2_fs_usage_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_fs_usage_online *fs_usage) -+{ -+ unsigned i; -+ -+ prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity); -+ -+ prt_printf(out, "hidden:\t\t\t\t%llu\n", -+ fs_usage->u.hidden); -+ prt_printf(out, "data:\t\t\t\t%llu\n", -+ fs_usage->u.data); -+ prt_printf(out, "cached:\t\t\t\t%llu\n", -+ fs_usage->u.cached); -+ prt_printf(out, "reserved:\t\t\t%llu\n", -+ fs_usage->u.reserved); -+ prt_printf(out, "nr_inodes:\t\t\t%llu\n", -+ fs_usage->u.nr_inodes); -+ prt_printf(out, "online reserved:\t\t%llu\n", -+ fs_usage->online_reserved); -+ -+ for (i = 0; -+ i < ARRAY_SIZE(fs_usage->u.persistent_reserved); -+ i++) { -+ prt_printf(out, "%u replicas:\n", i + 1); -+ prt_printf(out, "\treserved:\t\t%llu\n", -+ fs_usage->u.persistent_reserved[i]); -+ } -+ -+ 
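The readers earlier in this hunk — bch2_dev_usage_read_fast, bch2_fs_usage_read_one, bch2_fs_usage_read — all follow the same seqcount discipline: snapshot the counters, then retry if the sequence number changed underneath, meaning a writer (bch2_fs_usage_acc_to_base) ran concurrently. A minimal C11 rendering of that retry protocol, with toy names rather than the kernel's seqcount_t API, and ignoring the real API's lockdep and preemption handling:

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic unsigned seq;	/* even: no writer; odd: write in progress */
	static long counters[2];	/* the data the seqcount protects */

	static void write_counters(long a, long b)
	{
		atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel);  /* -> odd */
		counters[0] = a;
		counters[1] = b;
		atomic_fetch_add_explicit(&seq, 1, memory_order_release);  /* -> even */
	}

	static void read_counters(long *a, long *b)
	{
		unsigned before, after;

		do {
			before = atomic_load_explicit(&seq, memory_order_acquire);
			*a = counters[0];	/* may be torn; the loop catches it */
			*b = counters[1];
			atomic_thread_fence(memory_order_acquire);
			after = atomic_load_explicit(&seq, memory_order_relaxed);
		} while ((before & 1) || before != after);	/* writer ran: retry */
	}

	int main(void)
	{
		long a, b;

		write_counters(42, 7);
		read_counters(&a, &b);
		printf("a=%ld b=%ld\n", a, b);
		return 0;
	}

The read_seqcount_begin()/read_seqcount_retry() pair used above is essentially this loop, plus the memory-ordering and debugging machinery the kernel layers on top.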
for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ prt_printf(out, "\t"); -+ bch2_replicas_entry_to_text(out, e); -+ prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]); -+ } -+} -+ -+static u64 reserve_factor(u64 r) -+{ -+ return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); -+} -+ -+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage) -+{ -+ return min(fs_usage->u.hidden + -+ fs_usage->u.btree + -+ fs_usage->u.data + -+ reserve_factor(fs_usage->u.reserved + -+ fs_usage->online_reserved), -+ c->capacity); -+} -+ -+static struct bch_fs_usage_short -+__bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct bch_fs_usage_short ret; -+ u64 data, reserved; -+ -+ ret.capacity = c->capacity - -+ bch2_fs_usage_read_one(c, &c->usage_base->hidden); -+ -+ data = bch2_fs_usage_read_one(c, &c->usage_base->data) + -+ bch2_fs_usage_read_one(c, &c->usage_base->btree); -+ reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + -+ percpu_u64_get(c->online_reserved); -+ -+ ret.used = min(ret.capacity, data + reserve_factor(reserved)); -+ ret.free = ret.capacity - ret.used; -+ -+ ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); -+ -+ return ret; -+} -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *c) -+{ -+ struct bch_fs_usage_short ret; -+ -+ percpu_down_read(&c->mark_lock); -+ ret = __bch2_fs_usage_read_short(c); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+void bch2_dev_usage_init(struct bch_dev *ca) -+{ -+ ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket; -+} -+ -+static inline int bucket_sectors_fragmented(struct bch_dev *ca, -+ struct bch_alloc_v4 a) -+{ -+ return a.dirty_sectors -+ ? 
max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors) -+ : 0; -+} -+ -+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, -+ struct bch_alloc_v4 old, -+ struct bch_alloc_v4 new, -+ u64 journal_seq, bool gc) -+{ -+ struct bch_fs_usage *fs_usage; -+ struct bch_dev_usage *u; -+ -+ preempt_disable(); -+ fs_usage = fs_usage_ptr(c, journal_seq, gc); -+ -+ if (data_type_is_hidden(old.data_type)) -+ fs_usage->hidden -= ca->mi.bucket_size; -+ if (data_type_is_hidden(new.data_type)) -+ fs_usage->hidden += ca->mi.bucket_size; -+ -+ u = dev_usage_ptr(ca, journal_seq, gc); -+ -+ u->d[old.data_type].buckets--; -+ u->d[new.data_type].buckets++; -+ -+ u->buckets_ec -= (int) !!old.stripe; -+ u->buckets_ec += (int) !!new.stripe; -+ -+ u->d[old.data_type].sectors -= old.dirty_sectors; -+ u->d[new.data_type].sectors += new.dirty_sectors; -+ -+ u->d[BCH_DATA_cached].sectors += new.cached_sectors; -+ u->d[BCH_DATA_cached].sectors -= old.cached_sectors; -+ -+ u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old); -+ u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new); -+ -+ preempt_enable(); -+} -+ -+static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, -+ struct bucket old, struct bucket new, -+ u64 journal_seq, bool gc) -+{ -+ struct bch_alloc_v4 old_a = { -+ .gen = old.gen, -+ .data_type = old.data_type, -+ .dirty_sectors = old.dirty_sectors, -+ .cached_sectors = old.cached_sectors, -+ .stripe = old.stripe, -+ }; -+ struct bch_alloc_v4 new_a = { -+ .gen = new.gen, -+ .data_type = new.data_type, -+ .dirty_sectors = new.dirty_sectors, -+ .cached_sectors = new.cached_sectors, -+ .stripe = new.stripe, -+ }; -+ -+ bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc); -+} -+ -+static inline int __update_replicas(struct bch_fs *c, -+ struct bch_fs_usage *fs_usage, -+ struct bch_replicas_entry *r, -+ s64 sectors) -+{ -+ int idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) -+ return -1; -+ -+ fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); -+ fs_usage->replicas[idx] += sectors; -+ return 0; -+} -+ -+static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_replicas_entry *r, s64 sectors, -+ unsigned journal_seq, bool gc) -+{ -+ struct bch_fs_usage *fs_usage; -+ int idx, ret = 0; -+ struct printbuf buf = PRINTBUF; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ idx = bch2_replicas_entry_idx(c, r); -+ if (idx < 0 && -+ fsck_err(c, ptr_to_missing_replicas_entry, -+ "no replicas entry\n while marking %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ percpu_up_read(&c->mark_lock); -+ ret = bch2_mark_replicas(c, r); -+ percpu_down_read(&c->mark_lock); -+ -+ if (ret) -+ goto err; -+ idx = bch2_replicas_entry_idx(c, r); -+ } -+ if (idx < 0) { -+ ret = -1; -+ goto err; -+ } -+ -+ preempt_disable(); -+ fs_usage = fs_usage_ptr(c, journal_seq, gc); -+ fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); -+ fs_usage->replicas[idx] += sectors; -+ preempt_enable(); -+err: -+fsck_err: -+ percpu_up_read(&c->mark_lock); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static inline int update_cached_sectors(struct bch_fs *c, -+ struct bkey_s_c k, -+ unsigned dev, s64 sectors, -+ unsigned journal_seq, bool gc) -+{ -+ struct bch_replicas_padded r; -+ -+ bch2_replicas_entry_cached(&r.e, dev); -+ -+ return update_replicas(c, k, &r.e, sectors, journal_seq, gc); -+} -+ -+static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more, -+ gfp_t gfp) -+{ -+ struct replicas_delta_list *d = 
trans->fs_usage_deltas;
-+	unsigned new_size = d ? (d->size + more) * 2 : 128;
-+	unsigned alloc_size = sizeof(*d) + new_size;
-+
-+	WARN_ON_ONCE(alloc_size > REPLICAS_DELTA_LIST_MAX);
-+
-+	if (!d || d->used + more > d->size) {
-+		d = krealloc(d, alloc_size, gfp|__GFP_ZERO);
-+
-+		if (unlikely(!d)) {
-+			if (alloc_size > REPLICAS_DELTA_LIST_MAX)
-+				return -ENOMEM;
-+
-+			d = mempool_alloc(&trans->c->replicas_delta_pool, gfp);
-+			if (!d)
-+				return -ENOMEM;
-+
-+			memset(d, 0, REPLICAS_DELTA_LIST_MAX);
-+
-+			if (trans->fs_usage_deltas)
-+				memcpy(d, trans->fs_usage_deltas,
-+				       trans->fs_usage_deltas->size + sizeof(*d));
-+
-+			new_size = REPLICAS_DELTA_LIST_MAX - sizeof(*d);
-+			kfree(trans->fs_usage_deltas);
-+		}
-+
-+		d->size = new_size;
-+		trans->fs_usage_deltas = d;
-+	}
-+
-+	return 0;
-+}
-+
-+int bch2_replicas_deltas_realloc(struct btree_trans *trans, unsigned more)
-+{
-+	return allocate_dropping_locks_errcode(trans,
-+				__replicas_deltas_realloc(trans, more, _gfp));
-+}
-+
-+static inline int update_replicas_list(struct btree_trans *trans,
-+					struct bch_replicas_entry *r,
-+					s64 sectors)
-+{
-+	struct replicas_delta_list *d;
-+	struct replicas_delta *n;
-+	unsigned b;
-+	int ret;
-+
-+	if (!sectors)
-+		return 0;
-+
-+	b = replicas_entry_bytes(r) + 8;
-+	ret = bch2_replicas_deltas_realloc(trans, b);
-+	if (ret)
-+		return ret;
-+
-+	d = trans->fs_usage_deltas;
-+	n = (void *) d->d + d->used;
-+	n->delta = sectors;
-+	unsafe_memcpy((void *) n + offsetof(struct replicas_delta, r),
-+		      r, replicas_entry_bytes(r),
-+		      "flexible array member embedded in struct with padding");
-+	bch2_replicas_entry_sort(&n->r);
-+	d->used += b;
-+	return 0;
-+}
-+
-+static inline int update_cached_sectors_list(struct btree_trans *trans,
-+					     unsigned dev, s64 sectors)
-+{
-+	struct bch_replicas_padded r;
-+
-+	bch2_replicas_entry_cached(&r.e, dev);
-+
-+	return update_replicas_list(trans, &r.e, sectors);
-+}
-+
-+int bch2_mark_alloc(struct btree_trans *trans,
-+		    enum btree_id btree, unsigned level,
-+		    struct bkey_s_c old, struct bkey_s_c new,
-+		    unsigned flags)
-+{
-+	bool gc = flags & BTREE_TRIGGER_GC;
-+	u64 journal_seq = trans->journal_res.seq;
-+	u64 bucket_journal_seq;
-+	struct bch_fs *c = trans->c;
-+	struct bch_alloc_v4 old_a_convert, new_a_convert;
-+	const struct bch_alloc_v4 *old_a, *new_a;
-+	struct bch_dev *ca;
-+	int ret = 0;
-+
-+	/*
-+	 * alloc btree is read in by bch2_alloc_read, not gc:
-+	 */
-+	if ((flags & BTREE_TRIGGER_GC) &&
-+	    !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
-+		return 0;
-+
-+	if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
-+				       "alloc key for invalid device or bucket"))
-+		return -EIO;
-+
-+	ca = bch_dev_bkey_exists(c, new.k->p.inode);
-+
-+	old_a = bch2_alloc_to_v4(old, &old_a_convert);
-+	new_a = bch2_alloc_to_v4(new, &new_a_convert);
-+
-+	bucket_journal_seq = new_a->journal_seq;
-+
-+	if ((flags & BTREE_TRIGGER_INSERT) &&
-+	    data_type_is_empty(old_a->data_type) !=
-+	    data_type_is_empty(new_a->data_type) &&
-+	    new.k->type == KEY_TYPE_alloc_v4) {
-+		struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
-+
-+		EBUG_ON(!journal_seq);
-+
-+		/*
-+		 * If the btree updates referring to a bucket weren't flushed
-+		 * before the bucket became empty again, then we don't have
-+		 * to wait on a journal flush before we can reuse the bucket:
-+		 */
-+		v->journal_seq = bucket_journal_seq =
-+			data_type_is_empty(new_a->data_type) &&
-+			(journal_seq == v->journal_seq ||
-+			 bch2_journal_noflush_seq(&c->journal, v->journal_seq))
-+			?
0 : journal_seq; -+ } -+ -+ if (!data_type_is_empty(old_a->data_type) && -+ data_type_is_empty(new_a->data_type) && -+ bucket_journal_seq) { -+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, -+ c->journal.flushed_seq_ondisk, -+ new.k->p.inode, new.k->p.offset, -+ bucket_journal_seq); -+ if (ret) { -+ bch2_fs_fatal_error(c, -+ "error setting bucket_needs_journal_commit: %i", ret); -+ return ret; -+ } -+ } -+ -+ percpu_down_read(&c->mark_lock); -+ if (!gc && new_a->gen != old_a->gen) -+ *bucket_gen(ca, new.k->p.offset) = new_a->gen; -+ -+ bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc); -+ -+ if (gc) { -+ struct bucket *g = gc_bucket(ca, new.k->p.offset); -+ -+ bucket_lock(g); -+ -+ g->gen_valid = 1; -+ g->gen = new_a->gen; -+ g->data_type = new_a->data_type; -+ g->stripe = new_a->stripe; -+ g->stripe_redundancy = new_a->stripe_redundancy; -+ g->dirty_sectors = new_a->dirty_sectors; -+ g->cached_sectors = new_a->cached_sectors; -+ -+ bucket_unlock(g); -+ } -+ percpu_up_read(&c->mark_lock); -+ -+ /* -+ * need to know if we're getting called from the invalidate path or -+ * not: -+ */ -+ -+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && -+ old_a->cached_sectors) { -+ ret = update_cached_sectors(c, new, ca->dev_idx, -+ -((s64) old_a->cached_sectors), -+ journal_seq, gc); -+ if (ret) { -+ bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", -+ __func__); -+ return ret; -+ } -+ } -+ -+ if (new_a->data_type == BCH_DATA_free && -+ (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) -+ closure_wake_up(&c->freelist_wait); -+ -+ if (new_a->data_type == BCH_DATA_need_discard && -+ (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) -+ bch2_do_discards(c); -+ -+ if (old_a->data_type != BCH_DATA_cached && -+ new_a->data_type == BCH_DATA_cached && -+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) -+ bch2_do_invalidates(c); -+ -+ if (new_a->data_type == BCH_DATA_need_gc_gens) -+ bch2_do_gc_gens(c); -+ -+ return 0; -+} -+ -+int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, -+ size_t b, enum bch_data_type data_type, -+ unsigned sectors, struct gc_pos pos, -+ unsigned flags) -+{ -+ struct bucket old, new, *g; -+ int ret = 0; -+ -+ BUG_ON(!(flags & BTREE_TRIGGER_GC)); -+ BUG_ON(data_type != BCH_DATA_sb && -+ data_type != BCH_DATA_journal); -+ -+ /* -+ * Backup superblock might be past the end of our normal usable space: -+ */ -+ if (b >= ca->mi.nbuckets) -+ return 0; -+ -+ percpu_down_read(&c->mark_lock); -+ g = gc_bucket(ca, b); -+ -+ bucket_lock(g); -+ old = *g; -+ -+ if (bch2_fs_inconsistent_on(g->data_type && -+ g->data_type != data_type, c, -+ "different types of data in same bucket: %s, %s", -+ bch2_data_types[g->data_type], -+ bch2_data_types[data_type])) { -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size", -+ ca->dev_idx, b, g->gen, -+ bch2_data_types[g->data_type ?: data_type], -+ g->dirty_sectors, sectors)) { -+ ret = -EIO; -+ goto err; -+ } -+ -+ -+ g->data_type = data_type; -+ g->dirty_sectors += sectors; -+ new = *g; -+err: -+ bucket_unlock(g); -+ if (!ret) -+ bch2_dev_usage_update_m(c, ca, old, new, 0, true); -+ percpu_up_read(&c->mark_lock); -+ return ret; -+} -+ -+static int check_bucket_ref(struct btree_trans *trans, -+ struct bkey_s_c k, -+ const struct bch_extent_ptr *ptr, -+ s64 sectors, 
enum bch_data_type ptr_data_type, -+ u8 b_gen, u8 bucket_data_type, -+ u32 dirty_sectors, u32 cached_sectors) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); -+ u32 bucket_sectors = !ptr->cached -+ ? dirty_sectors -+ : cached_sectors; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ if (bucket_data_type == BCH_DATA_cached) -+ bucket_data_type = BCH_DATA_user; -+ -+ if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) || -+ (bucket_data_type == BCH_DATA_user && ptr_data_type == BCH_DATA_stripe)) -+ bucket_data_type = ptr_data_type = BCH_DATA_stripe; -+ -+ if (gen_after(ptr->gen, b_gen)) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, b_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ ptr->gen, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ BCH_FSCK_ERR_ptr_too_stale, -+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, b_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ ptr->gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (b_gen != ptr->gen && !ptr->cached) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ BCH_FSCK_ERR_stale_dirty_ptr, -+ "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, b_gen, -+ *bucket_gen(ca, bucket_nr), -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ ptr->gen, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (b_gen != ptr->gen) { -+ ret = 1; -+ goto out; -+ } -+ -+ if (!data_type_is_empty(bucket_data_type) && -+ ptr_data_type && -+ bucket_data_type != ptr_data_type) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ BCH_FSCK_ERR_ptr_bucket_data_type_mismatch, -+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, b_gen, -+ bch2_data_types[bucket_data_type], -+ bch2_data_types[ptr_data_type], -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if ((u64) bucket_sectors + sectors > U32_MAX) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ BCH_FSCK_ERR_bucket_sector_count_overflow, -+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" -+ "while marking %s", -+ ptr->dev, bucket_nr, b_gen, -+ bch2_data_types[bucket_data_type ?: ptr_data_type], -+ bucket_sectors, sectors, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ ret = -EIO; -+ goto err; -+ } -+out: -+ printbuf_exit(&buf); -+ return ret; -+err: -+ bch2_dump_trans_updates(trans); -+ goto out; -+} -+ -+static int mark_stripe_bucket(struct btree_trans *trans, -+ struct bkey_s_c k, -+ unsigned ptr_idx, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ u64 journal_seq = trans->journal_res.seq; -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ unsigned nr_data = s->nr_blocks - s->nr_redundant; -+ bool parity = ptr_idx >= nr_data; -+ enum bch_data_type 
data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
-+	s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
-+	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
-+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-+	struct bucket old, new, *g;
-+	struct printbuf buf = PRINTBUF;
-+	int ret = 0;
-+
-+	BUG_ON(!(flags & BTREE_TRIGGER_GC));
-+
-+	/* XXX: doesn't handle deletion */
-+
-+	percpu_down_read(&c->mark_lock);
-+	g = PTR_GC_BUCKET(ca, ptr);
-+
-+	if (g->dirty_sectors ||
-+	    (g->stripe && g->stripe != k.k->p.offset)) {
-+		bch2_fs_inconsistent(c,
-+			      "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
-+			      ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
-+			      (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-+		ret = -EINVAL;
-+		goto err;
-+	}
-+
-+	bucket_lock(g);
-+	old = *g;
-+
-+	ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
-+			       g->gen, g->data_type,
-+			       g->dirty_sectors, g->cached_sectors);
-+	if (ret)
-+		goto err;
-+
-+	g->data_type = data_type;
-+	g->dirty_sectors += sectors;
-+
-+	g->stripe		= k.k->p.offset;
-+	g->stripe_redundancy	= s->nr_redundant;
-+	new = *g;
-+err:
-+	bucket_unlock(g);
-+	if (!ret)
-+		bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
-+	percpu_up_read(&c->mark_lock);
-+	printbuf_exit(&buf);
-+	return ret;
-+}
-+
-+static int __mark_pointer(struct btree_trans *trans,
-+			  struct bkey_s_c k,
-+			  const struct bch_extent_ptr *ptr,
-+			  s64 sectors, enum bch_data_type ptr_data_type,
-+			  u8 bucket_gen, u8 *bucket_data_type,
-+			  u32 *dirty_sectors, u32 *cached_sectors)
-+{
-+	u32 *dst_sectors = !ptr->cached
-+		? dirty_sectors
-+		: cached_sectors;
-+	int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
-+				   bucket_gen, *bucket_data_type,
-+				   *dirty_sectors, *cached_sectors);
-+
-+	if (ret)
-+		return ret;
-+
-+	*dst_sectors += sectors;
-+	*bucket_data_type = *dirty_sectors || *cached_sectors
-+		?
ptr_data_type : 0; -+ return 0; -+} -+ -+static int bch2_mark_pointer(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, -+ struct extent_ptr_decoded p, -+ s64 sectors, -+ unsigned flags) -+{ -+ u64 journal_seq = trans->journal_res.seq; -+ struct bch_fs *c = trans->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ struct bucket old, new, *g; -+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); -+ u8 bucket_data_type; -+ int ret = 0; -+ -+ BUG_ON(!(flags & BTREE_TRIGGER_GC)); -+ -+ percpu_down_read(&c->mark_lock); -+ g = PTR_GC_BUCKET(ca, &p.ptr); -+ bucket_lock(g); -+ old = *g; -+ -+ bucket_data_type = g->data_type; -+ ret = __mark_pointer(trans, k, &p.ptr, sectors, -+ data_type, g->gen, -+ &bucket_data_type, -+ &g->dirty_sectors, -+ &g->cached_sectors); -+ if (!ret) -+ g->data_type = bucket_data_type; -+ -+ new = *g; -+ bucket_unlock(g); -+ if (!ret) -+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+static int bch2_mark_stripe_ptr(struct btree_trans *trans, -+ struct bkey_s_c k, -+ struct bch_extent_stripe_ptr p, -+ enum bch_data_type data_type, -+ s64 sectors, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_replicas_padded r; -+ struct gc_stripe *m; -+ -+ BUG_ON(!(flags & BTREE_TRIGGER_GC)); -+ -+ m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL); -+ if (!m) { -+ bch_err(c, "error allocating memory for gc_stripes, idx %llu", -+ (u64) p.idx); -+ return -BCH_ERR_ENOMEM_mark_stripe_ptr; -+ } -+ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ -+ if (!m || !m->alive) { -+ mutex_unlock(&c->ec_stripes_heap_lock); -+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", -+ (u64) p.idx); -+ bch2_inconsistent_error(c); -+ return -EIO; -+ } -+ -+ m->block_sectors[p.block] += sectors; -+ -+ r = m->r; -+ mutex_unlock(&c->ec_stripes_heap_lock); -+ -+ r.e.data_type = data_type; -+ update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); -+ -+ return 0; -+} -+ -+static int __mark_extent(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, unsigned flags) -+{ -+ u64 journal_seq = trans->journal_res.seq; -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ enum bch_data_type data_type = bkey_is_btree_ptr(k.k) -+ ? BCH_DATA_btree -+ : BCH_DATA_user; -+ s64 sectors = bkey_is_btree_ptr(k.k) -+ ? 
btree_sectors(c) -+ : k.k->size; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret; -+ -+ BUG_ON(!(flags & BTREE_TRIGGER_GC)); -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = ptr_disk_sectors(sectors, p); -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE) -+ disk_sectors = -disk_sectors; -+ -+ ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) { -+ ret = update_cached_sectors(c, k, p.ptr.dev, -+ disk_sectors, journal_seq, true); -+ if (ret) { -+ bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", -+ __func__); -+ return ret; -+ } -+ } -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type, -+ disk_sectors, flags); -+ if (ret) -+ return ret; -+ -+ /* -+ * There may be other dirty pointers in this extent, but -+ * if so they're not required for mounting if we have an -+ * erasure coded pointer in this extent: -+ */ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) { -+ ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); -+ if (ret) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); -+ printbuf_exit(&buf); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+int bch2_mark_extent(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); -+} -+ -+int bch2_mark_stripe(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ bool gc = flags & BTREE_TRIGGER_GC; -+ u64 journal_seq = trans->journal_res.seq; -+ struct bch_fs *c = trans->c; -+ u64 idx = new.k->p.offset; -+ const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe -+ ? bkey_s_c_to_stripe(old).v : NULL; -+ const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe -+ ? 
bkey_s_c_to_stripe(new).v : NULL; -+ unsigned i; -+ int ret; -+ -+ BUG_ON(gc && old_s); -+ -+ if (!gc) { -+ struct stripe *m = genradix_ptr(&c->stripes, idx); -+ -+ if (!m) { -+ struct printbuf buf1 = PRINTBUF; -+ struct printbuf buf2 = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf1, c, old); -+ bch2_bkey_val_to_text(&buf2, c, new); -+ bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" -+ "old %s\n" -+ "new %s", idx, buf1.buf, buf2.buf); -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ bch2_inconsistent_error(c); -+ return -1; -+ } -+ -+ if (!new_s) { -+ bch2_stripes_heap_del(c, m, idx); -+ -+ memset(m, 0, sizeof(*m)); -+ } else { -+ m->sectors = le16_to_cpu(new_s->sectors); -+ m->algorithm = new_s->algorithm; -+ m->nr_blocks = new_s->nr_blocks; -+ m->nr_redundant = new_s->nr_redundant; -+ m->blocks_nonempty = 0; -+ -+ for (i = 0; i < new_s->nr_blocks; i++) -+ m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); -+ -+ if (!old_s) -+ bch2_stripes_heap_insert(c, m, idx); -+ else -+ bch2_stripes_heap_update(c, m, idx); -+ } -+ } else { -+ struct gc_stripe *m = -+ genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); -+ -+ if (!m) { -+ bch_err(c, "error allocating memory for gc_stripes, idx %llu", -+ idx); -+ return -BCH_ERR_ENOMEM_mark_stripe; -+ } -+ /* -+ * This will be wrong when we bring back runtime gc: we should -+ * be unmarking the old key and then marking the new key -+ */ -+ m->alive = true; -+ m->sectors = le16_to_cpu(new_s->sectors); -+ m->nr_blocks = new_s->nr_blocks; -+ m->nr_redundant = new_s->nr_redundant; -+ -+ for (i = 0; i < new_s->nr_blocks; i++) -+ m->ptrs[i] = new_s->ptrs[i]; -+ -+ bch2_bkey_to_replicas(&m->r.e, new); -+ -+ /* -+ * gc recalculates this field from stripe ptr -+ * references: -+ */ -+ memset(m->block_sectors, 0, sizeof(m->block_sectors)); -+ -+ for (i = 0; i < new_s->nr_blocks; i++) { -+ ret = mark_stripe_bucket(trans, new, i, flags); -+ if (ret) -+ return ret; -+ } -+ -+ ret = update_replicas(c, new, &m->r.e, -+ ((s64) m->sectors * m->nr_redundant), -+ journal_seq, gc); -+ if (ret) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, new); -+ bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); -+ printbuf_exit(&buf); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static int __mark_reservation(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *fs_usage; -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ s64 sectors = (s64) k.k->size; -+ -+ BUG_ON(!(flags & BTREE_TRIGGER_GC)); -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE) -+ sectors = -sectors; -+ sectors *= replicas; -+ -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ -+ fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC); -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(fs_usage->persistent_reserved)); -+ -+ fs_usage->reserved += sectors; -+ fs_usage->persistent_reserved[replicas - 1] += sectors; -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ -+ return 0; -+} -+ -+int bch2_mark_reservation(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); -+} -+ -+static s64 __bch2_mark_reflink_p(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 start, u64 
end, -+ u64 *idx, unsigned flags, size_t r_idx) -+{ -+ struct bch_fs *c = trans->c; -+ struct reflink_gc *r; -+ int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; -+ u64 next_idx = end; -+ s64 ret = 0; -+ struct printbuf buf = PRINTBUF; -+ -+ if (r_idx >= c->reflink_gc_nr) -+ goto not_found; -+ -+ r = genradix_ptr(&c->reflink_gc_table, r_idx); -+ next_idx = min(next_idx, r->offset - r->size); -+ if (*idx < next_idx) -+ goto not_found; -+ -+ BUG_ON((s64) r->refcount + add < 0); -+ -+ r->refcount += add; -+ *idx = r->offset; -+ return 0; -+not_found: -+ if (fsck_err(c, reflink_p_to_missing_reflink_v, -+ "pointer to missing indirect extent\n" -+ " %s\n" -+ " missing range %llu-%llu", -+ (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), -+ *idx, next_idx)) { -+ struct bkey_i_error *new; -+ -+ new = bch2_trans_kmalloc(trans, sizeof(*new)); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto err; -+ -+ bkey_init(&new->k); -+ new->k.type = KEY_TYPE_error; -+ new->k.p = bkey_start_pos(p.k); -+ new->k.p.offset += *idx - start; -+ bch2_key_resize(&new->k, next_idx - *idx); -+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, -+ BTREE_TRIGGER_NORUN); -+ } -+ -+ *idx = next_idx; -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int __mark_reflink_p(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ struct reflink_gc *ref; -+ size_t l, r, m; -+ u64 idx = le64_to_cpu(p.v->idx), start = idx; -+ u64 end = le64_to_cpu(p.v->idx) + p.k->size; -+ int ret = 0; -+ -+ BUG_ON(!(flags & BTREE_TRIGGER_GC)); -+ -+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) { -+ idx -= le32_to_cpu(p.v->front_pad); -+ end += le32_to_cpu(p.v->back_pad); -+ } -+ -+ l = 0; -+ r = c->reflink_gc_nr; -+ while (l < r) { -+ m = l + (r - l) / 2; -+ -+ ref = genradix_ptr(&c->reflink_gc_table, m); -+ if (ref->offset <= idx) -+ l = m + 1; -+ else -+ r = m; -+ } -+ -+ while (idx < end && !ret) -+ ret = __bch2_mark_reflink_p(trans, p, start, end, -+ &idx, flags, l++); -+ -+ return ret; -+} -+ -+int bch2_mark_reflink_p(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); -+} -+ -+void bch2_trans_fs_usage_revert(struct btree_trans *trans, -+ struct replicas_delta_list *deltas) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *dst; -+ struct replicas_delta *d, *top = (void *) deltas->d + deltas->used; -+ s64 added = 0; -+ unsigned i; -+ -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ dst = fs_usage_ptr(c, trans->journal_res.seq, false); -+ -+ /* revert changes: */ -+ for (d = deltas->d; d != top; d = replicas_delta_next(d)) { -+ switch (d->r.data_type) { -+ case BCH_DATA_btree: -+ case BCH_DATA_user: -+ case BCH_DATA_parity: -+ added += d->delta; -+ } -+ BUG_ON(__update_replicas(c, dst, &d->r, -d->delta)); -+ } -+ -+ dst->nr_inodes -= deltas->nr_inodes; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ added -= deltas->persistent_reserved[i]; -+ dst->reserved -= deltas->persistent_reserved[i]; -+ dst->persistent_reserved[i] -= deltas->persistent_reserved[i]; -+ } -+ -+ if (added > 0) { -+ trans->disk_res->sectors += added; -+ this_cpu_add(*c->online_reserved, added); -+ } -+ -+ preempt_enable(); -+ 
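bch2_trans_fs_usage_revert, finishing just above, and bch2_trans_fs_usage_apply, just below, are an undo/do pair over the transaction's replicas delta list: apply walks the deltas forward, and on failure the same walk is replayed with negated deltas. A runnable toy of that pattern — plain arrays standing in for the percpu usage counters, hypothetical names throughout:

	#include <stdio.h>

	struct delta { int idx; long delta; };

	/* Apply deltas in order; if one can't be applied, undo those done so far. */
	static int apply_deltas(long *counters, const struct delta *d, int nr)
	{
		int i;

		for (i = 0; i < nr; i++) {
			if (counters[d[i].idx] + d[i].delta < 0)
				goto revert;	/* counter would go negative */
			counters[d[i].idx] += d[i].delta;
		}
		return 0;
	revert:
		while (i--)
			counters[d[i].idx] -= d[i].delta;	/* same walk, negated */
		return -1;
	}

	int main(void)
	{
		long counters[3] = { 10, 5, 0 };
		struct delta deltas[] = { { 0, -4 }, { 1, +2 }, { 2, -1 } };
		int ret = apply_deltas(counters, deltas, 3);

		/* Third delta fails, so the first two are reverted: 10,5,0. */
		printf("ret=%d counters=%ld,%ld,%ld\n",
		       ret, counters[0], counters[1], counters[2]);
		return 0;
	}

The need_mark path in bch2_trans_fs_usage_apply below is this revert loop; the real code additionally juggles the mark_lock and preemption state around it.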
percpu_up_read(&c->mark_lock);
-+}
-+
-+int bch2_trans_fs_usage_apply(struct btree_trans *trans,
-+			      struct replicas_delta_list *deltas)
-+{
-+	struct bch_fs *c = trans->c;
-+	static int warned_disk_usage = 0;
-+	bool warn = false;
-+	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
-+	struct replicas_delta *d, *d2;
-+	struct replicas_delta *top = (void *) deltas->d + deltas->used;
-+	struct bch_fs_usage *dst;
-+	s64 added = 0, should_not_have_added;
-+	unsigned i;
-+
-+	percpu_down_read(&c->mark_lock);
-+	preempt_disable();
-+	dst = fs_usage_ptr(c, trans->journal_res.seq, false);
-+
-+	for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
-+		switch (d->r.data_type) {
-+		case BCH_DATA_btree:
-+		case BCH_DATA_user:
-+		case BCH_DATA_parity:
-+			added += d->delta;
-+		}
-+
-+		if (__update_replicas(c, dst, &d->r, d->delta))
-+			goto need_mark;
-+	}
-+
-+	dst->nr_inodes += deltas->nr_inodes;
-+
-+	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
-+		added			+= deltas->persistent_reserved[i];
-+		dst->reserved		+= deltas->persistent_reserved[i];
-+		dst->persistent_reserved[i] += deltas->persistent_reserved[i];
-+	}
-+
-+	/*
-+	 * Not allowed to reduce sectors_available except by getting a
-+	 * reservation:
-+	 */
-+	should_not_have_added = added - (s64) disk_res_sectors;
-+	if (unlikely(should_not_have_added > 0)) {
-+		u64 old, new, v = atomic64_read(&c->sectors_available);
-+
-+		do {
-+			old = v;
-+			new = max_t(s64, 0, old - should_not_have_added);
-+		} while ((v = atomic64_cmpxchg(&c->sectors_available,
-+					       old, new)) != old);
-+
-+		added -= should_not_have_added;
-+		warn = true;
-+	}
-+
-+	if (added > 0) {
-+		trans->disk_res->sectors -= added;
-+		this_cpu_sub(*c->online_reserved, added);
-+	}
-+
-+	preempt_enable();
-+	percpu_up_read(&c->mark_lock);
-+
-+	if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
-+		bch2_trans_inconsistent(trans,
-+					"disk usage increased %lli more than %llu sectors reserved",
-+					should_not_have_added, disk_res_sectors);
-+	return 0;
-+need_mark:
-+	/* revert changes: */
-+	for (d2 = deltas->d; d2 != d; d2 = replicas_delta_next(d2))
-+		BUG_ON(__update_replicas(c, dst, &d2->r, -d2->delta));
-+
-+	preempt_enable();
-+	percpu_up_read(&c->mark_lock);
-+	return -1;
-+}
-+
-+/* trans_mark: */
-+
-+static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
-+					  enum btree_id btree_id, unsigned level,
-+					  struct bkey_s_c k, struct extent_ptr_decoded p,
-+					  unsigned flags)
-+{
-+	bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
-+	struct btree_iter iter;
-+	struct bkey_i_alloc_v4 *a;
-+	struct bpos bucket;
-+	struct bch_backpointer bp;
-+	s64 sectors;
-+	int ret;
-+
-+	bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
-+	sectors = bp.bucket_len;
-+	if (!insert)
-+		sectors = -sectors;
-+
-+	a = bch2_trans_start_alloc_update(trans, &iter, bucket);
-+	if (IS_ERR(a))
-+		return PTR_ERR(a);
-+
-+	ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
-+			     a->v.gen, &a->v.data_type,
-+			     &a->v.dirty_sectors, &a->v.cached_sectors) ?:
-+	      bch2_trans_update(trans, &iter, &a->k_i, 0);
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (ret)
-+		return ret;
-+
-+	if (!p.ptr.cached) {
-+		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
-+				      struct extent_ptr_decoded p,
-+				      s64 sectors, enum bch_data_type data_type)
-+{
-+	struct btree_iter iter;
-+	struct bkey_i_stripe *s;
-+	struct bch_replicas_padded r;
-+	int ret = 0;
-+
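One detail of bch2_trans_fs_usage_apply above worth isolating: when usage grows beyond what was reserved, the excess is clawed back from c->sectors_available with a compare-and-swap loop that clamps at zero instead of going negative. The same loop in standalone C11, using a toy counter in place of the filesystem's atomic (illustrative only, not the kernel's atomic64 API):

	#include <stdatomic.h>
	#include <stdio.h>

	/* Subtract 'excess' from *avail, clamping at zero, without locks. */
	static long claw_back(_Atomic long *avail, long excess)
	{
		long old = atomic_load_explicit(avail, memory_order_relaxed);
		long new;

		do {
			new = old - excess;
			if (new < 0)
				new = 0;	/* clamp, like max_t(s64, 0, ...) */
			/* on failure, 'old' is reloaded with the current value */
		} while (!atomic_compare_exchange_weak(avail, &old, new));

		return new;
	}

	int main(void)
	{
		_Atomic long avail = 100;

		printf("after 30: %ld\n", claw_back(&avail, 30));	/* 70 */
		printf("after 90: %ld\n", claw_back(&avail, 90));	/* 0, clamped */
		return 0;
	}

The CAS retry means concurrent reservations never push the counter below zero, at the cost of re-running the subtraction when another CPU raced in between the load and the exchange.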
-+ s = bch2_bkey_get_mut_typed(trans, &iter, -+ BTREE_ID_stripes, POS(0, p.ec.idx), -+ BTREE_ITER_WITH_UPDATES, stripe); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (unlikely(ret)) { -+ bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, -+ "pointer to nonexistent stripe %llu", -+ (u64) p.ec.idx); -+ goto err; -+ } -+ -+ if (!bch2_ptr_matches_stripe(&s->v, p)) { -+ bch2_trans_inconsistent(trans, -+ "stripe pointer doesn't match stripe %llu", -+ (u64) p.ec.idx); -+ ret = -EIO; -+ goto err; -+ } -+ -+ stripe_blockcount_set(&s->v, p.ec.block, -+ stripe_blockcount_get(&s->v, p.ec.block) + -+ sectors); -+ -+ bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); -+ r.e.data_type = data_type; -+ ret = update_replicas_list(trans, &r.e, sectors); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int __trans_mark_extent(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_replicas_padded r; -+ enum bch_data_type data_type = bkey_is_btree_ptr(k.k) -+ ? BCH_DATA_btree -+ : BCH_DATA_user; -+ s64 sectors = bkey_is_btree_ptr(k.k) -+ ? btree_sectors(c) -+ : k.k->size; -+ s64 dirty_sectors = 0; -+ bool stale; -+ int ret = 0; -+ -+ r.e.data_type = data_type; -+ r.e.nr_devs = 0; -+ r.e.nr_required = 1; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ s64 disk_sectors = ptr_disk_sectors(sectors, p); -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE) -+ disk_sectors = -disk_sectors; -+ -+ ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags); -+ if (ret < 0) -+ return ret; -+ -+ stale = ret > 0; -+ -+ if (p.ptr.cached) { -+ if (!stale) { -+ ret = update_cached_sectors_list(trans, p.ptr.dev, -+ disk_sectors); -+ if (ret) -+ return ret; -+ } -+ } else if (!p.has_ec) { -+ dirty_sectors += disk_sectors; -+ r.e.devs[r.e.nr_devs++] = p.ptr.dev; -+ } else { -+ ret = bch2_trans_mark_stripe_ptr(trans, p, -+ disk_sectors, data_type); -+ if (ret) -+ return ret; -+ -+ r.e.nr_required = 0; -+ } -+ } -+ -+ if (r.e.nr_devs) -+ ret = update_replicas_list(trans, &r.e, dirty_sectors); -+ -+ return ret; -+} -+ -+int bch2_trans_mark_extent(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_i *new, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) - -+ (int) bch2_bkey_needs_rebalance(c, old); -+ -+ if (mod) { -+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0); -+ if (ret) -+ return ret; -+ } -+ -+ return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); -+} -+ -+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, -+ struct bkey_s_c_stripe s, -+ unsigned idx, bool deleting) -+{ -+ struct bch_fs *c = trans->c; -+ const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; -+ struct btree_iter iter; -+ struct bkey_i_alloc_v4 *a; -+ enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant -+ ? BCH_DATA_parity : 0; -+ s64 sectors = data_type ? 
le16_to_cpu(s.v->sectors) : 0; -+ int ret = 0; -+ -+ if (deleting) -+ sectors = -sectors; -+ -+ a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr)); -+ if (IS_ERR(a)) -+ return PTR_ERR(a); -+ -+ ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type, -+ a->v.gen, a->v.data_type, -+ a->v.dirty_sectors, a->v.cached_sectors); -+ if (ret) -+ goto err; -+ -+ if (!deleting) { -+ if (bch2_trans_inconsistent_on(a->v.stripe || -+ a->v.stripe_redundancy, trans, -+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", -+ iter.pos.inode, iter.pos.offset, a->v.gen, -+ bch2_data_types[a->v.data_type], -+ a->v.dirty_sectors, -+ a->v.stripe, s.k->p.offset)) { -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, -+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", -+ iter.pos.inode, iter.pos.offset, a->v.gen, -+ bch2_data_types[a->v.data_type], -+ a->v.dirty_sectors, -+ s.k->p.offset)) { -+ ret = -EIO; -+ goto err; -+ } -+ -+ a->v.stripe = s.k->p.offset; -+ a->v.stripe_redundancy = s.v->nr_redundant; -+ a->v.data_type = BCH_DATA_stripe; -+ } else { -+ if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || -+ a->v.stripe_redundancy != s.v->nr_redundant, trans, -+ "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", -+ iter.pos.inode, iter.pos.offset, a->v.gen, -+ s.k->p.offset, a->v.stripe)) { -+ ret = -EIO; -+ goto err; -+ } -+ -+ a->v.stripe = 0; -+ a->v.stripe_redundancy = 0; -+ a->v.data_type = alloc_data_type(a->v, BCH_DATA_user); -+ } -+ -+ a->v.dirty_sectors += sectors; -+ if (data_type) -+ a->v.data_type = !deleting ? data_type : 0; -+ -+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0); -+ if (ret) -+ goto err; -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_trans_mark_stripe(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_i *new, -+ unsigned flags) -+{ -+ const struct bch_stripe *old_s = NULL; -+ struct bch_stripe *new_s = NULL; -+ struct bch_replicas_padded r; -+ unsigned i, nr_blocks; -+ int ret = 0; -+ -+ if (old.k->type == KEY_TYPE_stripe) -+ old_s = bkey_s_c_to_stripe(old).v; -+ if (new->k.type == KEY_TYPE_stripe) -+ new_s = &bkey_i_to_stripe(new)->v; -+ -+ /* -+ * If the pointers aren't changing, we don't need to do anything: -+ */ -+ if (new_s && old_s && -+ new_s->nr_blocks == old_s->nr_blocks && -+ new_s->nr_redundant == old_s->nr_redundant && -+ !memcmp(old_s->ptrs, new_s->ptrs, -+ new_s->nr_blocks * sizeof(struct bch_extent_ptr))) -+ return 0; -+ -+ BUG_ON(new_s && old_s && -+ (new_s->nr_blocks != old_s->nr_blocks || -+ new_s->nr_redundant != old_s->nr_redundant)); -+ -+ nr_blocks = new_s ? 
new_s->nr_blocks : old_s->nr_blocks; -+ -+ if (new_s) { -+ s64 sectors = le16_to_cpu(new_s->sectors); -+ -+ bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new)); -+ ret = update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); -+ if (ret) -+ return ret; -+ } -+ -+ if (old_s) { -+ s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); -+ -+ bch2_bkey_to_replicas(&r.e, old); -+ ret = update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); -+ if (ret) -+ return ret; -+ } -+ -+ for (i = 0; i < nr_blocks; i++) { -+ if (new_s && old_s && -+ !memcmp(&new_s->ptrs[i], -+ &old_s->ptrs[i], -+ sizeof(new_s->ptrs[i]))) -+ continue; -+ -+ if (new_s) { -+ ret = bch2_trans_mark_stripe_bucket(trans, -+ bkey_i_to_s_c_stripe(new), i, false); -+ if (ret) -+ break; -+ } -+ -+ if (old_s) { -+ ret = bch2_trans_mark_stripe_bucket(trans, -+ bkey_s_c_to_stripe(old), i, true); -+ if (ret) -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+static int __trans_mark_reservation(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, unsigned flags) -+{ -+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; -+ s64 sectors = (s64) k.k->size; -+ struct replicas_delta_list *d; -+ int ret; -+ -+ if (flags & BTREE_TRIGGER_OVERWRITE) -+ sectors = -sectors; -+ sectors *= replicas; -+ -+ ret = bch2_replicas_deltas_realloc(trans, 0); -+ if (ret) -+ return ret; -+ -+ d = trans->fs_usage_deltas; -+ replicas = clamp_t(unsigned, replicas, 1, -+ ARRAY_SIZE(d->persistent_reserved)); -+ -+ d->persistent_reserved[replicas - 1] += sectors; -+ return 0; -+} -+ -+int bch2_trans_mark_reservation(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, -+ struct bkey_i *new, -+ unsigned flags) -+{ -+ return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); -+} -+ -+static int trans_mark_reflink_p_segment(struct btree_trans *trans, -+ struct bkey_s_c_reflink_p p, -+ u64 *idx, unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_i *k; -+ __le64 *refcount; -+ int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 
1 : -1; -+ struct printbuf buf = PRINTBUF; -+ int ret; -+ -+ k = bch2_bkey_get_mut_noupdate(trans, &iter, -+ BTREE_ID_reflink, POS(0, *idx), -+ BTREE_ITER_WITH_UPDATES); -+ ret = PTR_ERR_OR_ZERO(k); -+ if (ret) -+ goto err; -+ -+ refcount = bkey_refcount(k); -+ if (!refcount) { -+ bch2_bkey_val_to_text(&buf, c, p.s_c); -+ bch2_trans_inconsistent(trans, -+ "nonexistent indirect extent at %llu while marking\n %s", -+ *idx, buf.buf); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { -+ bch2_bkey_val_to_text(&buf, c, p.s_c); -+ bch2_trans_inconsistent(trans, -+ "indirect extent refcount underflow at %llu while marking\n %s", -+ *idx, buf.buf); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (flags & BTREE_TRIGGER_INSERT) { -+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; -+ u64 pad; -+ -+ pad = max_t(s64, le32_to_cpu(v->front_pad), -+ le64_to_cpu(v->idx) - bkey_start_offset(&k->k)); -+ BUG_ON(pad > U32_MAX); -+ v->front_pad = cpu_to_le32(pad); -+ -+ pad = max_t(s64, le32_to_cpu(v->back_pad), -+ k->k.p.offset - p.k->size - le64_to_cpu(v->idx)); -+ BUG_ON(pad > U32_MAX); -+ v->back_pad = cpu_to_le32(pad); -+ } -+ -+ le64_add_cpu(refcount, add); -+ -+ bch2_btree_iter_set_pos_to_extent_start(&iter); -+ ret = bch2_trans_update(trans, &iter, k, 0); -+ if (ret) -+ goto err; -+ -+ *idx = k->k.p.offset; -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int __trans_mark_reflink_p(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c k, unsigned flags) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ u64 idx, end_idx; -+ int ret = 0; -+ -+ idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); -+ end_idx = le64_to_cpu(p.v->idx) + p.k->size + -+ le32_to_cpu(p.v->back_pad); -+ -+ while (idx < end_idx && !ret) -+ ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); -+ return ret; -+} -+ -+int bch2_trans_mark_reflink_p(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, -+ struct bkey_i *new, -+ unsigned flags) -+{ -+ if (flags & BTREE_TRIGGER_INSERT) { -+ struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; -+ -+ v->front_pad = v->back_pad = 0; -+ } -+ -+ return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); -+} -+ -+static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, -+ struct bch_dev *ca, size_t b, -+ enum bch_data_type type, -+ unsigned sectors) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_i_alloc_v4 *a; -+ int ret = 0; -+ -+ /* -+ * Backup superblock might be past the end of our normal usable space: -+ */ -+ if (b >= ca->mi.nbuckets) -+ return 0; -+ -+ a = bch2_trans_start_alloc_update(trans, &iter, POS(ca->dev_idx, b)); -+ if (IS_ERR(a)) -+ return PTR_ERR(a); -+ -+ if (a->v.data_type && type && a->v.data_type != type) { -+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, -+ BCH_FSCK_ERR_bucket_metadata_type_mismatch, -+ "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" -+ "while marking %s", -+ iter.pos.inode, iter.pos.offset, a->v.gen, -+ bch2_data_types[a->v.data_type], -+ bch2_data_types[type], -+ bch2_data_types[type]); -+ ret = -EIO; -+ goto err; -+ } -+ -+ if (a->v.data_type != type || -+ a->v.dirty_sectors != sectors) { -+ a->v.data_type = type; -+ a->v.dirty_sectors = sectors; -+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0); -+ } -+err: -+ 
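Nearly every trigger entry point in this file (bch2_mark_extent, bch2_mark_reservation, bch2_trans_mark_extent, bch2_trans_mark_reflink_p above, and so on) reduces to the same overwrite-then-insert shape: run the per-key handler once for the old key with the overwrite flag, then once for the new key with the insert flag. A toy version of that dispatch with invented names — the kernel's mem_trigger_run_overwrite_then_insert macro is more involved:

	#include <stdio.h>

	enum { TRIGGER_OVERWRITE = 1 << 0, TRIGGER_INSERT = 1 << 1 };

	struct toy_key { const char *name; long sectors; };

	/* A toy trigger: subtracts usage on overwrite, adds it on insert. */
	static int account(long *usage, const struct toy_key *k, unsigned flags)
	{
		*usage += (flags & TRIGGER_OVERWRITE) ? -k->sectors : k->sectors;
		return 0;
	}

	/* The overwrite-then-insert shape: old key "removed", new key "added". */
	static int run_overwrite_then_insert(long *usage,
					     const struct toy_key *old,
					     const struct toy_key *new)
	{
		int ret = account(usage, old, TRIGGER_OVERWRITE);

		if (ret)
			return ret;
		return account(usage, new, TRIGGER_INSERT);
	}

	int main(void)
	{
		long usage = 100;
		struct toy_key old = { "old", 40 }, new = { "new", 64 };

		run_overwrite_then_insert(&usage, &old, &new);
		printf("usage=%ld\n", usage);	/* 100 - 40 + 64 = 124 */
		return 0;
	}

Splitting every update into a subtract-old / add-new pair is what lets one handler serve inserts, deletions, and overwrites uniformly: a pure insert just has an empty old key, a pure deletion an empty new one.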
bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, -+ struct bch_dev *ca, size_t b, -+ enum bch_data_type type, -+ unsigned sectors) -+{ -+ return commit_do(trans, NULL, NULL, 0, -+ __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); -+} -+ -+static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, -+ struct bch_dev *ca, -+ u64 start, u64 end, -+ enum bch_data_type type, -+ u64 *bucket, unsigned *bucket_sectors) -+{ -+ do { -+ u64 b = sector_to_bucket(ca, start); -+ unsigned sectors = -+ min_t(u64, bucket_to_sector(ca, b + 1), end) - start; -+ -+ if (b != *bucket && *bucket_sectors) { -+ int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket, -+ type, *bucket_sectors); -+ if (ret) -+ return ret; -+ -+ *bucket_sectors = 0; -+ } -+ -+ *bucket = b; -+ *bucket_sectors += sectors; -+ start += sectors; -+ } while (start < end); -+ -+ return 0; -+} -+ -+static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, -+ struct bch_dev *ca) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ u64 bucket = 0; -+ unsigned i, bucket_sectors = 0; -+ int ret; -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset == BCH_SB_SECTOR) { -+ ret = bch2_trans_mark_metadata_sectors(trans, ca, -+ 0, BCH_SB_SECTOR, -+ BCH_DATA_sb, &bucket, &bucket_sectors); -+ if (ret) -+ return ret; -+ } -+ -+ ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, -+ offset + (1 << layout->sb_max_size_bits), -+ BCH_DATA_sb, &bucket, &bucket_sectors); -+ if (ret) -+ return ret; -+ } -+ -+ if (bucket_sectors) { -+ ret = bch2_trans_mark_metadata_bucket(trans, ca, -+ bucket, BCH_DATA_sb, bucket_sectors); -+ if (ret) -+ return ret; -+ } -+ -+ for (i = 0; i < ca->journal.nr; i++) { -+ ret = bch2_trans_mark_metadata_bucket(trans, ca, -+ ca->journal.buckets[i], -+ BCH_DATA_journal, ca->mi.bucket_size); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) -+{ -+ int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca)); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+int bch2_trans_mark_dev_sbs(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) { -+ int ret = bch2_trans_mark_dev_sb(c, ca); -+ if (ret) { -+ percpu_ref_put(&ca->ref); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+/* Disk reservations: */ -+ -+#define SECTORS_CACHE 1024 -+ -+int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, -+ u64 sectors, int flags) -+{ -+ struct bch_fs_pcpu *pcpu; -+ u64 old, v, get; -+ s64 sectors_available; -+ int ret; -+ -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ pcpu = this_cpu_ptr(c->pcpu); -+ -+ if (sectors <= pcpu->sectors_available) -+ goto out; -+ -+ v = atomic64_read(&c->sectors_available); -+ do { -+ old = v; -+ get = min((u64) sectors + SECTORS_CACHE, old); -+ -+ if (get < sectors) { -+ preempt_enable(); -+ goto recalculate; -+ } -+ } while ((v = atomic64_cmpxchg(&c->sectors_available, -+ old, old - get)) != old); -+ -+ pcpu->sectors_available += get; -+ -+out: -+ pcpu->sectors_available -= sectors; -+ this_cpu_add(*c->online_reserved, sectors); -+ res->sectors += sectors; -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ return 0; -+ -+recalculate: -+ mutex_lock(&c->sectors_available_lock); -+ -+ percpu_u64_set(&c->pcpu->sectors_available, 0); -+ sectors_available 
= avail_factor(__bch2_fs_usage_read_short(c).free); -+ -+ if (sectors <= sectors_available || -+ (flags & BCH_DISK_RESERVATION_NOFAIL)) { -+ atomic64_set(&c->sectors_available, -+ max_t(s64, 0, sectors_available - sectors)); -+ this_cpu_add(*c->online_reserved, sectors); -+ res->sectors += sectors; -+ ret = 0; -+ } else { -+ atomic64_set(&c->sectors_available, sectors_available); -+ ret = -BCH_ERR_ENOSPC_disk_reservation; -+ } -+ -+ mutex_unlock(&c->sectors_available_lock); -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+/* Startup/shutdown: */ -+ -+static void bucket_gens_free_rcu(struct rcu_head *rcu) -+{ -+ struct bucket_gens *buckets = -+ container_of(rcu, struct bucket_gens, rcu); -+ -+ kvpfree(buckets, sizeof(*buckets) + buckets->nbuckets); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL; -+ unsigned long *buckets_nouse = NULL; -+ bool resize = ca->bucket_gens != NULL; -+ int ret; -+ -+ if (!(bucket_gens = kvpmalloc(sizeof(struct bucket_gens) + nbuckets, -+ GFP_KERNEL|__GFP_ZERO))) { -+ ret = -BCH_ERR_ENOMEM_bucket_gens; -+ goto err; -+ } -+ -+ if ((c->opts.buckets_nouse && -+ !(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) * -+ sizeof(unsigned long), -+ GFP_KERNEL|__GFP_ZERO)))) { -+ ret = -BCH_ERR_ENOMEM_buckets_nouse; -+ goto err; -+ } -+ -+ bucket_gens->first_bucket = ca->mi.first_bucket; -+ bucket_gens->nbuckets = nbuckets; -+ -+ bch2_copygc_stop(c); -+ -+ if (resize) { -+ down_write(&c->gc_lock); -+ down_write(&ca->bucket_lock); -+ percpu_down_write(&c->mark_lock); -+ } -+ -+ old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); -+ -+ if (resize) { -+ size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); -+ -+ memcpy(bucket_gens->b, -+ old_bucket_gens->b, -+ n); -+ if (buckets_nouse) -+ memcpy(buckets_nouse, -+ ca->buckets_nouse, -+ BITS_TO_LONGS(n) * sizeof(unsigned long)); -+ } -+ -+ rcu_assign_pointer(ca->bucket_gens, bucket_gens); -+ bucket_gens = old_bucket_gens; -+ -+ swap(ca->buckets_nouse, buckets_nouse); -+ -+ nbuckets = ca->mi.nbuckets; -+ -+ if (resize) { -+ percpu_up_write(&c->mark_lock); -+ up_write(&ca->bucket_lock); -+ up_write(&c->gc_lock); -+ } -+ -+ ret = 0; -+err: -+ kvpfree(buckets_nouse, -+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long)); -+ if (bucket_gens) -+ call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu); -+ -+ return ret; -+} -+ -+void bch2_dev_buckets_free(struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ kvpfree(ca->buckets_nouse, -+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); -+ kvpfree(rcu_dereference_protected(ca->bucket_gens, 1), -+ sizeof(struct bucket_gens) + ca->mi.nbuckets); -+ -+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++) -+ free_percpu(ca->usage[i]); -+ kfree(ca->usage_base); -+} -+ -+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned i; -+ -+ ca->usage_base = kzalloc(sizeof(struct bch_dev_usage), GFP_KERNEL); -+ if (!ca->usage_base) -+ return -BCH_ERR_ENOMEM_usage_init; -+ -+ for (i = 0; i < ARRAY_SIZE(ca->usage); i++) { -+ ca->usage[i] = alloc_percpu(struct bch_dev_usage); -+ if (!ca->usage[i]) -+ return -BCH_ERR_ENOMEM_usage_init; -+ } -+ -+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets); -+} -diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h -new file mode 100644 -index 000000000000..21f6cb356921 ---- /dev/null -+++ b/fs/bcachefs/buckets.h -@@ -0,0 +1,458 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Code for manipulating 
bucket marks for garbage collection. -+ * -+ * Copyright 2014 Datera, Inc. -+ */ -+ -+#ifndef _BUCKETS_H -+#define _BUCKETS_H -+ -+#include "buckets_types.h" -+#include "extents.h" -+#include "sb-members.h" -+ -+static inline size_t sector_to_bucket(const struct bch_dev *ca, sector_t s) -+{ -+ return div_u64(s, ca->mi.bucket_size); -+} -+ -+static inline sector_t bucket_to_sector(const struct bch_dev *ca, size_t b) -+{ -+ return ((sector_t) b) * ca->mi.bucket_size; -+} -+ -+static inline sector_t bucket_remainder(const struct bch_dev *ca, sector_t s) -+{ -+ u32 remainder; -+ -+ div_u64_rem(s, ca->mi.bucket_size, &remainder); -+ return remainder; -+} -+ -+static inline size_t sector_to_bucket_and_offset(const struct bch_dev *ca, sector_t s, -+ u32 *offset) -+{ -+ return div_u64_rem(s, ca->mi.bucket_size, offset); -+} -+ -+#define for_each_bucket(_b, _buckets) \ -+ for (_b = (_buckets)->b + (_buckets)->first_bucket; \ -+ _b < (_buckets)->b + (_buckets)->nbuckets; _b++) -+ -+/* -+ * Ugly hack alert: -+ * -+ * We need to cram a spinlock in a single byte, because that's what we have left -+ * in struct bucket, and we care about the size of these - during fsck, we need -+ * in memory state for every single bucket on every device. -+ * -+ * We used to do -+ * while (xchg(&b->lock, 1) cpu_relax(); -+ * but, it turns out not all architectures support xchg on a single byte. -+ * -+ * So now we use bit_spin_lock(), with fun games since we can't burn a whole -+ * ulong for this - we just need to make sure the lock bit always ends up in the -+ * first byte. -+ */ -+ -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+#define BUCKET_LOCK_BITNR 0 -+#else -+#define BUCKET_LOCK_BITNR (BITS_PER_LONG - 1) -+#endif -+ -+union ulong_byte_assert { -+ ulong ulong; -+ u8 byte; -+}; -+ -+static inline void bucket_unlock(struct bucket *b) -+{ -+ BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); -+ -+ clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); -+ wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); -+} -+ -+static inline void bucket_lock(struct bucket *b) -+{ -+ wait_on_bit_lock((void *) &b->lock, BUCKET_LOCK_BITNR, -+ TASK_UNINTERRUPTIBLE); -+} -+ -+static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca) -+{ -+ return rcu_dereference_check(ca->buckets_gc, -+ !ca->fs || -+ percpu_rwsem_is_held(&ca->fs->mark_lock) || -+ lockdep_is_held(&ca->fs->gc_lock) || -+ lockdep_is_held(&ca->bucket_lock)); -+} -+ -+static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b) -+{ -+ struct bucket_array *buckets = gc_bucket_array(ca); -+ -+ BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets); -+ return buckets->b + b; -+} -+ -+static inline struct bucket_gens *bucket_gens(struct bch_dev *ca) -+{ -+ return rcu_dereference_check(ca->bucket_gens, -+ !ca->fs || -+ percpu_rwsem_is_held(&ca->fs->mark_lock) || -+ lockdep_is_held(&ca->fs->gc_lock) || -+ lockdep_is_held(&ca->bucket_lock)); -+} -+ -+static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) -+{ -+ struct bucket_gens *gens = bucket_gens(ca); -+ -+ BUG_ON(b < gens->first_bucket || b >= gens->nbuckets); -+ return gens->b + b; -+} -+ -+static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return sector_to_bucket(ca, ptr->offset); -+} -+ -+static inline struct bpos PTR_BUCKET_POS(const struct bch_fs *c, -+ const struct bch_extent_ptr *ptr) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ return POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); 
-+} -+ -+static inline struct bpos PTR_BUCKET_POS_OFFSET(const struct bch_fs *c, -+ const struct bch_extent_ptr *ptr, -+ u32 *bucket_offset) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ return POS(ptr->dev, sector_to_bucket_and_offset(ca, ptr->offset, bucket_offset)); -+} -+ -+static inline struct bucket *PTR_GC_BUCKET(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ return gc_bucket(ca, PTR_BUCKET_NR(ca, ptr)); -+} -+ -+static inline enum bch_data_type ptr_data_type(const struct bkey *k, -+ const struct bch_extent_ptr *ptr) -+{ -+ if (bkey_is_btree_ptr(k)) -+ return BCH_DATA_btree; -+ -+ return ptr->cached ? BCH_DATA_cached : BCH_DATA_user; -+} -+ -+static inline s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p) -+{ -+ EBUG_ON(sectors < 0); -+ -+ return crc_is_compressed(p.crc) -+ ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size, -+ p.crc.uncompressed_size) -+ : sectors; -+} -+ -+static inline int gen_cmp(u8 a, u8 b) -+{ -+ return (s8) (a - b); -+} -+ -+static inline int gen_after(u8 a, u8 b) -+{ -+ int r = gen_cmp(a, b); -+ -+ return r > 0 ? r : 0; -+} -+ -+/** -+ * ptr_stale() - check if a pointer points into a bucket that has been -+ * invalidated. -+ */ -+static inline u8 ptr_stale(struct bch_dev *ca, -+ const struct bch_extent_ptr *ptr) -+{ -+ u8 ret; -+ -+ rcu_read_lock(); -+ ret = gen_after(*bucket_gen(ca, PTR_BUCKET_NR(ca, ptr)), ptr->gen); -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+/* Device usage: */ -+ -+void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *); -+static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) -+{ -+ struct bch_dev_usage ret; -+ -+ bch2_dev_usage_read_fast(ca, &ret); -+ return ret; -+} -+ -+void bch2_dev_usage_init(struct bch_dev *); -+ -+static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) -+{ -+ s64 reserved = 0; -+ -+ switch (watermark) { -+ case BCH_WATERMARK_NR: -+ BUG(); -+ case BCH_WATERMARK_stripe: -+ reserved += ca->mi.nbuckets >> 6; -+ fallthrough; -+ case BCH_WATERMARK_normal: -+ reserved += ca->mi.nbuckets >> 6; -+ fallthrough; -+ case BCH_WATERMARK_copygc: -+ reserved += ca->nr_btree_reserve; -+ fallthrough; -+ case BCH_WATERMARK_btree: -+ reserved += ca->nr_btree_reserve; -+ fallthrough; -+ case BCH_WATERMARK_btree_copygc: -+ case BCH_WATERMARK_reclaim: -+ break; -+ } -+ -+ return reserved; -+} -+ -+static inline u64 dev_buckets_free(struct bch_dev *ca, -+ struct bch_dev_usage usage, -+ enum bch_watermark watermark) -+{ -+ return max_t(s64, 0, -+ usage.d[BCH_DATA_free].buckets - -+ ca->nr_open_buckets - -+ bch2_dev_buckets_reserved(ca, watermark)); -+} -+ -+static inline u64 __dev_buckets_available(struct bch_dev *ca, -+ struct bch_dev_usage usage, -+ enum bch_watermark watermark) -+{ -+ return max_t(s64, 0, -+ usage.d[BCH_DATA_free].buckets -+ + usage.d[BCH_DATA_cached].buckets -+ + usage.d[BCH_DATA_need_gc_gens].buckets -+ + usage.d[BCH_DATA_need_discard].buckets -+ - ca->nr_open_buckets -+ - bch2_dev_buckets_reserved(ca, watermark)); -+} -+ -+static inline u64 dev_buckets_available(struct bch_dev *ca, -+ enum bch_watermark watermark) -+{ -+ return __dev_buckets_available(ca, bch2_dev_usage_read(ca), watermark); -+} -+ -+/* Filesystem usage: */ -+ -+static inline unsigned __fs_usage_u64s(unsigned nr_replicas) -+{ -+ return sizeof(struct bch_fs_usage) / sizeof(u64) + nr_replicas; -+} -+ -+static inline unsigned fs_usage_u64s(struct bch_fs *c) -+{ -+ return __fs_usage_u64s(READ_ONCE(c->replicas.nr)); -+} -+ 
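(The u64-units convention above matters when allocating usage structs: struct bch_fs_usage ends in a flexible replicas[] array, so buffers are sized as a count of u64s rather than with sizeof(). A minimal allocation sketch, assuming the fs_usage_u64s() helper above; the variable names are illustrative, not from this patch:

    /* Size covers the fixed fields plus one u64 per replicas entry. */
    struct bch_fs_usage *u = kzalloc(fs_usage_u64s(c) * sizeof(u64), GFP_KERNEL);
    if (!u)
            return -ENOMEM;
    /* ... accumulate counters, then kfree(u) when done ... */

This is why callers free these buffers with plain kfree() even though the logical size depends on the current replicas table.)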
-+static inline unsigned __fs_usage_online_u64s(unsigned nr_replicas) -+{ -+ return sizeof(struct bch_fs_usage_online) / sizeof(u64) + nr_replicas; -+} -+ -+static inline unsigned fs_usage_online_u64s(struct bch_fs *c) -+{ -+ return __fs_usage_online_u64s(READ_ONCE(c->replicas.nr)); -+} -+ -+static inline unsigned dev_usage_u64s(void) -+{ -+ return sizeof(struct bch_dev_usage) / sizeof(u64); -+} -+ -+u64 bch2_fs_usage_read_one(struct bch_fs *, u64 *); -+ -+struct bch_fs_usage_online *bch2_fs_usage_read(struct bch_fs *); -+ -+void bch2_fs_usage_acc_to_base(struct bch_fs *, unsigned); -+ -+void bch2_fs_usage_to_text(struct printbuf *, -+ struct bch_fs *, struct bch_fs_usage_online *); -+ -+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *); -+ -+struct bch_fs_usage_short -+bch2_fs_usage_read_short(struct bch_fs *); -+ -+/* key/bucket marking: */ -+ -+static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, -+ unsigned journal_seq, -+ bool gc) -+{ -+ percpu_rwsem_assert_held(&c->mark_lock); -+ BUG_ON(!gc && !journal_seq); -+ -+ return this_cpu_ptr(gc -+ ? c->usage_gc -+ : c->usage[journal_seq & JOURNAL_BUF_MASK]); -+} -+ -+int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned); -+ -+void bch2_fs_usage_initialize(struct bch_fs *); -+ -+int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, -+ size_t, enum bch_data_type, unsigned, -+ struct gc_pos, unsigned); -+ -+int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); -+int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); -+int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); -+int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); -+int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); -+ -+int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -+int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -+int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -+int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -+ -+#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ -+({ \ -+ int ret = 0; \ -+ \ -+ if (_old.k->type) \ -+ ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ -+ if (!ret && _new.k->type) \ -+ ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ -+ ret; \ -+}) -+ -+#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ -+ mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) -+ -+void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); -+int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); -+ -+int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, -+ size_t, enum bch_data_type, unsigned); -+int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *); -+int bch2_trans_mark_dev_sbs(struct bch_fs *); -+ -+static inline bool 
is_superblock_bucket(struct bch_dev *ca, u64 b) -+{ -+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; -+ u64 b_offset = bucket_to_sector(ca, b); -+ u64 b_end = bucket_to_sector(ca, b + 1); -+ unsigned i; -+ -+ if (!b) -+ return true; -+ -+ for (i = 0; i < layout->nr_superblocks; i++) { -+ u64 offset = le64_to_cpu(layout->sb_offset[i]); -+ u64 end = offset + (1 << layout->sb_max_size_bits); -+ -+ if (!(offset >= b_end || end <= b_offset)) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* disk reservations: */ -+ -+static inline void bch2_disk_reservation_put(struct bch_fs *c, -+ struct disk_reservation *res) -+{ -+ if (res->sectors) { -+ this_cpu_sub(*c->online_reserved, res->sectors); -+ res->sectors = 0; -+ } -+} -+ -+#define BCH_DISK_RESERVATION_NOFAIL (1 << 0) -+ -+int __bch2_disk_reservation_add(struct bch_fs *, -+ struct disk_reservation *, -+ u64, int); -+ -+static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, -+ u64 sectors, int flags) -+{ -+#ifdef __KERNEL__ -+ u64 old, new; -+ -+ do { -+ old = this_cpu_read(c->pcpu->sectors_available); -+ if (sectors > old) -+ return __bch2_disk_reservation_add(c, res, sectors, flags); -+ -+ new = old - sectors; -+ } while (this_cpu_cmpxchg(c->pcpu->sectors_available, old, new) != old); -+ -+ this_cpu_add(*c->online_reserved, sectors); -+ res->sectors += sectors; -+ return 0; -+#else -+ return __bch2_disk_reservation_add(c, res, sectors, flags); -+#endif -+} -+ -+static inline struct disk_reservation -+bch2_disk_reservation_init(struct bch_fs *c, unsigned nr_replicas) -+{ -+ return (struct disk_reservation) { -+ .sectors = 0, -+#if 0 -+ /* not used yet: */ -+ .gen = c->capacity_gen, -+#endif -+ .nr_replicas = nr_replicas, -+ }; -+} -+ -+static inline int bch2_disk_reservation_get(struct bch_fs *c, -+ struct disk_reservation *res, -+ u64 sectors, unsigned nr_replicas, -+ int flags) -+{ -+ *res = bch2_disk_reservation_init(c, nr_replicas); -+ -+ return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); -+} -+ -+#define RESERVE_FACTOR 6 -+ -+static inline u64 avail_factor(u64 r) -+{ -+ return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1); -+} -+ -+int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); -+void bch2_dev_buckets_free(struct bch_dev *); -+int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); -+ -+#endif /* _BUCKETS_H */ -diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h -new file mode 100644 -index 000000000000..2a9dab9006ef ---- /dev/null -+++ b/fs/bcachefs/buckets_types.h -@@ -0,0 +1,92 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BUCKETS_TYPES_H -+#define _BUCKETS_TYPES_H -+ -+#include "bcachefs_format.h" -+#include "util.h" -+ -+#define BUCKET_JOURNAL_SEQ_BITS 16 -+ -+struct bucket { -+ u8 lock; -+ u8 gen_valid:1; -+ u8 data_type:7; -+ u8 gen; -+ u8 stripe_redundancy; -+ u32 stripe; -+ u32 dirty_sectors; -+ u32 cached_sectors; -+}; -+ -+struct bucket_array { -+ struct rcu_head rcu; -+ u16 first_bucket; -+ size_t nbuckets; -+ struct bucket b[]; -+}; -+ -+struct bucket_gens { -+ struct rcu_head rcu; -+ u16 first_bucket; -+ size_t nbuckets; -+ u8 b[]; -+}; -+ -+struct bch_dev_usage { -+ u64 buckets_ec; -+ -+ struct { -+ u64 buckets; -+ u64 sectors; /* _compressed_ sectors: */ -+ /* -+ * XXX -+ * Why do we have this? Isn't it just buckets * bucket_size - -+ * sectors? 
-+ */ -+ u64 fragmented; -+ } d[BCH_DATA_NR]; -+}; -+ -+struct bch_fs_usage { -+ /* all fields are in units of 512 byte sectors: */ -+ u64 hidden; -+ u64 btree; -+ u64 data; -+ u64 cached; -+ u64 reserved; -+ u64 nr_inodes; -+ -+ /* XXX: add stats for compression ratio */ -+#if 0 -+ u64 uncompressed; -+ u64 compressed; -+#endif -+ -+ /* broken out: */ -+ -+ u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ u64 replicas[]; -+}; -+ -+struct bch_fs_usage_online { -+ u64 online_reserved; -+ struct bch_fs_usage u; -+}; -+ -+struct bch_fs_usage_short { -+ u64 capacity; -+ u64 used; -+ u64 free; -+ u64 nr_inodes; -+}; -+ -+/* -+ * A reservation for space on disk: -+ */ -+struct disk_reservation { -+ u64 sectors; -+ u32 gen; -+ unsigned nr_replicas; -+}; -+ -+#endif /* _BUCKETS_TYPES_H */ -diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c -new file mode 100644 -index 000000000000..ec1b636ef78d ---- /dev/null -+++ b/fs/bcachefs/buckets_waiting_for_journal.c -@@ -0,0 +1,166 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "buckets_waiting_for_journal.h" -+#include -+#include -+ -+static inline struct bucket_hashed * -+bucket_hash(struct buckets_waiting_for_journal_table *t, -+ unsigned hash_seed_idx, u64 dev_bucket) -+{ -+ return t->d + hash_64(dev_bucket ^ t->hash_seeds[hash_seed_idx], t->bits); -+} -+ -+static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_t bits) -+{ -+ unsigned i; -+ -+ t->bits = bits; -+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) -+ get_random_bytes(&t->hash_seeds[i], sizeof(t->hash_seeds[i])); -+ memset(t->d, 0, sizeof(t->d[0]) << t->bits); -+} -+ -+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, -+ u64 flushed_seq, -+ unsigned dev, u64 bucket) -+{ -+ struct buckets_waiting_for_journal_table *t; -+ u64 dev_bucket = (u64) dev << 56 | bucket; -+ bool ret = false; -+ unsigned i; -+ -+ mutex_lock(&b->lock); -+ t = b->t; -+ -+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { -+ struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); -+ -+ if (h->dev_bucket == dev_bucket) { -+ ret = h->journal_seq > flushed_seq; -+ break; -+ } -+ } -+ -+ mutex_unlock(&b->lock); -+ -+ return ret; -+} -+ -+static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t, -+ struct bucket_hashed *new, -+ u64 flushed_seq) -+{ -+ struct bucket_hashed *last_evicted = NULL; -+ unsigned tries, i; -+ -+ for (tries = 0; tries < 10; tries++) { -+ struct bucket_hashed *old, *victim = NULL; -+ -+ for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { -+ old = bucket_hash(t, i, new->dev_bucket); -+ -+ if (old->dev_bucket == new->dev_bucket || -+ old->journal_seq <= flushed_seq) { -+ *old = *new; -+ return true; -+ } -+ -+ if (last_evicted != old) -+ victim = old; -+ } -+ -+ /* hashed to same slot 3 times: */ -+ if (!victim) -+ break; -+ -+ /* Failed to find an empty slot: */ -+ swap(*new, *victim); -+ last_evicted = victim; -+ } -+ -+ return false; -+} -+ -+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, -+ u64 flushed_seq, -+ unsigned dev, u64 bucket, -+ u64 journal_seq) -+{ -+ struct buckets_waiting_for_journal_table *t, *n; -+ struct bucket_hashed tmp, new = { -+ .dev_bucket = (u64) dev << 56 | bucket, -+ .journal_seq = journal_seq, -+ }; -+ size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; -+ int ret = 0; -+ -+ mutex_lock(&b->lock); -+ -+ if (likely(bucket_table_insert(b->t, &new, flushed_seq))) -+ goto out; -+ -+ t = b->t; -+ 
size = 1UL << t->bits; -+ for (i = 0; i < size; i++) -+ nr_elements += t->d[i].journal_seq > flushed_seq; -+ -+ new_bits = t->bits + (nr_elements * 3 > size); -+ -+ n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); -+ if (!n) { -+ ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set; -+ goto out; -+ } -+ -+retry_rehash: -+ nr_rehashes++; -+ bucket_table_init(n, new_bits); -+ -+ tmp = new; -+ BUG_ON(!bucket_table_insert(n, &tmp, flushed_seq)); -+ -+ for (i = 0; i < 1UL << t->bits; i++) { -+ if (t->d[i].journal_seq <= flushed_seq) -+ continue; -+ -+ tmp = t->d[i]; -+ if (!bucket_table_insert(n, &tmp, flushed_seq)) -+ goto retry_rehash; -+ } -+ -+ b->t = n; -+ kvfree(t); -+ -+ pr_debug("took %zu rehashes, table at %zu/%lu elements", -+ nr_rehashes, nr_elements, 1UL << b->t->bits); -+out: -+ mutex_unlock(&b->lock); -+ -+ return ret; -+} -+ -+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c) -+{ -+ struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal; -+ -+ kvfree(b->t); -+} -+ -+#define INITIAL_TABLE_BITS 3 -+ -+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *c) -+{ -+ struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal; -+ -+ mutex_init(&b->lock); -+ -+ b->t = kvmalloc(sizeof(*b->t) + -+ (sizeof(b->t->d[0]) << INITIAL_TABLE_BITS), GFP_KERNEL); -+ if (!b->t) -+ return -BCH_ERR_ENOMEM_buckets_waiting_for_journal_init; -+ -+ bucket_table_init(b->t, INITIAL_TABLE_BITS); -+ return 0; -+} -diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h -new file mode 100644 -index 000000000000..d2ae19cbe18c ---- /dev/null -+++ b/fs/bcachefs/buckets_waiting_for_journal.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_H -+#define _BUCKETS_WAITING_FOR_JOURNAL_H -+ -+#include "buckets_waiting_for_journal_types.h" -+ -+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, -+ u64, unsigned, u64); -+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, -+ u64, unsigned, u64, u64); -+ -+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *); -+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *); -+ -+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_H */ -diff --git a/fs/bcachefs/buckets_waiting_for_journal_types.h b/fs/bcachefs/buckets_waiting_for_journal_types.h -new file mode 100644 -index 000000000000..e593db061d81 ---- /dev/null -+++ b/fs/bcachefs/buckets_waiting_for_journal_types.h -@@ -0,0 +1,23 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H -+#define _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H -+ -+#include -+ -+struct bucket_hashed { -+ u64 dev_bucket; -+ u64 journal_seq; -+}; -+ -+struct buckets_waiting_for_journal_table { -+ unsigned bits; -+ u64 hash_seeds[3]; -+ struct bucket_hashed d[]; -+}; -+ -+struct buckets_waiting_for_journal { -+ struct mutex lock; -+ struct buckets_waiting_for_journal_table *t; -+}; -+ -+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H */ -diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c -new file mode 100644 -index 000000000000..4bb88aefed12 ---- /dev/null -+++ b/fs/bcachefs/chardev.c -@@ -0,0 +1,784 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_CHARDEV -+ -+#include "bcachefs.h" -+#include "bcachefs_ioctl.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "journal.h" -+#include "move.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include 
-+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* returns with ref on ca->ref */ -+static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, -+ unsigned flags) -+{ -+ struct bch_dev *ca; -+ -+ if (flags & BCH_BY_INDEX) { -+ if (dev >= c->sb.nr_devices) -+ return ERR_PTR(-EINVAL); -+ -+ rcu_read_lock(); -+ ca = rcu_dereference(c->devs[dev]); -+ if (ca) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ if (!ca) -+ return ERR_PTR(-EINVAL); -+ } else { -+ char *path; -+ -+ path = strndup_user((const char __user *) -+ (unsigned long) dev, PATH_MAX); -+ if (IS_ERR(path)) -+ return ERR_CAST(path); -+ -+ ca = bch2_dev_lookup(c, path); -+ kfree(path); -+ } -+ -+ return ca; -+} -+ -+#if 0 -+static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) -+{ -+ struct bch_ioctl_assemble arg; -+ struct bch_fs *c; -+ u64 *user_devs = NULL; -+ char **devs = NULL; -+ unsigned i; -+ int ret = -EFAULT; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL); -+ if (!user_devs) -+ return -ENOMEM; -+ -+ devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL); -+ -+ if (copy_from_user(user_devs, user_arg->devs, -+ sizeof(u64) * arg.nr_devs)) -+ goto err; -+ -+ for (i = 0; i < arg.nr_devs; i++) { -+ devs[i] = strndup_user((const char __user *)(unsigned long) -+ user_devs[i], -+ PATH_MAX); -+ ret= PTR_ERR_OR_ZERO(devs[i]); -+ if (ret) -+ goto err; -+ } -+ -+ c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty()); -+ ret = PTR_ERR_OR_ZERO(c); -+ if (!ret) -+ closure_put(&c->cl); -+err: -+ if (devs) -+ for (i = 0; i < arg.nr_devs; i++) -+ kfree(devs[i]); -+ kfree(devs); -+ return ret; -+} -+ -+static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg) -+{ -+ struct bch_ioctl_incremental arg; -+ const char *err; -+ char *path; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ ret = PTR_ERR_OR_ZERO(path); -+ if (ret) -+ return ret; -+ -+ err = bch2_fs_open_incremental(path); -+ kfree(path); -+ -+ if (err) { -+ pr_err("Could not register bcachefs devices: %s", err); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+#endif -+ -+static long bch2_global_ioctl(unsigned cmd, void __user *arg) -+{ -+ switch (cmd) { -+#if 0 -+ case BCH_IOCTL_ASSEMBLE: -+ return bch2_ioctl_assemble(arg); -+ case BCH_IOCTL_INCREMENTAL: -+ return bch2_ioctl_incremental(arg); -+#endif -+ default: -+ return -ENOTTY; -+ } -+} -+ -+static long bch2_ioctl_query_uuid(struct bch_fs *c, -+ struct bch_ioctl_query_uuid __user *user_arg) -+{ -+ if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid, -+ sizeof(c->sb.user_uuid))) -+ return -EFAULT; -+ return 0; -+} -+ -+#if 0 -+static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) -+{ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ return bch2_fs_start(c); -+} -+ -+static long bch2_ioctl_stop(struct bch_fs *c) -+{ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ bch2_fs_stop(c); -+ return 0; -+} -+#endif -+ -+static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (arg.flags || arg.pad) -+ return 
-EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ ret = PTR_ERR_OR_ZERO(path); -+ if (ret) -+ return ret; -+ -+ ret = bch2_dev_add(c, path); -+ kfree(path); -+ -+ return ret; -+} -+ -+static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ return bch2_dev_remove(c, ca, arg.flags); -+} -+ -+static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ char *path; -+ int ret; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (arg.flags || arg.pad) -+ return -EINVAL; -+ -+ path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); -+ ret = PTR_ERR_OR_ZERO(path); -+ if (ret) -+ return ret; -+ -+ ret = bch2_dev_online(c, path); -+ kfree(path); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_offline(c, ca, arg.flags); -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_set_state(struct bch_fs *c, -+ struct bch_ioctl_disk_set_state arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| -+ BCH_FORCE_IF_METADATA_LOST| -+ BCH_FORCE_IF_DEGRADED| -+ BCH_BY_INDEX)) || -+ arg.pad[0] || arg.pad[1] || arg.pad[2] || -+ arg.new_state >= BCH_MEMBER_STATE_NR) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); -+ if (ret) -+ bch_err(c, "Error setting device state: %s", bch2_err_str(ret)); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+struct bch_data_ctx { -+ struct bch_fs *c; -+ struct bch_ioctl_data arg; -+ struct bch_move_stats stats; -+ -+ int ret; -+ -+ struct task_struct *thread; -+}; -+ -+static int bch2_data_thread(void *arg) -+{ -+ struct bch_data_ctx *ctx = arg; -+ -+ ctx->ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); -+ -+ ctx->stats.data_type = U8_MAX; -+ return 0; -+} -+ -+static int bch2_data_job_release(struct inode *inode, struct file *file) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ -+ kthread_stop(ctx->thread); -+ put_task_struct(ctx->thread); -+ kfree(ctx); -+ return 0; -+} -+ -+static ssize_t bch2_data_job_read(struct file *file, char __user *buf, -+ size_t len, loff_t *ppos) -+{ -+ struct bch_data_ctx *ctx = file->private_data; -+ struct bch_fs *c = ctx->c; -+ struct bch_ioctl_data_event e = { -+ .type = BCH_DATA_EVENT_PROGRESS, -+ .p.data_type = ctx->stats.data_type, -+ .p.btree_id = ctx->stats.pos.btree, -+ .p.pos = ctx->stats.pos.pos, -+ .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), -+ .p.sectors_total = bch2_fs_usage_read_short(c).used, -+ }; -+ -+ if (len < sizeof(e)) -+ return -EINVAL; -+ -+ if (copy_to_user(buf, &e, 
sizeof(e))) -+ return -EFAULT; -+ -+ return sizeof(e); -+} -+ -+static const struct file_operations bcachefs_data_ops = { -+ .release = bch2_data_job_release, -+ .read = bch2_data_job_read, -+ .llseek = no_llseek, -+}; -+ -+static long bch2_ioctl_data(struct bch_fs *c, -+ struct bch_ioctl_data arg) -+{ -+ struct bch_data_ctx *ctx = NULL; -+ struct file *file = NULL; -+ unsigned flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; -+ int ret, fd = -1; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (arg.op >= BCH_DATA_OP_NR || arg.flags) -+ return -EINVAL; -+ -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; -+ -+ ctx->c = c; -+ ctx->arg = arg; -+ -+ ctx->thread = kthread_create(bch2_data_thread, ctx, -+ "bch-data/%s", c->name); -+ if (IS_ERR(ctx->thread)) { -+ ret = PTR_ERR(ctx->thread); -+ goto err; -+ } -+ -+ ret = get_unused_fd_flags(flags); -+ if (ret < 0) -+ goto err; -+ fd = ret; -+ -+ file = anon_inode_getfile("[bcachefs]", &bcachefs_data_ops, ctx, flags); -+ if (IS_ERR(file)) { -+ ret = PTR_ERR(file); -+ goto err; -+ } -+ -+ fd_install(fd, file); -+ -+ get_task_struct(ctx->thread); -+ wake_up_process(ctx->thread); -+ -+ return fd; -+err: -+ if (fd >= 0) -+ put_unused_fd(fd); -+ if (!IS_ERR_OR_NULL(ctx->thread)) -+ kthread_stop(ctx->thread); -+ kfree(ctx); -+ return ret; -+} -+ -+static long bch2_ioctl_fs_usage(struct bch_fs *c, -+ struct bch_ioctl_fs_usage __user *user_arg) -+{ -+ struct bch_ioctl_fs_usage *arg = NULL; -+ struct bch_replicas_usage *dst_e, *dst_end; -+ struct bch_fs_usage_online *src; -+ u32 replica_entries_bytes; -+ unsigned i; -+ int ret = 0; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) -+ return -EFAULT; -+ -+ arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL); -+ if (!arg) -+ return -ENOMEM; -+ -+ src = bch2_fs_usage_read(c); -+ if (!src) { -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ arg->capacity = c->capacity; -+ arg->used = bch2_fs_sectors_used(c, src); -+ arg->online_reserved = src->online_reserved; -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) -+ arg->persistent_reserved[i] = src->u.persistent_reserved[i]; -+ -+ dst_e = arg->replicas; -+ dst_end = (void *) arg->replicas + replica_entries_bytes; -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *src_e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ /* check that we have enough space for one replicas entry */ -+ if (dst_e + 1 > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ dst_e->sectors = src->u.replicas[i]; -+ dst_e->r = *src_e; -+ -+ /* recheck after setting nr_devs: */ -+ if (replicas_usage_next(dst_e) > dst_end) { -+ ret = -ERANGE; -+ break; -+ } -+ -+ memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs); -+ -+ dst_e = replicas_usage_next(dst_e); -+ } -+ -+ arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas; -+ -+ percpu_up_read(&c->mark_lock); -+ kfree(src); -+ -+ if (ret) -+ goto err; -+ if (copy_to_user(user_arg, arg, -+ sizeof(*arg) + arg->replica_entries_bytes)) -+ ret = -EFAULT; -+err: -+ kfree(arg); -+ return ret; -+} -+ -+static long bch2_ioctl_dev_usage(struct bch_fs *c, -+ struct bch_ioctl_dev_usage __user *user_arg) -+{ -+ struct bch_ioctl_dev_usage arg; -+ struct bch_dev_usage src; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ if (copy_from_user(&arg, user_arg, sizeof(arg))) -+ return -EFAULT; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ 
arg.pad[0] || -+ arg.pad[1] || -+ arg.pad[2]) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ src = bch2_dev_usage_read(ca); -+ -+ arg.state = ca->mi.state; -+ arg.bucket_size = ca->mi.bucket_size; -+ arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; -+ arg.buckets_ec = src.buckets_ec; -+ -+ for (i = 0; i < BCH_DATA_NR; i++) { -+ arg.d[i].buckets = src.d[i].buckets; -+ arg.d[i].sectors = src.d[i].sectors; -+ arg.d[i].fragmented = src.d[i].fragmented; -+ } -+ -+ percpu_ref_put(&ca->ref); -+ -+ if (copy_to_user(user_arg, &arg, sizeof(arg))) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+static long bch2_ioctl_read_super(struct bch_fs *c, -+ struct bch_ioctl_read_super arg) -+{ -+ struct bch_dev *ca = NULL; -+ struct bch_sb *sb; -+ int ret = 0; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) || -+ arg.pad) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (arg.flags & BCH_READ_DEV) { -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ -+ if (IS_ERR(ca)) { -+ ret = PTR_ERR(ca); -+ goto err; -+ } -+ -+ sb = ca->disk_sb.sb; -+ } else { -+ sb = c->disk_sb.sb; -+ } -+ -+ if (vstruct_bytes(sb) > arg.size) { -+ ret = -ERANGE; -+ goto err; -+ } -+ -+ if (copy_to_user((void __user *)(unsigned long)arg.sb, sb, -+ vstruct_bytes(sb))) -+ ret = -EFAULT; -+err: -+ if (!IS_ERR_OR_NULL(ca)) -+ percpu_ref_put(&ca->ref); -+ mutex_unlock(&c->sb_lock); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_get_idx(struct bch_fs *c, -+ struct bch_ioctl_disk_get_idx arg) -+{ -+ dev_t dev = huge_decode_dev(arg.dev); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (!dev) -+ return -EINVAL; -+ -+ for_each_online_member(ca, c, i) -+ if (ca->dev == dev) { -+ percpu_ref_put(&ca->io_ref); -+ return i; -+ } -+ -+ return -BCH_ERR_ENOENT_dev_idx_not_found; -+} -+ -+static long bch2_ioctl_disk_resize(struct bch_fs *c, -+ struct bch_ioctl_disk_resize arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_dev_resize(c, ca, arg.nbuckets); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, -+ struct bch_ioctl_disk_resize_journal arg) -+{ -+ struct bch_dev *ca; -+ int ret; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if ((arg.flags & ~BCH_BY_INDEX) || -+ arg.pad) -+ return -EINVAL; -+ -+ if (arg.nbuckets > U32_MAX) -+ return -EINVAL; -+ -+ ca = bch2_device_lookup(c, arg.dev, arg.flags); -+ if (IS_ERR(ca)) -+ return PTR_ERR(ca); -+ -+ ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); -+ -+ percpu_ref_put(&ca->ref); -+ return ret; -+} -+ -+#define BCH_IOCTL(_name, _argtype) \ -+do { \ -+ _argtype i; \ -+ \ -+ if (copy_from_user(&i, arg, sizeof(i))) \ -+ return -EFAULT; \ -+ ret = bch2_ioctl_##_name(c, i); \ -+ goto out; \ -+} while (0) -+ -+long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) -+{ -+ long ret; -+ -+ switch (cmd) { -+ case BCH_IOCTL_QUERY_UUID: -+ return bch2_ioctl_query_uuid(c, arg); -+ case BCH_IOCTL_FS_USAGE: -+ return bch2_ioctl_fs_usage(c, arg); -+ case BCH_IOCTL_DEV_USAGE: -+ return bch2_ioctl_dev_usage(c, arg); -+#if 0 -+ case BCH_IOCTL_START: -+ BCH_IOCTL(start, struct bch_ioctl_start); -+ case 
BCH_IOCTL_STOP: -+ return bch2_ioctl_stop(c); -+#endif -+ case BCH_IOCTL_READ_SUPER: -+ BCH_IOCTL(read_super, struct bch_ioctl_read_super); -+ case BCH_IOCTL_DISK_GET_IDX: -+ BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); -+ } -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) -+ return -EINVAL; -+ -+ switch (cmd) { -+ case BCH_IOCTL_DISK_ADD: -+ BCH_IOCTL(disk_add, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_REMOVE: -+ BCH_IOCTL(disk_remove, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_ONLINE: -+ BCH_IOCTL(disk_online, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_OFFLINE: -+ BCH_IOCTL(disk_offline, struct bch_ioctl_disk); -+ case BCH_IOCTL_DISK_SET_STATE: -+ BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); -+ case BCH_IOCTL_DATA: -+ BCH_IOCTL(data, struct bch_ioctl_data); -+ case BCH_IOCTL_DISK_RESIZE: -+ BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); -+ case BCH_IOCTL_DISK_RESIZE_JOURNAL: -+ BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal); -+ -+ default: -+ return -ENOTTY; -+ } -+out: -+ if (ret < 0) -+ ret = bch2_err_class(ret); -+ return ret; -+} -+ -+static DEFINE_IDR(bch_chardev_minor); -+ -+static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) -+{ -+ unsigned minor = iminor(file_inode(filp)); -+ struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL; -+ void __user *arg = (void __user *) v; -+ -+ return c -+ ? bch2_fs_ioctl(c, cmd, arg) -+ : bch2_global_ioctl(cmd, arg); -+} -+ -+static const struct file_operations bch_chardev_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = bch2_chardev_ioctl, -+ .open = nonseekable_open, -+}; -+ -+static int bch_chardev_major; -+static struct class *bch_chardev_class; -+static struct device *bch_chardev; -+ -+void bch2_fs_chardev_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->chardev)) -+ device_unregister(c->chardev); -+ if (c->minor >= 0) -+ idr_remove(&bch_chardev_minor, c->minor); -+} -+ -+int bch2_fs_chardev_init(struct bch_fs *c) -+{ -+ c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); -+ if (c->minor < 0) -+ return c->minor; -+ -+ c->chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, c->minor), c, -+ "bcachefs%u-ctl", c->minor); -+ if (IS_ERR(c->chardev)) -+ return PTR_ERR(c->chardev); -+ -+ return 0; -+} -+ -+void bch2_chardev_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ device_destroy(bch_chardev_class, -+ MKDEV(bch_chardev_major, U8_MAX)); -+ if (!IS_ERR_OR_NULL(bch_chardev_class)) -+ class_destroy(bch_chardev_class); -+ if (bch_chardev_major > 0) -+ unregister_chrdev(bch_chardev_major, "bcachefs"); -+} -+ -+int __init bch2_chardev_init(void) -+{ -+ bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops); -+ if (bch_chardev_major < 0) -+ return bch_chardev_major; -+ -+ bch_chardev_class = class_create("bcachefs"); -+ if (IS_ERR(bch_chardev_class)) -+ return PTR_ERR(bch_chardev_class); -+ -+ bch_chardev = device_create(bch_chardev_class, NULL, -+ MKDEV(bch_chardev_major, U8_MAX), -+ NULL, "bcachefs-ctl"); -+ if (IS_ERR(bch_chardev)) -+ return PTR_ERR(bch_chardev); -+ -+ return 0; -+} -+ -+#endif /* NO_BCACHEFS_CHARDEV */ -diff --git a/fs/bcachefs/chardev.h b/fs/bcachefs/chardev.h -new file mode 100644 -index 000000000000..0f563ca53c36 ---- /dev/null -+++ b/fs/bcachefs/chardev.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHARDEV_H -+#define _BCACHEFS_CHARDEV_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+long bch2_fs_ioctl(struct 
bch_fs *, unsigned, void __user *); -+ -+void bch2_fs_chardev_exit(struct bch_fs *); -+int bch2_fs_chardev_init(struct bch_fs *); -+ -+void bch2_chardev_exit(void); -+int __init bch2_chardev_init(void); -+ -+#else -+ -+static inline long bch2_fs_ioctl(struct bch_fs *c, -+ unsigned cmd, void __user * arg) -+{ -+ return -ENOTTY; -+} -+ -+static inline void bch2_fs_chardev_exit(struct bch_fs *c) {} -+static inline int bch2_fs_chardev_init(struct bch_fs *c) { return 0; } -+ -+static inline void bch2_chardev_exit(void) {} -+static inline int __init bch2_chardev_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_CHARDEV_H */ -diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c -new file mode 100644 -index 000000000000..3c761ad6b1c8 ---- /dev/null -+++ b/fs/bcachefs/checksum.c -@@ -0,0 +1,804 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "errcode.h" -+#include "super.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * bch2_checksum state is an abstraction of the checksum state calculated over different pages. -+ * it features page merging without having the checksum algorithm lose its state. -+ * for native checksum aglorithms (like crc), a default seed value will do. -+ * for hash-like algorithms, a state needs to be stored -+ */ -+ -+struct bch2_checksum_state { -+ union { -+ u64 seed; -+ struct xxh64_state h64state; -+ }; -+ unsigned int type; -+}; -+ -+static void bch2_checksum_init(struct bch2_checksum_state *state) -+{ -+ switch (state->type) { -+ case BCH_CSUM_none: -+ case BCH_CSUM_crc32c: -+ case BCH_CSUM_crc64: -+ state->seed = 0; -+ break; -+ case BCH_CSUM_crc32c_nonzero: -+ state->seed = U32_MAX; -+ break; -+ case BCH_CSUM_crc64_nonzero: -+ state->seed = U64_MAX; -+ break; -+ case BCH_CSUM_xxhash: -+ xxh64_reset(&state->h64state, 0); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 bch2_checksum_final(const struct bch2_checksum_state *state) -+{ -+ switch (state->type) { -+ case BCH_CSUM_none: -+ case BCH_CSUM_crc32c: -+ case BCH_CSUM_crc64: -+ return state->seed; -+ case BCH_CSUM_crc32c_nonzero: -+ return state->seed ^ U32_MAX; -+ case BCH_CSUM_crc64_nonzero: -+ return state->seed ^ U64_MAX; -+ case BCH_CSUM_xxhash: -+ return xxh64_digest(&state->h64state); -+ default: -+ BUG(); -+ } -+} -+ -+static void bch2_checksum_update(struct bch2_checksum_state *state, const void *data, size_t len) -+{ -+ switch (state->type) { -+ case BCH_CSUM_none: -+ return; -+ case BCH_CSUM_crc32c_nonzero: -+ case BCH_CSUM_crc32c: -+ state->seed = crc32c(state->seed, data, len); -+ break; -+ case BCH_CSUM_crc64_nonzero: -+ case BCH_CSUM_crc64: -+ state->seed = crc64_be(state->seed, data, len); -+ break; -+ case BCH_CSUM_xxhash: -+ xxh64_update(&state->h64state, data, len); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ struct scatterlist *sg, size_t len) -+{ -+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); -+ int ret; -+ -+ skcipher_request_set_sync_tfm(req, tfm); -+ skcipher_request_set_crypt(req, sg, sg, len, nonce.d); -+ -+ ret = crypto_skcipher_encrypt(req); -+ if (ret) -+ pr_err("got error %i from crypto_skcipher_encrypt()", ret); -+ -+ return ret; -+} -+ -+static inline int do_encrypt(struct crypto_sync_skcipher *tfm, -+ struct nonce nonce, -+ void *buf, size_t len) -+{ -+ if 
(!is_vmalloc_addr(buf)) { -+ struct scatterlist sg; -+ -+ sg_init_table(&sg, 1); -+ sg_set_page(&sg, -+ is_vmalloc_addr(buf) -+ ? vmalloc_to_page(buf) -+ : virt_to_page(buf), -+ len, offset_in_page(buf)); -+ return do_encrypt_sg(tfm, nonce, &sg, len); -+ } else { -+ unsigned pages = buf_pages(buf, len); -+ struct scatterlist *sg; -+ size_t orig_len = len; -+ int ret, i; -+ -+ sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL); -+ if (!sg) -+ return -BCH_ERR_ENOMEM_do_encrypt; -+ -+ sg_init_table(sg, pages); -+ -+ for (i = 0; i < pages; i++) { -+ unsigned offset = offset_in_page(buf); -+ unsigned pg_len = min_t(size_t, len, PAGE_SIZE - offset); -+ -+ sg_set_page(sg + i, vmalloc_to_page(buf), pg_len, offset); -+ buf += pg_len; -+ len -= pg_len; -+ } -+ -+ ret = do_encrypt_sg(tfm, nonce, sg, orig_len); -+ kfree(sg); -+ return ret; -+ } -+} -+ -+int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, -+ void *buf, size_t len) -+{ -+ struct crypto_sync_skcipher *chacha20 = -+ crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ int ret; -+ -+ ret = PTR_ERR_OR_ZERO(chacha20); -+ if (ret) { -+ pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret)); -+ return ret; -+ } -+ -+ ret = crypto_skcipher_setkey(&chacha20->base, -+ (void *) key, sizeof(*key)); -+ if (ret) { -+ pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret)); -+ goto err; -+ } -+ -+ ret = do_encrypt(chacha20, nonce, buf, len); -+err: -+ crypto_free_sync_skcipher(chacha20); -+ return ret; -+} -+ -+static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc, -+ struct nonce nonce) -+{ -+ u8 key[POLY1305_KEY_SIZE]; -+ int ret; -+ -+ nonce.d[3] ^= BCH_NONCE_POLY; -+ -+ memset(key, 0, sizeof(key)); -+ ret = do_encrypt(c->chacha20, nonce, key, sizeof(key)); -+ if (ret) -+ return ret; -+ -+ desc->tfm = c->poly1305; -+ crypto_shash_init(desc); -+ crypto_shash_update(desc, key, sizeof(key)); -+ return 0; -+} -+ -+struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, -+ struct nonce nonce, const void *data, size_t len) -+{ -+ switch (type) { -+ case BCH_CSUM_none: -+ case BCH_CSUM_crc32c_nonzero: -+ case BCH_CSUM_crc64_nonzero: -+ case BCH_CSUM_crc32c: -+ case BCH_CSUM_xxhash: -+ case BCH_CSUM_crc64: { -+ struct bch2_checksum_state state; -+ -+ state.type = type; -+ -+ bch2_checksum_init(&state); -+ bch2_checksum_update(&state, data, len); -+ -+ return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) }; -+ } -+ -+ case BCH_CSUM_chacha20_poly1305_80: -+ case BCH_CSUM_chacha20_poly1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+ crypto_shash_update(desc, data, len); -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+int bch2_encrypt(struct bch_fs *c, unsigned type, -+ struct nonce nonce, void *data, size_t len) -+{ -+ if (!bch2_csum_type_is_encryption(type)) -+ return 0; -+ -+ return do_encrypt(c->chacha20, nonce, data, len); -+} -+ -+static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio, -+ struct bvec_iter *iter) -+{ -+ struct bio_vec bv; -+ -+ switch (type) { -+ case BCH_CSUM_none: -+ return (struct bch_csum) { 0 }; -+ case BCH_CSUM_crc32c_nonzero: -+ case BCH_CSUM_crc64_nonzero: -+ case BCH_CSUM_crc32c: -+ case BCH_CSUM_xxhash: -+ case BCH_CSUM_crc64: { -+ struct bch2_checksum_state state; -+ -+ state.type = 
type; -+ bch2_checksum_init(&state); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_local_page(bv.bv_page) + bv.bv_offset; -+ -+ bch2_checksum_update(&state, p, bv.bv_len); -+ kunmap_local(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ bch2_checksum_update(&state, page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) }; -+ } -+ -+ case BCH_CSUM_chacha20_poly1305_80: -+ case BCH_CSUM_chacha20_poly1305_128: { -+ SHASH_DESC_ON_STACK(desc, c->poly1305); -+ u8 digest[POLY1305_DIGEST_SIZE]; -+ struct bch_csum ret = { 0 }; -+ -+ gen_poly_key(c, desc, nonce); -+ -+#ifdef CONFIG_HIGHMEM -+ __bio_for_each_segment(bv, bio, *iter, *iter) { -+ void *p = kmap_local_page(bv.bv_page) + bv.bv_offset; -+ -+ crypto_shash_update(desc, p, bv.bv_len); -+ kunmap_local(p); -+ } -+#else -+ __bio_for_each_bvec(bv, bio, *iter, *iter) -+ crypto_shash_update(desc, -+ page_address(bv.bv_page) + bv.bv_offset, -+ bv.bv_len); -+#endif -+ crypto_shash_final(desc, digest); -+ -+ memcpy(&ret, digest, bch_crc_bytes[type]); -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ -+ return __bch2_checksum_bio(c, type, nonce, bio, &iter); -+} -+ -+int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ struct scatterlist sgl[16], *sg = sgl; -+ size_t bytes = 0; -+ int ret = 0; -+ -+ if (!bch2_csum_type_is_encryption(type)) -+ return 0; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ -+ bio_for_each_segment(bv, bio, iter) { -+ if (sg == sgl + ARRAY_SIZE(sgl)) { -+ sg_mark_end(sg - 1); -+ -+ ret = do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+ if (ret) -+ return ret; -+ -+ nonce = nonce_add(nonce, bytes); -+ bytes = 0; -+ -+ sg_init_table(sgl, ARRAY_SIZE(sgl)); -+ sg = sgl; -+ } -+ -+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset); -+ bytes += bv.bv_len; -+ } -+ -+ sg_mark_end(sg - 1); -+ return do_encrypt_sg(c->chacha20, nonce, sgl, bytes); -+} -+ -+struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, -+ struct bch_csum b, size_t b_len) -+{ -+ struct bch2_checksum_state state; -+ -+ state.type = type; -+ bch2_checksum_init(&state); -+ state.seed = le64_to_cpu(a.lo); -+ -+ BUG_ON(!bch2_checksum_mergeable(type)); -+ -+ while (b_len) { -+ unsigned page_len = min_t(unsigned, b_len, PAGE_SIZE); -+ -+ bch2_checksum_update(&state, -+ page_address(ZERO_PAGE(0)), page_len); -+ b_len -= page_len; -+ } -+ a.lo = cpu_to_le64(bch2_checksum_final(&state)); -+ a.lo ^= b.lo; -+ a.hi ^= b.hi; -+ return a; -+} -+ -+int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc_old, -+ struct bch_extent_crc_unpacked *crc_a, -+ struct bch_extent_crc_unpacked *crc_b, -+ unsigned len_a, unsigned len_b, -+ unsigned new_csum_type) -+{ -+ struct bvec_iter iter = bio->bi_iter; -+ struct nonce nonce = extent_nonce(version, crc_old); -+ struct bch_csum merged = { 0 }; -+ struct crc_split { -+ struct bch_extent_crc_unpacked *crc; -+ unsigned len; -+ unsigned csum_type; -+ struct bch_csum csum; -+ } splits[3] = { -+ { crc_a, len_a, new_csum_type, { 0 }}, -+ { crc_b, len_b, new_csum_type, { 0 } }, -+ { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type, { 0 } }, -+ }, *i; 
-+ bool mergeable = crc_old.csum_type == new_csum_type && -+ bch2_checksum_mergeable(new_csum_type); -+ unsigned crc_nonce = crc_old.nonce; -+ -+ BUG_ON(len_a + len_b > bio_sectors(bio)); -+ BUG_ON(crc_old.uncompressed_size != bio_sectors(bio)); -+ BUG_ON(crc_is_compressed(crc_old)); -+ BUG_ON(bch2_csum_type_is_encryption(crc_old.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)); -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ iter.bi_size = i->len << 9; -+ if (mergeable || i->crc) -+ i->csum = __bch2_checksum_bio(c, i->csum_type, -+ nonce, bio, &iter); -+ else -+ bio_advance_iter(bio, &iter, i->len << 9); -+ nonce = nonce_add(nonce, i->len << 9); -+ } -+ -+ if (mergeable) -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) -+ merged = bch2_checksum_merge(new_csum_type, merged, -+ i->csum, i->len << 9); -+ else -+ merged = bch2_checksum_bio(c, crc_old.csum_type, -+ extent_nonce(version, crc_old), bio); -+ -+ if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { -+ bch_err(c, "checksum error in %s() (memory corruption or bug?)\n" -+ "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)", -+ __func__, -+ crc_old.csum.hi, -+ crc_old.csum.lo, -+ merged.hi, -+ merged.lo, -+ bch2_csum_types[crc_old.csum_type], -+ bch2_csum_types[new_csum_type]); -+ return -EIO; -+ } -+ -+ for (i = splits; i < splits + ARRAY_SIZE(splits); i++) { -+ if (i->crc) -+ *i->crc = (struct bch_extent_crc_unpacked) { -+ .csum_type = i->csum_type, -+ .compression_type = crc_old.compression_type, -+ .compressed_size = i->len, -+ .uncompressed_size = i->len, -+ .offset = 0, -+ .live_size = i->len, -+ .nonce = crc_nonce, -+ .csum = i->csum, -+ }; -+ -+ if (bch2_csum_type_is_encryption(new_csum_type)) -+ crc_nonce += i->len; -+ } -+ -+ return 0; -+} -+ -+/* BCH_SB_FIELD_crypt: */ -+ -+static int bch2_sb_crypt_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); -+ -+ if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) { -+ prt_printf(err, "wrong size (got %zu should be %zu)", -+ vstruct_bytes(&crypt->field), sizeof(*crypt)); -+ return -BCH_ERR_invalid_sb_crypt; -+ } -+ -+ if (BCH_CRYPT_KDF_TYPE(crypt)) { -+ prt_printf(err, "bad kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt)); -+ return -BCH_ERR_invalid_sb_crypt; -+ } -+ -+ return 0; -+} -+ -+static void bch2_sb_crypt_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_crypt *crypt = field_to_type(f, crypt); -+ -+ prt_printf(out, "KFD: %llu", BCH_CRYPT_KDF_TYPE(crypt)); -+ prt_newline(out); -+ prt_printf(out, "scrypt n: %llu", BCH_KDF_SCRYPT_N(crypt)); -+ prt_newline(out); -+ prt_printf(out, "scrypt r: %llu", BCH_KDF_SCRYPT_R(crypt)); -+ prt_newline(out); -+ prt_printf(out, "scrypt p: %llu", BCH_KDF_SCRYPT_P(crypt)); -+ prt_newline(out); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_crypt = { -+ .validate = bch2_sb_crypt_validate, -+ .to_text = bch2_sb_crypt_to_text, -+}; -+ -+#ifdef __KERNEL__ -+static int __bch2_request_key(char *key_description, struct bch_key *key) -+{ -+ struct key *keyring_key; -+ const struct user_key_payload *ukp; -+ int ret; -+ -+ keyring_key = request_key(&key_type_user, key_description, NULL); -+ if (IS_ERR(keyring_key)) -+ return PTR_ERR(keyring_key); -+ -+ down_read(&keyring_key->sem); -+ ukp = dereference_key_locked(keyring_key); -+ if (ukp->datalen == sizeof(*key)) { -+ memcpy(key, ukp->data, ukp->datalen); -+ ret = 0; -+ } else { -+ ret = 
-EINVAL; -+ } -+ up_read(&keyring_key->sem); -+ key_put(keyring_key); -+ -+ return ret; -+} -+#else -+#include -+ -+static int __bch2_request_key(char *key_description, struct bch_key *key) -+{ -+ key_serial_t key_id; -+ -+ key_id = request_key("user", key_description, NULL, -+ KEY_SPEC_SESSION_KEYRING); -+ if (key_id >= 0) -+ goto got_key; -+ -+ key_id = request_key("user", key_description, NULL, -+ KEY_SPEC_USER_KEYRING); -+ if (key_id >= 0) -+ goto got_key; -+ -+ key_id = request_key("user", key_description, NULL, -+ KEY_SPEC_USER_SESSION_KEYRING); -+ if (key_id >= 0) -+ goto got_key; -+ -+ return -errno; -+got_key: -+ -+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key)) -+ return -1; -+ -+ return 0; -+} -+ -+#include "../crypto.h" -+#endif -+ -+int bch2_request_key(struct bch_sb *sb, struct bch_key *key) -+{ -+ struct printbuf key_description = PRINTBUF; -+ int ret; -+ -+ prt_printf(&key_description, "bcachefs:"); -+ pr_uuid(&key_description, sb->user_uuid.b); -+ -+ ret = __bch2_request_key(key_description.buf, key); -+ printbuf_exit(&key_description); -+ -+#ifndef __KERNEL__ -+ if (ret) { -+ char *passphrase = read_passphrase("Enter passphrase: "); -+ struct bch_encrypted_key sb_key; -+ -+ bch2_passphrase_check(sb, passphrase, -+ key, &sb_key); -+ ret = 0; -+ } -+#endif -+ -+ /* stash with memfd, pass memfd fd to mount */ -+ -+ return ret; -+} -+ -+#ifndef __KERNEL__ -+int bch2_revoke_key(struct bch_sb *sb) -+{ -+ key_serial_t key_id; -+ struct printbuf key_description = PRINTBUF; -+ -+ prt_printf(&key_description, "bcachefs:"); -+ pr_uuid(&key_description, sb->user_uuid.b); -+ -+ key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING); -+ printbuf_exit(&key_description); -+ if (key_id < 0) -+ return errno; -+ -+ keyctl_revoke(key_id); -+ -+ return 0; -+} -+#endif -+ -+int bch2_decrypt_sb_key(struct bch_fs *c, -+ struct bch_sb_field_crypt *crypt, -+ struct bch_key *key) -+{ -+ struct bch_encrypted_key sb_key = crypt->key; -+ struct bch_key user_key; -+ int ret = 0; -+ -+ /* is key encrypted? */ -+ if (!bch2_key_is_encrypted(&sb_key)) -+ goto out; -+ -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret)); -+ goto err; -+ } -+ -+ /* decrypt real key: */ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &sb_key, sizeof(sb_key)); -+ if (ret) -+ goto err; -+ -+ if (bch2_key_is_encrypted(&sb_key)) { -+ bch_err(c, "incorrect encryption key"); -+ ret = -EINVAL; -+ goto err; -+ } -+out: -+ *key = sb_key.key; -+err: -+ memzero_explicit(&sb_key, sizeof(sb_key)); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ return ret; -+} -+ -+static int bch2_alloc_ciphers(struct bch_fs *c) -+{ -+ int ret; -+ -+ if (!c->chacha20) -+ c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); -+ ret = PTR_ERR_OR_ZERO(c->chacha20); -+ -+ if (ret) { -+ bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); -+ return ret; -+ } -+ -+ if (!c->poly1305) -+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); -+ ret = PTR_ERR_OR_ZERO(c->poly1305); -+ -+ if (ret) { -+ bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_disable_encryption(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); -+ if (!crypt) -+ goto out; -+ -+ /* is key encrypted? 
*/ -+ ret = 0; -+ if (bch2_key_is_encrypted(&crypt->key)) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC); -+ crypt->key.key = key; -+ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_enable_encryption(struct bch_fs *c, bool keyed) -+{ -+ struct bch_encrypted_key key; -+ struct bch_key user_key; -+ struct bch_sb_field_crypt *crypt; -+ int ret = -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ -+ /* Do we already have an encryption key? */ -+ if (bch2_sb_field_get(c->disk_sb.sb, crypt)) -+ goto err; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto err; -+ -+ key.magic = cpu_to_le64(BCH_KEY_MAGIC); -+ get_random_bytes(&key.key, sizeof(key.key)); -+ -+ if (keyed) { -+ ret = bch2_request_key(c->disk_sb.sb, &user_key); -+ if (ret) { -+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret)); -+ goto err; -+ } -+ -+ ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), -+ &key, sizeof(key)); -+ if (ret) -+ goto err; -+ } -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto err; -+ -+ crypt = bch2_sb_field_resize(&c->disk_sb, crypt, -+ sizeof(*crypt) / sizeof(u64)); -+ if (!crypt) { -+ ret = -BCH_ERR_ENOSPC_sb_crypt; -+ goto err; -+ } -+ -+ crypt->key = key; -+ -+ /* write superblock */ -+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); -+ bch2_write_super(c); -+err: -+ mutex_unlock(&c->sb_lock); -+ memzero_explicit(&user_key, sizeof(user_key)); -+ memzero_explicit(&key, sizeof(key)); -+ return ret; -+} -+ -+void bch2_fs_encryption_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->poly1305)) -+ crypto_free_shash(c->poly1305); -+ if (!IS_ERR_OR_NULL(c->chacha20)) -+ crypto_free_sync_skcipher(c->chacha20); -+ if (!IS_ERR_OR_NULL(c->sha256)) -+ crypto_free_shash(c->sha256); -+} -+ -+int bch2_fs_encryption_init(struct bch_fs *c) -+{ -+ struct bch_sb_field_crypt *crypt; -+ struct bch_key key; -+ int ret = 0; -+ -+ c->sha256 = crypto_alloc_shash("sha256", 0, 0); -+ ret = PTR_ERR_OR_ZERO(c->sha256); -+ if (ret) { -+ bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret)); -+ goto out; -+ } -+ -+ crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); -+ if (!crypt) -+ goto out; -+ -+ ret = bch2_alloc_ciphers(c); -+ if (ret) -+ goto out; -+ -+ ret = bch2_decrypt_sb_key(c, crypt, &key); -+ if (ret) -+ goto out; -+ -+ ret = crypto_skcipher_setkey(&c->chacha20->base, -+ (void *) &key.key, sizeof(key.key)); -+ if (ret) -+ goto out; -+out: -+ memzero_explicit(&key, sizeof(key)); -+ return ret; -+} -diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h -new file mode 100644 -index 000000000000..13998388c545 ---- /dev/null -+++ b/fs/bcachefs/checksum.h -@@ -0,0 +1,213 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CHECKSUM_H -+#define _BCACHEFS_CHECKSUM_H -+ -+#include "bcachefs.h" -+#include "extents_types.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+static inline bool bch2_checksum_mergeable(unsigned type) -+{ -+ -+ switch (type) { -+ case BCH_CSUM_none: -+ case BCH_CSUM_crc32c: -+ case BCH_CSUM_crc64: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+struct bch_csum bch2_checksum_merge(unsigned, struct bch_csum, -+ struct bch_csum, size_t); -+ -+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28) -+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28) -+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28) 
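These namespace tags keep extent, btree and journal nonces disjoint by stamping the top bits of one nonce word. A rough userspace sketch of the layout and of the block-counter arithmetic done by nonce_add()/extent_nonce() further down; host-endian uint32_t stands in for __le32, and the version.hi/compression_type/size packing is dropped for brevity (names here are illustrative, not the kernel's):

/* toy_nonce.c - simplified model of the nonce layout above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CHACHA_BLOCK_SIZE	64

#define NONCE_EXTENT	(1u << 28)	/* namespace tag, top bits of d[3] */
#define NONCE_BTREE	(2u << 28)
#define NONCE_JOURNAL	(3u << 28)

struct nonce {
	uint32_t d[4];
};

/* Skip ahead by 'offset' bytes: the stream cipher works in 64-byte
 * blocks, so the block counter in d[0] advances accordingly (mirrors
 * nonce_add() asserting block alignment). */
static struct nonce nonce_add(struct nonce n, unsigned offset)
{
	assert(!(offset & (CHACHA_BLOCK_SIZE - 1)));
	n.d[0] += offset / CHACHA_BLOCK_SIZE;
	return n;
}

static struct nonce extent_nonce(uint64_t version, unsigned nonce_field)
{
	struct nonce n = {{
		0,
		(uint32_t)version,
		(uint32_t)(version >> 32),
		NONCE_EXTENT,	/* tag keeps the namespaces disjoint */
	}};

	/* crc.nonce counts sectors, hence the << 9 in the real code: */
	return nonce_add(n, nonce_field << 9);
}

int main(void)
{
	struct nonce n = extent_nonce(0x123456789abcdefULL, 2);

	printf("d[0]=%u (block offset), d[3]=%08x (tagged)\n",
	       (unsigned)n.d[0], (unsigned)n.d[3]);
	return 0;
}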
-+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28) -+#define BCH_NONCE_POLY cpu_to_le32(1 << 31) -+ -+struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, -+ const void *, size_t); -+ -+/* -+ * This is used for various on disk data structures - bch_sb, prio_set, bset, -+ * jset: The checksum is _always_ the first field of these structs -+ */ -+#define csum_vstruct(_c, _type, _nonce, _i) \ -+({ \ -+ const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\ -+ \ -+ bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\ -+}) -+ -+int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); -+int bch2_request_key(struct bch_sb *, struct bch_key *); -+#ifndef __KERNEL__ -+int bch2_revoke_key(struct bch_sb *); -+#endif -+ -+int bch2_encrypt(struct bch_fs *, unsigned, struct nonce, -+ void *data, size_t); -+ -+struct bch_csum bch2_checksum_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+int bch2_rechecksum_bio(struct bch_fs *, struct bio *, struct bversion, -+ struct bch_extent_crc_unpacked, -+ struct bch_extent_crc_unpacked *, -+ struct bch_extent_crc_unpacked *, -+ unsigned, unsigned, unsigned); -+ -+int __bch2_encrypt_bio(struct bch_fs *, unsigned, -+ struct nonce, struct bio *); -+ -+static inline int bch2_encrypt_bio(struct bch_fs *c, unsigned type, -+ struct nonce nonce, struct bio *bio) -+{ -+ return bch2_csum_type_is_encryption(type) -+ ? __bch2_encrypt_bio(c, type, nonce, bio) -+ : 0; -+} -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_crypt; -+ -+int bch2_decrypt_sb_key(struct bch_fs *, struct bch_sb_field_crypt *, -+ struct bch_key *); -+ -+int bch2_disable_encryption(struct bch_fs *); -+int bch2_enable_encryption(struct bch_fs *, bool); -+ -+void bch2_fs_encryption_exit(struct bch_fs *); -+int bch2_fs_encryption_init(struct bch_fs *); -+ -+static inline enum bch_csum_type bch2_csum_opt_to_type(enum bch_csum_opts type, -+ bool data) -+{ -+ switch (type) { -+ case BCH_CSUM_OPT_none: -+ return BCH_CSUM_none; -+ case BCH_CSUM_OPT_crc32c: -+ return data ? BCH_CSUM_crc32c : BCH_CSUM_crc32c_nonzero; -+ case BCH_CSUM_OPT_crc64: -+ return data ? BCH_CSUM_crc64 : BCH_CSUM_crc64_nonzero; -+ case BCH_CSUM_OPT_xxhash: -+ return BCH_CSUM_xxhash; -+ default: -+ BUG(); -+ } -+} -+ -+static inline enum bch_csum_type bch2_data_checksum_type(struct bch_fs *c, -+ struct bch_io_opts opts) -+{ -+ if (opts.nocow) -+ return 0; -+ -+ if (c->sb.encryption_type) -+ return c->opts.wide_macs -+ ? BCH_CSUM_chacha20_poly1305_128 -+ : BCH_CSUM_chacha20_poly1305_80; -+ -+ return bch2_csum_opt_to_type(opts.data_checksum, true); -+} -+ -+static inline enum bch_csum_type bch2_meta_checksum_type(struct bch_fs *c) -+{ -+ if (c->sb.encryption_type) -+ return BCH_CSUM_chacha20_poly1305_128; -+ -+ return bch2_csum_opt_to_type(c->opts.metadata_checksum, false); -+} -+ -+static inline bool bch2_checksum_type_valid(const struct bch_fs *c, -+ unsigned type) -+{ -+ if (type >= BCH_CSUM_NR) -+ return false; -+ -+ if (bch2_csum_type_is_encryption(type) && !c->chacha20) -+ return false; -+ -+ return true; -+} -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r) -+{ -+ /* -+ * XXX: need some way of preventing the compiler from optimizing this -+ * into a form that isn't constant time.. 
-+ */ -+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0; -+} -+ -+/* for skipping ahead and encrypting/decrypting at an offset: */ -+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset) -+{ -+ EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1)); -+ -+ le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE); -+ return nonce; -+} -+ -+static inline struct nonce null_nonce(void) -+{ -+ struct nonce ret; -+ -+ memset(&ret, 0, sizeof(ret)); -+ return ret; -+} -+ -+static inline struct nonce extent_nonce(struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ unsigned compression_type = crc_is_compressed(crc) -+ ? crc.compression_type -+ : 0; -+ unsigned size = compression_type ? crc.uncompressed_size : 0; -+ struct nonce nonce = (struct nonce) {{ -+ [0] = cpu_to_le32(size << 22), -+ [1] = cpu_to_le32(version.lo), -+ [2] = cpu_to_le32(version.lo >> 32), -+ [3] = cpu_to_le32(version.hi| -+ (compression_type << 24))^BCH_NONCE_EXTENT, -+ }}; -+ -+ return nonce_add(nonce, crc.nonce << 9); -+} -+ -+static inline bool bch2_key_is_encrypted(struct bch_encrypted_key *key) -+{ -+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC; -+} -+ -+static inline struct nonce __bch2_sb_key_nonce(struct bch_sb *sb) -+{ -+ __le64 magic = __bch2_sb_magic(sb); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+static inline struct nonce bch2_sb_key_nonce(struct bch_fs *c) -+{ -+ __le64 magic = bch2_sb_magic(c); -+ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = 0, -+ [2] = ((__le32 *) &magic)[0], -+ [3] = ((__le32 *) &magic)[1], -+ }}; -+} -+ -+#endif /* _BCACHEFS_CHECKSUM_H */ -diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c -new file mode 100644 -index 000000000000..f41889093a2c ---- /dev/null -+++ b/fs/bcachefs/clock.c -@@ -0,0 +1,193 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "clock.h" -+ -+#include -+#include -+#include -+ -+static inline long io_timer_cmp(io_timer_heap *h, -+ struct io_timer *l, -+ struct io_timer *r) -+{ -+ return l->expire - r->expire; -+} -+ -+void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (time_after_eq((unsigned long) atomic64_read(&clock->now), -+ timer->expire)) { -+ spin_unlock(&clock->timer_lock); -+ timer->fn(timer); -+ return; -+ } -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) -+ goto out; -+ -+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); -+out: -+ spin_unlock(&clock->timer_lock); -+} -+ -+void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) -+{ -+ size_t i; -+ -+ spin_lock(&clock->timer_lock); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ if (clock->timers.data[i] == timer) { -+ heap_del(&clock->timers, i, io_timer_cmp, NULL); -+ break; -+ } -+ -+ spin_unlock(&clock->timer_lock); -+} -+ -+struct io_clock_wait { -+ struct io_timer io_timer; -+ struct timer_list cpu_timer; -+ struct task_struct *task; -+ int expired; -+}; -+ -+static void io_clock_wait_fn(struct io_timer *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, io_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+static void io_clock_cpu_timeout(struct timer_list *timer) -+{ -+ struct io_clock_wait *wait = container_of(timer, -+ struct io_clock_wait, cpu_timer); -+ -+ wait->expired = 1; -+ wake_up_process(wait->task); -+} -+ -+void 
bch2_io_clock_schedule_timeout(struct io_clock *clock, unsigned long until) -+{ -+ struct io_clock_wait wait; -+ -+ /* XXX: calculate sleep time rigorously */ -+ wait.io_timer.expire = until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ bch2_io_timer_add(clock, &wait.io_timer); -+ -+ schedule(); -+ -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+void bch2_kthread_io_clock_wait(struct io_clock *clock, -+ unsigned long io_until, -+ unsigned long cpu_timeout) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct io_clock_wait wait; -+ -+ wait.io_timer.expire = io_until; -+ wait.io_timer.fn = io_clock_wait_fn; -+ wait.task = current; -+ wait.expired = 0; -+ bch2_io_timer_add(clock, &wait.io_timer); -+ -+ timer_setup_on_stack(&wait.cpu_timer, io_clock_cpu_timeout, 0); -+ -+ if (cpu_timeout != MAX_SCHEDULE_TIMEOUT) -+ mod_timer(&wait.cpu_timer, cpu_timeout + jiffies); -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE); -+ if (kthread && kthread_should_stop()) -+ break; -+ -+ if (wait.expired) -+ break; -+ -+ schedule(); -+ try_to_freeze(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+ del_timer_sync(&wait.cpu_timer); -+ destroy_timer_on_stack(&wait.cpu_timer); -+ bch2_io_timer_del(clock, &wait.io_timer); -+} -+ -+static struct io_timer *get_expired_timer(struct io_clock *clock, -+ unsigned long now) -+{ -+ struct io_timer *ret = NULL; -+ -+ spin_lock(&clock->timer_lock); -+ -+ if (clock->timers.used && -+ time_after_eq(now, clock->timers.data[0]->expire)) -+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL); -+ -+ spin_unlock(&clock->timer_lock); -+ -+ return ret; -+} -+ -+void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) -+{ -+ struct io_timer *timer; -+ unsigned long now = atomic64_add_return(sectors, &clock->now); -+ -+ while ((timer = get_expired_timer(clock, now))) -+ timer->fn(timer); -+} -+ -+void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) -+{ -+ unsigned long now; -+ unsigned i; -+ -+ out->atomic++; -+ spin_lock(&clock->timer_lock); -+ now = atomic64_read(&clock->now); -+ -+ for (i = 0; i < clock->timers.used; i++) -+ prt_printf(out, "%ps:\t%li\n", -+ clock->timers.data[i]->fn, -+ clock->timers.data[i]->expire - now); -+ spin_unlock(&clock->timer_lock); -+ --out->atomic; -+} -+ -+void bch2_io_clock_exit(struct io_clock *clock) -+{ -+ free_heap(&clock->timers); -+ free_percpu(clock->pcpu_buf); -+} -+ -+int bch2_io_clock_init(struct io_clock *clock) -+{ -+ atomic64_set(&clock->now, 0); -+ spin_lock_init(&clock->timer_lock); -+ -+ clock->max_slop = IO_CLOCK_PCPU_SECTORS * num_possible_cpus(); -+ -+ clock->pcpu_buf = alloc_percpu(*clock->pcpu_buf); -+ if (!clock->pcpu_buf) -+ return -BCH_ERR_ENOMEM_io_clock_init; -+ -+ if (!init_heap(&clock->timers, NR_IO_TIMERS, GFP_KERNEL)) -+ return -BCH_ERR_ENOMEM_io_clock_init; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/clock.h b/fs/bcachefs/clock.h -new file mode 100644 -index 000000000000..70a0f7436c84 ---- /dev/null -+++ b/fs/bcachefs/clock.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_H -+#define _BCACHEFS_CLOCK_H -+ -+void bch2_io_timer_add(struct io_clock *, struct io_timer *); -+void bch2_io_timer_del(struct io_clock *, struct io_timer *); -+void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long, -+ unsigned long); -+ -+void __bch2_increment_clock(struct io_clock *, unsigned); -+ -+static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors, -+ int rw) -+{ -+ 
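The fast path of bch2_increment_clock() here only touches a percpu counter, and falls into __bch2_increment_clock() once IO_CLOCK_PCPU_SECTORS worth of IO has accumulated. A compilable C11 sketch of that batching pattern, with a thread-local counter standing in for the percpu buffer and the timer-firing slowpath reduced to an atomic add:

/* toy_io_clock.c - the batched clock-increment pattern above. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define IO_CLOCK_PCPU_SECTORS	128

static _Atomic uint64_t clock_now;
static _Thread_local unsigned pcpu_buf;	/* stand-in for the percpu counter */

static void increment_clock_slowpath(unsigned sectors)
{
	/* the real slowpath also fires expired io_timers here */
	atomic_fetch_add(&clock_now, sectors);
}

static void increment_clock(unsigned sectors)
{
	pcpu_buf += sectors;
	if (pcpu_buf >= IO_CLOCK_PCPU_SECTORS) {
		unsigned batch = pcpu_buf;

		pcpu_buf = 0;	/* like this_cpu_xchg(*buf, 0) */
		increment_clock_slowpath(batch);
	}
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		increment_clock(8);

	/* up to IO_CLOCK_PCPU_SECTORS - 1 sectors may still be buffered,
	 * which is the "only approximate" caveat in clock_types.h */
	printf("clock: %llu of 8000 sectors\n",
	       (unsigned long long)atomic_load(&clock_now));
	return 0;
}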
struct io_clock *clock = &c->io_clock[rw]; -+ -+ if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >= -+ IO_CLOCK_PCPU_SECTORS)) -+ __bch2_increment_clock(clock, this_cpu_xchg(*clock->pcpu_buf, 0)); -+} -+ -+void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long); -+ -+#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\ -+({ \ -+ long __ret = timeout; \ -+ might_sleep(); \ -+ if (!___wait_cond_timeout(condition)) \ -+ __ret = __wait_event_timeout(wq, condition, timeout); \ -+ __ret; \ -+}) -+ -+void bch2_io_timers_to_text(struct printbuf *, struct io_clock *); -+ -+void bch2_io_clock_exit(struct io_clock *); -+int bch2_io_clock_init(struct io_clock *); -+ -+#endif /* _BCACHEFS_CLOCK_H */ -diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h -new file mode 100644 -index 000000000000..5fae0012d808 ---- /dev/null -+++ b/fs/bcachefs/clock_types.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_CLOCK_TYPES_H -+#define _BCACHEFS_CLOCK_TYPES_H -+ -+#include "util.h" -+ -+#define NR_IO_TIMERS (BCH_SB_MEMBERS_MAX * 3) -+ -+/* -+ * Clocks/timers in units of sectors of IO: -+ * -+ * Note - they use percpu batching, so they're only approximate. -+ */ -+ -+struct io_timer; -+typedef void (*io_timer_fn)(struct io_timer *); -+ -+struct io_timer { -+ io_timer_fn fn; -+ unsigned long expire; -+}; -+ -+/* Amount to buffer up on a percpu counter */ -+#define IO_CLOCK_PCPU_SECTORS 128 -+ -+typedef HEAP(struct io_timer *) io_timer_heap; -+ -+struct io_clock { -+ atomic64_t now; -+ u16 __percpu *pcpu_buf; -+ unsigned max_slop; -+ -+ spinlock_t timer_lock; -+ io_timer_heap timers; -+}; -+ -+#endif /* _BCACHEFS_CLOCK_TYPES_H */ -diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c -new file mode 100644 -index 000000000000..a8b148ec2a2b ---- /dev/null -+++ b/fs/bcachefs/compress.c -@@ -0,0 +1,728 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "checksum.h" -+#include "compress.h" -+#include "extents.h" -+#include "super-io.h" -+ -+#include -+#include -+#include -+ -+/* Bounce buffer: */ -+struct bbuf { -+ void *b; -+ enum { -+ BB_NONE, -+ BB_VMAP, -+ BB_KMALLOC, -+ BB_MEMPOOL, -+ } type; -+ int rw; -+}; -+ -+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw) -+{ -+ void *b; -+ -+ BUG_ON(size > c->opts.encoded_extent_max); -+ -+ b = kmalloc(size, GFP_NOFS|__GFP_NOWARN); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw }; -+ -+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS); -+ if (b) -+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw }; -+ -+ BUG(); -+} -+ -+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ void *expected_start = NULL; -+ -+ __bio_for_each_bvec(bv, bio, iter, start) { -+ if (expected_start && -+ expected_start != page_address(bv.bv_page) + bv.bv_offset) -+ return false; -+ -+ expected_start = page_address(bv.bv_page) + -+ bv.bv_offset + bv.bv_len; -+ } -+ -+ return true; -+} -+ -+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio, -+ struct bvec_iter start, int rw) -+{ -+ struct bbuf ret; -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ unsigned nr_pages = 0; -+ struct page *stack_pages[16]; -+ struct page **pages = NULL; -+ void *data; -+ -+ BUG_ON(start.bi_size > c->opts.encoded_extent_max); -+ -+ if (!PageHighMem(bio_iter_page(bio, start)) && -+ bio_phys_contig(bio, start)) -+ return (struct bbuf) { -+ .b 
= page_address(bio_iter_page(bio, start)) + -+ bio_iter_offset(bio, start), -+ .type = BB_NONE, .rw = rw -+ }; -+ -+ /* check if we can map the pages contiguously: */ -+ __bio_for_each_segment(bv, bio, iter, start) { -+ if (iter.bi_size != start.bi_size && -+ bv.bv_offset) -+ goto bounce; -+ -+ if (bv.bv_len < iter.bi_size && -+ bv.bv_offset + bv.bv_len < PAGE_SIZE) -+ goto bounce; -+ -+ nr_pages++; -+ } -+ -+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages); -+ -+ pages = nr_pages > ARRAY_SIZE(stack_pages) -+ ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS) -+ : stack_pages; -+ if (!pages) -+ goto bounce; -+ -+ nr_pages = 0; -+ __bio_for_each_segment(bv, bio, iter, start) -+ pages[nr_pages++] = bv.bv_page; -+ -+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); -+ if (pages != stack_pages) -+ kfree(pages); -+ -+ if (data) -+ return (struct bbuf) { -+ .b = data + bio_iter_offset(bio, start), -+ .type = BB_VMAP, .rw = rw -+ }; -+bounce: -+ ret = __bounce_alloc(c, start.bi_size, rw); -+ -+ if (rw == READ) -+ memcpy_from_bio(ret.b, bio, start); -+ -+ return ret; -+} -+ -+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw) -+{ -+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw); -+} -+ -+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf) -+{ -+ switch (buf.type) { -+ case BB_NONE: -+ break; -+ case BB_VMAP: -+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK)); -+ break; -+ case BB_KMALLOC: -+ kfree(buf.b); -+ break; -+ case BB_MEMPOOL: -+ mempool_free(buf.b, &c->compression_bounce[buf.rw]); -+ break; -+ } -+} -+ -+static inline void zlib_set_workspace(z_stream *strm, void *workspace) -+{ -+#ifdef __KERNEL__ -+ strm->workspace = workspace; -+#endif -+} -+ -+static int __bio_uncompress(struct bch_fs *c, struct bio *src, -+ void *dst_data, struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf src_data = { NULL }; -+ size_t src_len = src->bi_iter.bi_size; -+ size_t dst_len = crc.uncompressed_size << 9; -+ void *workspace; -+ int ret; -+ -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ switch (crc.compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4_old: -+ case BCH_COMPRESSION_TYPE_lz4: -+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data, -+ src_len, dst_len, dst_len); -+ if (ret != dst_len) -+ goto err; -+ break; -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src_data.b, -+ .avail_in = src_len, -+ .next_out = dst_data, -+ .avail_out = dst_len, -+ }; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_inflateInit2(&strm, -MAX_WBITS); -+ ret = zlib_inflate(&strm, Z_FINISH); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != Z_STREAM_END) -+ goto err; -+ break; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ ZSTD_DCtx *ctx; -+ size_t real_src_len = le32_to_cpup(src_data.b); -+ -+ if (real_src_len > src_len - 4) -+ goto err; -+ -+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS); -+ ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound()); -+ -+ ret = zstd_decompress_dctx(ctx, -+ dst_data, dst_len, -+ src_data.b + 4, real_src_len); -+ -+ mempool_free(workspace, &c->decompress_workspace); -+ -+ if (ret != dst_len) -+ goto err; -+ break; -+ } -+ default: -+ BUG(); -+ } -+ ret = 0; -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ return ret; -+err: -+ ret = -EIO; -+ goto out; -+} -+ -+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, -+ struct bch_extent_crc_unpacked *crc) 
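The zstd branch of __bio_uncompress() above depends on a 4-byte little-endian length prefix, because the on-disk extent is padded to a sector boundary while zstd needs the exact compressed size back. A self-contained sketch of that framing and of the `real_src_len > src_len - 4` bounds check; no real zstd is involved, the payload is opaque bytes:

/* toy_framing.c - the little-endian length prefix used by the zstd path. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void put_le32(uint8_t *p, uint32_t v)
{
	p[0] = v; p[1] = v >> 8; p[2] = v >> 16; p[3] = v >> 24;
}

static uint32_t get_le32(const uint8_t *p)
{
	return p[0] | p[1] << 8 | (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

/* Returns payload length, or -1 if the header claims more bytes than
 * the sector-padded buffer actually holds. */
static int frame_extract(const uint8_t *buf, size_t buf_len,
			 const uint8_t **payload)
{
	uint32_t len;

	if (buf_len < 4)
		return -1;
	len = get_le32(buf);
	if (len > buf_len - 4)
		return -1;
	*payload = buf + 4;
	return (int)len;
}

int main(void)
{
	uint8_t block[512] = { 0 };	/* one "sector" on disk */
	const uint8_t msg[] = "compressed bytes would go here";
	const uint8_t *payload;

	put_le32(block, sizeof(msg));
	memcpy(block + 4, msg, sizeof(msg));

	int n = frame_extract(block, sizeof(block), &payload);
	assert(n == (int)sizeof(msg) && !memcmp(payload, msg, n));
	printf("recovered %d payload bytes\n", n);
	return 0;
}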
-+{ -+ struct bbuf data = { NULL }; -+ size_t dst_len = crc->uncompressed_size << 9; -+ -+ /* bio must own its pages: */ -+ BUG_ON(!bio->bi_vcnt); -+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs); -+ -+ if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max || -+ crc->compressed_size << 9 > c->opts.encoded_extent_max) { -+ bch_err(c, "error rewriting existing data: extent too big"); -+ return -EIO; -+ } -+ -+ data = __bounce_alloc(c, dst_len, WRITE); -+ -+ if (__bio_uncompress(c, bio, data.b, *crc)) { -+ if (!c->opts.no_data_io) -+ bch_err(c, "error rewriting existing data: decompression error"); -+ bio_unmap_or_unbounce(c, data); -+ return -EIO; -+ } -+ -+ /* -+ * XXX: don't have a good way to assert that the bio was allocated with -+ * enough space, we depend on bch2_move_extent doing the right thing -+ */ -+ bio->bi_iter.bi_size = crc->live_size << 9; -+ -+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9)); -+ -+ crc->csum_type = 0; -+ crc->compression_type = 0; -+ crc->compressed_size = crc->live_size; -+ crc->uncompressed_size = crc->live_size; -+ crc->offset = 0; -+ crc->csum = (struct bch_csum) { 0, 0 }; -+ -+ bio_unmap_or_unbounce(c, data); -+ return 0; -+} -+ -+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, -+ struct bio *dst, struct bvec_iter dst_iter, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bbuf dst_data = { NULL }; -+ size_t dst_len = crc.uncompressed_size << 9; -+ int ret; -+ -+ if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max || -+ crc.compressed_size << 9 > c->opts.encoded_extent_max) -+ return -EIO; -+ -+ dst_data = dst_len == dst_iter.bi_size -+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE) -+ : __bounce_alloc(c, dst_len, WRITE); -+ -+ ret = __bio_uncompress(c, src, dst_data.b, crc); -+ if (ret) -+ goto err; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9)); -+err: -+ bio_unmap_or_unbounce(c, dst_data); -+ return ret; -+} -+ -+static int attempt_compress(struct bch_fs *c, -+ void *workspace, -+ void *dst, size_t dst_len, -+ void *src, size_t src_len, -+ struct bch_compression_opt compression) -+{ -+ enum bch_compression_type compression_type = -+ __bch2_compression_opt_to_type[compression.type]; -+ -+ switch (compression_type) { -+ case BCH_COMPRESSION_TYPE_lz4: -+ if (compression.level < LZ4HC_MIN_CLEVEL) { -+ int len = src_len; -+ int ret = LZ4_compress_destSize( -+ src, dst, -+ &len, dst_len, -+ workspace); -+ if (len < src_len) -+ return -len; -+ -+ return ret; -+ } else { -+ int ret = LZ4_compress_HC( -+ src, dst, -+ src_len, dst_len, -+ compression.level, -+ workspace); -+ -+ return ret ?: -1; -+ } -+ case BCH_COMPRESSION_TYPE_gzip: { -+ z_stream strm = { -+ .next_in = src, -+ .avail_in = src_len, -+ .next_out = dst, -+ .avail_out = dst_len, -+ }; -+ -+ zlib_set_workspace(&strm, workspace); -+ zlib_deflateInit2(&strm, -+ compression.level -+ ? 
clamp_t(unsigned, compression.level, -+ Z_BEST_SPEED, Z_BEST_COMPRESSION) -+ : Z_DEFAULT_COMPRESSION, -+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, -+ Z_DEFAULT_STRATEGY); -+ -+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END) -+ return 0; -+ -+ if (zlib_deflateEnd(&strm) != Z_OK) -+ return 0; -+ -+ return strm.total_out; -+ } -+ case BCH_COMPRESSION_TYPE_zstd: { -+ /* -+ * rescale: -+ * zstd max compression level is 22, our max level is 15 -+ */ -+ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel()); -+ ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max); -+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace, -+ zstd_cctx_workspace_bound(¶ms.cParams)); -+ -+ /* -+ * ZSTD requires that when we decompress we pass in the exact -+ * compressed size - rounding it up to the nearest sector -+ * doesn't work, so we use the first 4 bytes of the buffer for -+ * that. -+ * -+ * Additionally, the ZSTD code seems to have a bug where it will -+ * write just past the end of the buffer - so subtract a fudge -+ * factor (7 bytes) from the dst buffer size to account for -+ * that. -+ */ -+ size_t len = zstd_compress_cctx(ctx, -+ dst + 4, dst_len - 4 - 7, -+ src, src_len, -+ &c->zstd_params); -+ if (zstd_is_error(len)) -+ return 0; -+ -+ *((__le32 *) dst) = cpu_to_le32(len); -+ return len + 4; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static unsigned __bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ struct bch_compression_opt compression) -+{ -+ struct bbuf src_data = { NULL }, dst_data = { NULL }; -+ void *workspace; -+ enum bch_compression_type compression_type = -+ __bch2_compression_opt_to_type[compression.type]; -+ unsigned pad; -+ int ret = 0; -+ -+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR); -+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); -+ -+ /* If it's only one block, don't bother trying to compress: */ -+ if (src->bi_iter.bi_size <= c->opts.block_size) -+ return BCH_COMPRESSION_TYPE_incompressible; -+ -+ dst_data = bio_map_or_bounce(c, dst, WRITE); -+ src_data = bio_map_or_bounce(c, src, READ); -+ -+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS); -+ -+ *src_len = src->bi_iter.bi_size; -+ *dst_len = dst->bi_iter.bi_size; -+ -+ /* -+ * XXX: this algorithm sucks when the compression code doesn't tell us -+ * how much would fit, like LZ4 does: -+ */ -+ while (1) { -+ if (*src_len <= block_bytes(c)) { -+ ret = -1; -+ break; -+ } -+ -+ ret = attempt_compress(c, workspace, -+ dst_data.b, *dst_len, -+ src_data.b, *src_len, -+ compression); -+ if (ret > 0) { -+ *dst_len = ret; -+ ret = 0; -+ break; -+ } -+ -+ /* Didn't fit: should we retry with a smaller amount? 
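The surrounding loop in __bio_compress() shrinks the input and retries when the output did not fit, using the LZ4_compress_destSize() convention that a negative return hints how much input would have fit. A simplified, runnable model of that loop, with a fake fixed 2:1 compressor and the halving fallback branch omitted:

/* toy_shrink_loop.c - the compress-retry loop sketched standalone. */
#include <stdio.h>

#define BLOCK_BYTES 512

/* Fake attempt_compress(): positive return = bytes written, negative
 * return = hint, how many input bytes would have fit. */
static int fake_compress(size_t src_len, size_t dst_len)
{
	size_t fits = dst_len * 2;	/* pretend a fixed 2:1 ratio */

	if (src_len > fits)
		return -(int)fits;
	return (int)(src_len / 2);
}

static size_t round_down_block(size_t v)
{
	return v & ~(size_t)(BLOCK_BYTES - 1);
}

int main(void)
{
	size_t src_len = 8192, dst_len = 1024;

	while (1) {
		if (src_len <= BLOCK_BYTES) {	/* too small to bother */
			puts("incompressible");
			return 0;
		}

		int ret = fake_compress(src_len, dst_len);
		if (ret > 0) {
			printf("compressed %zu -> %d bytes\n", src_len, ret);
			return 0;
		}

		/* didn't fit: shrink the input to the hint and retry,
		 * keeping it aligned to the filesystem block size */
		src_len = round_down_block((size_t)-ret);
	}
}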
*/ -+ if (*src_len <= *dst_len) { -+ ret = -1; -+ break; -+ } -+ -+ /* -+ * If ret is negative, it's a hint as to how much data would fit -+ */ -+ BUG_ON(-ret >= *src_len); -+ -+ if (ret < 0) -+ *src_len = -ret; -+ else -+ *src_len -= (*src_len - *dst_len) / 2; -+ *src_len = round_down(*src_len, block_bytes(c)); -+ } -+ -+ mempool_free(workspace, &c->compress_workspace[compression_type]); -+ -+ if (ret) -+ goto err; -+ -+ /* Didn't get smaller: */ -+ if (round_up(*dst_len, block_bytes(c)) >= *src_len) -+ goto err; -+ -+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len; -+ -+ memset(dst_data.b + *dst_len, 0, pad); -+ *dst_len += pad; -+ -+ if (dst_data.type != BB_NONE && -+ dst_data.type != BB_VMAP) -+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b); -+ -+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size); -+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size); -+ BUG_ON(*dst_len & (block_bytes(c) - 1)); -+ BUG_ON(*src_len & (block_bytes(c) - 1)); -+ ret = compression_type; -+out: -+ bio_unmap_or_unbounce(c, src_data); -+ bio_unmap_or_unbounce(c, dst_data); -+ return ret; -+err: -+ ret = BCH_COMPRESSION_TYPE_incompressible; -+ goto out; -+} -+ -+unsigned bch2_bio_compress(struct bch_fs *c, -+ struct bio *dst, size_t *dst_len, -+ struct bio *src, size_t *src_len, -+ unsigned compression_opt) -+{ -+ unsigned orig_dst = dst->bi_iter.bi_size; -+ unsigned orig_src = src->bi_iter.bi_size; -+ unsigned compression_type; -+ -+ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */ -+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size, -+ c->opts.encoded_extent_max); -+ /* Don't generate a bigger output than input: */ -+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ -+ compression_type = -+ __bio_compress(c, dst, dst_len, src, src_len, -+ bch2_compression_decode(compression_opt)); -+ -+ dst->bi_iter.bi_size = orig_dst; -+ src->bi_iter.bi_size = orig_src; -+ return compression_type; -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *, u64); -+ -+#define BCH_FEATURE_none 0 -+ -+static const unsigned bch2_compression_opt_to_feature[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+#undef BCH_FEATURE_none -+ -+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) -+{ -+ int ret = 0; -+ -+ if ((c->sb.features & f) == f) -+ return 0; -+ -+ mutex_lock(&c->sb_lock); -+ -+ if ((c->sb.features & f) == f) { -+ mutex_unlock(&c->sb_lock); -+ return 0; -+ } -+ -+ ret = __bch2_fs_compress_init(c, c->sb.features|f); -+ if (ret) { -+ mutex_unlock(&c->sb_lock); -+ return ret; -+ } -+ -+ c->disk_sb.sb->features[0] |= cpu_to_le64(f); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+int bch2_check_set_has_compressed_data(struct bch_fs *c, -+ unsigned compression_opt) -+{ -+ unsigned compression_type = bch2_compression_decode(compression_opt).type; -+ -+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); -+ -+ return compression_type -+ ? 
__bch2_check_set_has_compressed_data(c, -+ 1ULL << bch2_compression_opt_to_feature[compression_type]) -+ : 0; -+} -+ -+void bch2_fs_compress_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ mempool_exit(&c->decompress_workspace); -+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++) -+ mempool_exit(&c->compress_workspace[i]); -+ mempool_exit(&c->compression_bounce[WRITE]); -+ mempool_exit(&c->compression_bounce[READ]); -+} -+ -+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) -+{ -+ size_t decompress_workspace_size = 0; -+ ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), -+ c->opts.encoded_extent_max); -+ struct { -+ unsigned feature; -+ enum bch_compression_type type; -+ size_t compress_workspace; -+ size_t decompress_workspace; -+ } compression_types[] = { -+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, -+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS), -+ 0 }, -+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, -+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), -+ zlib_inflate_workspacesize(), }, -+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, -+ zstd_cctx_workspace_bound(¶ms.cParams), -+ zstd_dctx_workspace_bound() }, -+ }, *i; -+ bool have_compressed = false; -+ -+ c->zstd_params = params; -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) -+ have_compressed |= (features & (1 << i->feature)) != 0; -+ -+ if (!have_compressed) -+ return 0; -+ -+ if (!mempool_initialized(&c->compression_bounce[READ]) && -+ mempool_init_kvpmalloc_pool(&c->compression_bounce[READ], -+ 1, c->opts.encoded_extent_max)) -+ return -BCH_ERR_ENOMEM_compression_bounce_read_init; -+ -+ if (!mempool_initialized(&c->compression_bounce[WRITE]) && -+ mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE], -+ 1, c->opts.encoded_extent_max)) -+ return -BCH_ERR_ENOMEM_compression_bounce_write_init; -+ -+ for (i = compression_types; -+ i < compression_types + ARRAY_SIZE(compression_types); -+ i++) { -+ decompress_workspace_size = -+ max(decompress_workspace_size, i->decompress_workspace); -+ -+ if (!(features & (1 << i->feature))) -+ continue; -+ -+ if (mempool_initialized(&c->compress_workspace[i->type])) -+ continue; -+ -+ if (mempool_init_kvpmalloc_pool( -+ &c->compress_workspace[i->type], -+ 1, i->compress_workspace)) -+ return -BCH_ERR_ENOMEM_compression_workspace_init; -+ } -+ -+ if (!mempool_initialized(&c->decompress_workspace) && -+ mempool_init_kvpmalloc_pool(&c->decompress_workspace, -+ 1, decompress_workspace_size)) -+ return -BCH_ERR_ENOMEM_decompression_workspace_init; -+ -+ return 0; -+} -+ -+static u64 compression_opt_to_feature(unsigned v) -+{ -+ unsigned type = bch2_compression_decode(v).type; -+ -+ return BIT_ULL(bch2_compression_opt_to_feature[type]); -+} -+ -+int bch2_fs_compress_init(struct bch_fs *c) -+{ -+ u64 f = c->sb.features; -+ -+ f |= compression_opt_to_feature(c->opts.compression); -+ f |= compression_opt_to_feature(c->opts.background_compression); -+ -+ return __bch2_fs_compress_init(c, f); -+} -+ -+int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, -+ struct printbuf *err) -+{ -+ char *val = kstrdup(_val, GFP_KERNEL); -+ char *p = val, *type_str, *level_str; -+ struct bch_compression_opt opt = { 0 }; -+ int ret; -+ -+ if (!val) -+ return -ENOMEM; -+ -+ type_str = strsep(&p, ":"); -+ level_str = p; -+ -+ ret = match_string(bch2_compression_opts, -1, type_str); -+ if (ret < 0 && err) -+ prt_str(err, "invalid compression type"); -+ if (ret < 0) -+ goto err; -+ -+ opt.type = 
ret; -+ -+ if (level_str) { -+ unsigned level; -+ -+ ret = kstrtouint(level_str, 10, &level); -+ if (!ret && !opt.type && level) -+ ret = -EINVAL; -+ if (!ret && level > 15) -+ ret = -EINVAL; -+ if (ret < 0 && err) -+ prt_str(err, "invalid compression level"); -+ if (ret < 0) -+ goto err; -+ -+ opt.level = level; -+ } -+ -+ *res = bch2_compression_encode(opt); -+err: -+ kfree(val); -+ return ret; -+} -+ -+void bch2_compression_opt_to_text(struct printbuf *out, u64 v) -+{ -+ struct bch_compression_opt opt = bch2_compression_decode(v); -+ -+ if (opt.type < BCH_COMPRESSION_OPT_NR) -+ prt_str(out, bch2_compression_opts[opt.type]); -+ else -+ prt_printf(out, "(unknown compression opt %u)", opt.type); -+ if (opt.level) -+ prt_printf(out, ":%u", opt.level); -+} -+ -+void bch2_opt_compression_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_sb *sb, -+ u64 v) -+{ -+ return bch2_compression_opt_to_text(out, v); -+} -+ -+int bch2_opt_compression_validate(u64 v, struct printbuf *err) -+{ -+ if (!bch2_compression_opt_valid(v)) { -+ prt_printf(err, "invalid compression opt %llu", v); -+ return -BCH_ERR_invalid_sb_opt_compression; -+ } -+ -+ return 0; -+} -diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h -new file mode 100644 -index 000000000000..607fd5e232c9 ---- /dev/null -+++ b/fs/bcachefs/compress.h -@@ -0,0 +1,73 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_COMPRESS_H -+#define _BCACHEFS_COMPRESS_H -+ -+#include "extents_types.h" -+ -+static const unsigned __bch2_compression_opt_to_type[] = { -+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, -+ BCH_COMPRESSION_OPTS() -+#undef x -+}; -+ -+struct bch_compression_opt { -+ u8 type:4, -+ level:4; -+}; -+ -+static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) -+{ -+ return (struct bch_compression_opt) { -+ .type = v & 15, -+ .level = v >> 4, -+ }; -+} -+ -+static inline bool bch2_compression_opt_valid(unsigned v) -+{ -+ struct bch_compression_opt opt = __bch2_compression_decode(v); -+ -+ return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); -+} -+ -+static inline struct bch_compression_opt bch2_compression_decode(unsigned v) -+{ -+ return bch2_compression_opt_valid(v) -+ ? 
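struct bch_compression_opt above packs type and level into a single byte, low nibble type, high nibble level, so a compression option can travel as a plain integer. A self-contained round-trip of that encoding, with a made-up option enum standing in for BCH_COMPRESSION_OPTS():

/* toy_copt.c - the 4+4 bit compression option encoding. */
#include <assert.h>
#include <stdio.h>

struct compression_opt {
	unsigned char type:4, level:4;
};

enum { OPT_none, OPT_lz4, OPT_gzip, OPT_zstd, OPT_NR };

static unsigned encode(struct compression_opt opt)
{
	return opt.type | (opt.level << 4);
}

static struct compression_opt decode(unsigned v)
{
	return (struct compression_opt) { .type = v & 15, .level = v >> 4 };
}

static int valid(unsigned v)
{
	struct compression_opt opt = decode(v);

	/* unknown type, or a level with no type, is rejected */
	return opt.type < OPT_NR && !(!opt.type && opt.level);
}

int main(void)
{
	struct compression_opt opt = { .type = OPT_zstd, .level = 15 };
	unsigned v = encode(opt);

	assert(valid(v) && decode(v).type == OPT_zstd && decode(v).level == 15);
	assert(!valid(encode((struct compression_opt) { .level = 3 })));
	printf("encoded: 0x%02x\n", v);	/* 0xf3 */
	return 0;
}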
__bch2_compression_decode(v)
-+		: (struct bch_compression_opt) { 0 };
-+}
-+
-+static inline unsigned bch2_compression_encode(struct bch_compression_opt opt)
-+{
-+	return opt.type|(opt.level << 4);
-+}
-+
-+static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
-+{
-+	return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
-+}
-+
-+int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
-+				struct bch_extent_crc_unpacked *);
-+int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
-+			struct bvec_iter, struct bch_extent_crc_unpacked);
-+unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,
-+			   struct bio *, size_t *, unsigned);
-+
-+int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned);
-+void bch2_fs_compress_exit(struct bch_fs *);
-+int bch2_fs_compress_init(struct bch_fs *);
-+
-+void bch2_compression_opt_to_text(struct printbuf *, u64);
-+
-+int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *);
-+void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64);
-+int bch2_opt_compression_validate(u64, struct printbuf *);
-+
-+#define bch2_opt_compression (struct bch_opt_fn) {	\
-+	.parse		= bch2_opt_compression_parse,	\
-+	.to_text	= bch2_opt_compression_to_text,	\
-+	.validate	= bch2_opt_compression_validate,\
-+}
-+
-+#endif /* _BCACHEFS_COMPRESS_H */
-diff --git a/fs/bcachefs/counters.c b/fs/bcachefs/counters.c
-new file mode 100644
-index 000000000000..02a996e06a64
---- /dev/null
-+++ b/fs/bcachefs/counters.c
-@@ -0,0 +1,107 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "super-io.h"
-+#include "counters.h"
-+
-+/* BCH_SB_FIELD_counters */
-+
-+static const char * const bch2_counter_names[] = {
-+#define x(t, n, ...)
(#t), -+ BCH_PERSISTENT_COUNTERS() -+#undef x -+ NULL -+}; -+ -+static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs) -+{ -+ if (!ctrs) -+ return 0; -+ -+ return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0]; -+}; -+ -+static int bch2_sb_counters_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ return 0; -+}; -+ -+static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_counters *ctrs = field_to_type(f, counters); -+ unsigned int i; -+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs); -+ -+ for (i = 0; i < nr; i++) { -+ if (i < BCH_COUNTER_NR) -+ prt_printf(out, "%s ", bch2_counter_names[i]); -+ else -+ prt_printf(out, "(unknown)"); -+ -+ prt_tab(out); -+ prt_printf(out, "%llu", le64_to_cpu(ctrs->d[i])); -+ prt_newline(out); -+ } -+}; -+ -+int bch2_sb_counters_to_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters); -+ unsigned int i; -+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs); -+ u64 val = 0; -+ -+ for (i = 0; i < BCH_COUNTER_NR; i++) -+ c->counters_on_mount[i] = 0; -+ -+ for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) { -+ val = le64_to_cpu(ctrs->d[i]); -+ percpu_u64_set(&c->counters[i], val); -+ c->counters_on_mount[i] = val; -+ } -+ return 0; -+}; -+ -+int bch2_sb_counters_from_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters); -+ struct bch_sb_field_counters *ret; -+ unsigned int i; -+ unsigned int nr = bch2_sb_counter_nr_entries(ctrs); -+ -+ if (nr < BCH_COUNTER_NR) { -+ ret = bch2_sb_field_resize(&c->disk_sb, counters, -+ sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR); -+ -+ if (ret) { -+ ctrs = ret; -+ nr = bch2_sb_counter_nr_entries(ctrs); -+ } -+ } -+ -+ -+ for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) -+ ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i])); -+ return 0; -+} -+ -+void bch2_fs_counters_exit(struct bch_fs *c) -+{ -+ free_percpu(c->counters); -+} -+ -+int bch2_fs_counters_init(struct bch_fs *c) -+{ -+ c->counters = __alloc_percpu(sizeof(u64) * BCH_COUNTER_NR, sizeof(u64)); -+ if (!c->counters) -+ return -BCH_ERR_ENOMEM_fs_counters_init; -+ -+ return bch2_sb_counters_to_cpu(c); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_counters = { -+ .validate = bch2_sb_counters_validate, -+ .to_text = bch2_sb_counters_to_text, -+}; -diff --git a/fs/bcachefs/counters.h b/fs/bcachefs/counters.h -new file mode 100644 -index 000000000000..4778aa19bf34 ---- /dev/null -+++ b/fs/bcachefs/counters.h -@@ -0,0 +1,17 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_COUNTERS_H -+#define _BCACHEFS_COUNTERS_H -+ -+#include "bcachefs.h" -+#include "super-io.h" -+ -+ -+int bch2_sb_counters_to_cpu(struct bch_fs *); -+int bch2_sb_counters_from_cpu(struct bch_fs *); -+ -+void bch2_fs_counters_exit(struct bch_fs *); -+int bch2_fs_counters_init(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_counters; -+ -+#endif // _BCACHEFS_COUNTERS_H -diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h -new file mode 100644 -index 000000000000..87b4b2d1ec76 ---- /dev/null -+++ b/fs/bcachefs/darray.h -@@ -0,0 +1,93 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DARRAY_H -+#define _BCACHEFS_DARRAY_H -+ -+/* -+ * Dynamic arrays: -+ * -+ * Inspired by CCAN's darray -+ */ -+ -+#include "util.h" -+#include -+ -+#define DARRAY(type) \ -+struct { \ 
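DARRAY() below is a typed dynamic array in the CCAN style: a (nr, size, data) triple plus statement-expression macros. A compilable userspace approximation, using GNU C statement expressions like the original, with plain realloc() doubling in place of krealloc_array() plus roundup_pow_of_two():

/* toy_darray.c - the shape of the DARRAY() macros, simplified. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define DARRAY(type) struct { size_t nr, size; type *data; }

/* Returns the (possibly moved) buffer, or NULL on allocation failure;
 * the caller's buffer stays valid on failure. */
static void *darray_make_room(void *data, size_t *size, size_t nr,
			      size_t t_size)
{
	if (nr + 1 > *size) {
		size_t new_size = *size ? *size * 2 : 8;
		void *p = realloc(data, new_size * t_size);

		if (!p)
			return NULL;
		*size = new_size;
		return p;
	}
	return data;
}

#define darray_push(d, item)						\
({									\
	void *_p = darray_make_room((d)->data, &(d)->size, (d)->nr,	\
				    sizeof((d)->data[0]));		\
	if (_p) {							\
		(d)->data = _p;						\
		(d)->data[(d)->nr++] = (item);				\
	}								\
	_p ? 0 : -1;							\
})

#define darray_for_each(d, i)						\
	for (i = (d).data; i < (d).data + (d).nr; i++)

int main(void)
{
	DARRAY(int) nums = { 0 };
	int *i;
	long sum = 0;

	for (int n = 1; n <= 100; n++)
		assert(!darray_push(&nums, n));
	darray_for_each(nums, i)
		sum += *i;
	printf("nr=%zu size=%zu sum=%ld\n", nums.nr, nums.size, sum);
	free(nums.data);
	return 0;
}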
-+ size_t nr, size; \ -+ type *data; \ -+} -+ -+typedef DARRAY(void) darray_void; -+ -+static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp) -+{ -+ if (d->nr + more > d->size) { -+ size_t new_size = roundup_pow_of_two(d->nr + more); -+ void *data = krealloc_array(d->data, new_size, t_size, gfp); -+ -+ if (!data) -+ return -ENOMEM; -+ -+ d->data = data; -+ d->size = new_size; -+ } -+ -+ return 0; -+} -+ -+#define darray_make_room_gfp(_d, _more, _gfp) \ -+ __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp) -+ -+#define darray_make_room(_d, _more) \ -+ darray_make_room_gfp(_d, _more, GFP_KERNEL) -+ -+#define darray_top(_d) ((_d).data[(_d).nr]) -+ -+#define darray_push_gfp(_d, _item, _gfp) \ -+({ \ -+ int _ret = darray_make_room_gfp((_d), 1, _gfp); \ -+ \ -+ if (!_ret) \ -+ (_d)->data[(_d)->nr++] = (_item); \ -+ _ret; \ -+}) -+ -+#define darray_push(_d, _item) darray_push_gfp(_d, _item, GFP_KERNEL) -+ -+#define darray_pop(_d) ((_d)->data[--(_d)->nr]) -+ -+#define darray_first(_d) ((_d).data[0]) -+#define darray_last(_d) ((_d).data[(_d).nr - 1]) -+ -+#define darray_insert_item(_d, pos, _item) \ -+({ \ -+ size_t _pos = (pos); \ -+ int _ret = darray_make_room((_d), 1); \ -+ \ -+ if (!_ret) \ -+ array_insert_item((_d)->data, (_d)->nr, _pos, (_item)); \ -+ _ret; \ -+}) -+ -+#define darray_remove_item(_d, _pos) \ -+ array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data) -+ -+#define darray_for_each(_d, _i) \ -+ for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++) -+ -+#define darray_for_each_reverse(_d, _i) \ -+ for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) -+ -+#define darray_init(_d) \ -+do { \ -+ (_d)->data = NULL; \ -+ (_d)->nr = (_d)->size = 0; \ -+} while (0) -+ -+#define darray_exit(_d) \ -+do { \ -+ kfree((_d)->data); \ -+ darray_init(_d); \ -+} while (0) -+ -+#endif /* _BCACHEFS_DARRAY_H */ -diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c -new file mode 100644 -index 000000000000..0771a6d880bf ---- /dev/null -+++ b/fs/bcachefs/data_update.c -@@ -0,0 +1,551 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_buf.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "data_update.h" -+#include "ec.h" -+#include "error.h" -+#include "extents.h" -+#include "io_write.h" -+#include "keylist.h" -+#include "move.h" -+#include "nocow_locking.h" -+#include "rebalance.h" -+#include "subvolume.h" -+#include "trace.h" -+ -+static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (trace_move_extent_finish_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ trace_move_extent_finish(c, buf.buf); -+ printbuf_exit(&buf); -+ } -+} -+ -+static void trace_move_extent_fail2(struct data_update *m, -+ struct bkey_s_c new, -+ struct bkey_s_c wrote, -+ struct bkey_i *insert, -+ const char *msg) -+{ -+ struct bch_fs *c = m->op.c; -+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_ptr *ptr; -+ struct extent_ptr_decoded p; -+ struct printbuf buf = PRINTBUF; -+ unsigned i, rewrites_found = 0; -+ -+ if (!trace_move_extent_fail_enabled()) -+ return; -+ -+ prt_str(&buf, msg); -+ -+ if (insert) { -+ i = 0; -+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) { -+ if (((1U << i) & m->data_opts.rewrite_ptrs) && -+ (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && -+ !ptr->cached) -+ rewrites_found |= 1U 
<< i; -+ i++; -+ } -+ } -+ -+ prt_printf(&buf, "\nrewrite ptrs: %u%u%u%u", -+ (m->data_opts.rewrite_ptrs & (1 << 0)) != 0, -+ (m->data_opts.rewrite_ptrs & (1 << 1)) != 0, -+ (m->data_opts.rewrite_ptrs & (1 << 2)) != 0, -+ (m->data_opts.rewrite_ptrs & (1 << 3)) != 0); -+ -+ prt_printf(&buf, "\nrewrites found: %u%u%u%u", -+ (rewrites_found & (1 << 0)) != 0, -+ (rewrites_found & (1 << 1)) != 0, -+ (rewrites_found & (1 << 2)) != 0, -+ (rewrites_found & (1 << 3)) != 0); -+ -+ prt_str(&buf, "\nold: "); -+ bch2_bkey_val_to_text(&buf, c, old); -+ -+ prt_str(&buf, "\nnew: "); -+ bch2_bkey_val_to_text(&buf, c, new); -+ -+ prt_str(&buf, "\nwrote: "); -+ bch2_bkey_val_to_text(&buf, c, wrote); -+ -+ if (insert) { -+ prt_str(&buf, "\ninsert: "); -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); -+ } -+ -+ trace_move_extent_fail(c, buf.buf); -+ printbuf_exit(&buf); -+} -+ -+static int __bch2_data_update_index_update(struct btree_trans *trans, -+ struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct btree_iter iter; -+ struct data_update *m = -+ container_of(op, struct data_update, op); -+ struct keylist *keys = &op->insert_keys; -+ struct bkey_buf _new, _insert; -+ int ret = 0; -+ -+ bch2_bkey_buf_init(&_new); -+ bch2_bkey_buf_init(&_insert); -+ bch2_bkey_buf_realloc(&_insert, c, U8_MAX); -+ -+ bch2_trans_iter_init(trans, &iter, m->btree_id, -+ bkey_start_pos(&bch2_keylist_front(keys)->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ while (1) { -+ struct bkey_s_c k; -+ struct bkey_s_c old = bkey_i_to_s_c(m->k.k); -+ struct bkey_i *insert = NULL; -+ struct bkey_i_extent *new; -+ const union bch_extent_entry *entry_c; -+ union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_extent_ptr *ptr; -+ const struct bch_extent_ptr *ptr_c; -+ struct bpos next_pos; -+ bool should_check_enospc; -+ s64 i_sectors_delta = 0, disk_sectors_delta = 0; -+ unsigned rewrites_found = 0, durability, i; -+ -+ bch2_trans_begin(trans); -+ -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ new = bkey_i_to_extent(bch2_keylist_front(keys)); -+ -+ if (!bch2_extents_match(k, old)) { -+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), -+ NULL, "no match:"); -+ goto nowork; -+ } -+ -+ bkey_reassemble(_insert.k, k); -+ insert = _insert.k; -+ -+ bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); -+ new = bkey_i_to_extent(_new.k); -+ bch2_cut_front(iter.pos, &new->k_i); -+ -+ bch2_cut_front(iter.pos, insert); -+ bch2_cut_back(new->k.p, insert); -+ bch2_cut_back(insert->k.p, &new->k_i); -+ -+ /* -+ * @old: extent that we read from -+ * @insert: key that we're going to update, initialized from -+ * extent currently in btree - same as @old unless we raced with -+ * other updates -+ * @new: extent with new pointers that we'll be adding to @insert -+ * -+ * Fist, drop rewrite_ptrs from @new: -+ */ -+ i = 0; -+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) { -+ if (((1U << i) & m->data_opts.rewrite_ptrs) && -+ (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && -+ !ptr->cached) { -+ bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr); -+ rewrites_found |= 1U << i; -+ } -+ i++; -+ } -+ -+ if (m->data_opts.rewrite_ptrs && -+ !rewrites_found && -+ bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) { -+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:"); -+ goto nowork; -+ } -+ -+ /* -+ * A replica that we just wrote might conflict with a replica -+ * that we want to keep, due to 
racing with another move: -+ */ -+restart_drop_conflicting_replicas: -+ extent_for_each_ptr(extent_i_to_s(new), ptr) -+ if ((ptr_c = bch2_bkey_has_device_c(bkey_i_to_s_c(insert), ptr->dev)) && -+ !ptr_c->cached) { -+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(&new->k_i), ptr); -+ goto restart_drop_conflicting_replicas; -+ } -+ -+ if (!bkey_val_u64s(&new->k)) { -+ trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:"); -+ goto nowork; -+ } -+ -+ /* Now, drop pointers that conflict with what we just wrote: */ -+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) -+ if ((ptr = bch2_bkey_has_device(bkey_i_to_s(insert), p.ptr.dev))) -+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr); -+ -+ durability = bch2_bkey_durability(c, bkey_i_to_s_c(insert)) + -+ bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i)); -+ -+ /* Now, drop excess replicas: */ -+restart_drop_extra_replicas: -+ bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) { -+ unsigned ptr_durability = bch2_extent_ptr_durability(c, &p); -+ -+ if (!p.ptr.cached && -+ durability - ptr_durability >= m->op.opts.data_replicas) { -+ durability -= ptr_durability; -+ -+ bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr); -+ goto restart_drop_extra_replicas; -+ } -+ } -+ -+ /* Finally, add the pointers we just wrote: */ -+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) -+ bch2_extent_ptr_decoded_append(insert, &p); -+ -+ bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 }); -+ bch2_extent_normalize(c, bkey_i_to_s(insert)); -+ -+ ret = bch2_sum_sector_overwrites(trans, &iter, insert, -+ &should_check_enospc, -+ &i_sectors_delta, -+ &disk_sectors_delta); -+ if (ret) -+ goto err; -+ -+ if (disk_sectors_delta > (s64) op->res.sectors) { -+ ret = bch2_disk_reservation_add(c, &op->res, -+ disk_sectors_delta - op->res.sectors, -+ !should_check_enospc -+ ? 
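The pointer bookkeeping above is driven by the rewrite_ptrs bitmask: replicas selected for rewrite are demoted to cached, the freshly written replica is appended, and anything beyond the wanted durability is dropped. A toy, self-contained rendering of that accounting, with plain structs instead of bkeys and all names invented:

/* toy_rewrite_ptrs.c - rewrite-bitmask and durability accounting. */
#include <stdio.h>

struct replica {
	int dev;
	int cached;
	int durability;
};

int main(void)
{
	struct replica r[4] = {
		{ .dev = 0, .durability = 1 },
		{ .dev = 1, .durability = 1 },
		{ .dev = 2, .durability = 1 },
	};
	unsigned nr = 3, rewrite_ptrs = 1U << 1;	/* rewrite dev 1 */
	int target_replicas = 2, durability = 0;

	/* replicas being rewritten are demoted to cached, like
	 * bch2_extent_ptr_set_cached() on a rewrite_ptrs hit: */
	for (unsigned i = 0; i < nr; i++)
		if (rewrite_ptrs & (1U << i))
			r[i].cached = 1;

	/* pretend the move wrote one new replica on dev 3: */
	r[nr++] = (struct replica) { .dev = 3, .durability = 1 };

	for (unsigned i = 0; i < nr; i++)
		if (!r[i].cached)
			durability += r[i].durability;

	/* drop whatever durability exceeds the target: */
	for (unsigned i = 0; i < nr; i++)
		if (!r[i].cached &&
		    durability - r[i].durability >= target_replicas) {
			durability -= r[i].durability;
			r[i].cached = 1;
		}

	for (unsigned i = 0; i < nr; i++)
		printf("dev %d: %s\n", r[i].dev,
		       r[i].cached ? "cached" : "live");
	return 0;
}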
BCH_DISK_RESERVATION_NOFAIL : 0); -+ if (ret) -+ goto out; -+ } -+ -+ next_pos = insert->k.p; -+ -+ ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, -+ k.k->p, bkey_start_pos(&insert->k)) ?: -+ bch2_insert_snapshot_whiteouts(trans, m->btree_id, -+ k.k->p, insert->k.p) ?: -+ bch2_bkey_set_needs_rebalance(c, insert, -+ op->opts.background_target, -+ op->opts.background_compression) ?: -+ bch2_trans_update(trans, &iter, insert, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: -+ bch2_trans_commit(trans, &op->res, -+ NULL, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL| -+ m->data_opts.btree_insert_flags); -+ if (!ret) { -+ bch2_btree_iter_set_pos(&iter, next_pos); -+ -+ this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size); -+ trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i)); -+ } -+err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ ret = 0; -+ if (ret) -+ break; -+next: -+ while (bkey_ge(iter.pos, bch2_keylist_front(keys)->k.p)) { -+ bch2_keylist_pop_front(keys); -+ if (bch2_keylist_empty(keys)) -+ goto out; -+ } -+ continue; -+nowork: -+ if (m->stats && m->stats) { -+ BUG_ON(k.k->p.offset <= iter.pos.offset); -+ atomic64_inc(&m->stats->keys_raced); -+ atomic64_add(k.k->p.offset - iter.pos.offset, -+ &m->stats->sectors_raced); -+ } -+ -+ this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); -+ -+ bch2_btree_iter_advance(&iter); -+ goto next; -+ } -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_bkey_buf_exit(&_insert, c); -+ bch2_bkey_buf_exit(&_new, c); -+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); -+ return ret; -+} -+ -+int bch2_data_update_index_update(struct bch_write_op *op) -+{ -+ return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op)); -+} -+ -+void bch2_data_update_read_done(struct data_update *m, -+ struct bch_extent_crc_unpacked crc) -+{ -+ /* write bio must own pages: */ -+ BUG_ON(!m->op.wbio.bio.bi_vcnt); -+ -+ m->op.crc = crc; -+ m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9; -+ -+ closure_call(&m->op.cl, bch2_write, NULL, NULL); -+} -+ -+void bch2_data_update_exit(struct data_update *update) -+{ -+ struct bch_fs *c = update->op.c; -+ struct bkey_ptrs_c ptrs = -+ bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k)); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (c->opts.nocow_enabled) -+ bch2_bucket_nocow_unlock(&c->nocow_locks, -+ PTR_BUCKET_POS(c, ptr), 0); -+ percpu_ref_put(&bch_dev_bkey_exists(c, ptr->dev)->ref); -+ } -+ -+ bch2_bkey_buf_exit(&update->k, c); -+ bch2_disk_reservation_put(c, &update->op.res); -+ bch2_bio_free_pages_pool(c, &update->op.wbio.bio); -+} -+ -+void bch2_update_unwritten_extent(struct btree_trans *trans, -+ struct data_update *update) -+{ -+ struct bch_fs *c = update->op.c; -+ struct bio *bio = &update->op.wbio.bio; -+ struct bkey_i_extent *e; -+ struct write_point *wp; -+ struct bch_extent_ptr *ptr; -+ struct closure cl; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ closure_init_stack(&cl); -+ bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys); -+ -+ while (bio_sectors(bio)) { -+ unsigned sectors = bio_sectors(bio); -+ -+ bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos, -+ BTREE_ITER_SLOTS); -+ ret = lockrestart_do(trans, ({ -+ k = bch2_btree_iter_peek_slot(&iter); -+ bkey_err(k); -+ })); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret || !bch2_extents_match(k, bkey_i_to_s_c(update->k.k))) -+ break; -+ -+ e = bkey_extent_init(update->op.insert_keys.top); -+ e->k.p 
= update->op.pos; -+ -+ ret = bch2_alloc_sectors_start_trans(trans, -+ update->op.target, -+ false, -+ update->op.write_point, -+ &update->op.devs_have, -+ update->op.nr_replicas, -+ update->op.nr_replicas, -+ update->op.watermark, -+ 0, &cl, &wp); -+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ continue; -+ } -+ -+ if (ret) -+ return; -+ -+ sectors = min(sectors, wp->sectors_free); -+ -+ bch2_key_resize(&e->k, sectors); -+ -+ bch2_open_bucket_get(c, wp, &update->op.open_buckets); -+ bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); -+ bch2_alloc_sectors_done(c, wp); -+ -+ bio_advance(bio, sectors << 9); -+ update->op.pos.offset += sectors; -+ -+ extent_for_each_ptr(extent_i_to_s(e), ptr) -+ ptr->unwritten = true; -+ bch2_keylist_push(&update->op.insert_keys); -+ -+ ret = __bch2_data_update_index_update(trans, &update->op); -+ -+ bch2_open_buckets_put(c, &update->op.open_buckets); -+ -+ if (ret) -+ break; -+ } -+ -+ if (closure_nr_remaining(&cl) != 1) { -+ bch2_trans_unlock(trans); -+ closure_sync(&cl); -+ } -+} -+ -+int bch2_data_update_init(struct btree_trans *trans, -+ struct moving_context *ctxt, -+ struct data_update *m, -+ struct write_point_specifier wp, -+ struct bch_io_opts io_opts, -+ struct data_update_opts data_opts, -+ enum btree_id btree_id, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ const struct bch_extent_ptr *ptr; -+ unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; -+ unsigned ptrs_locked = 0; -+ int ret; -+ -+ bch2_bkey_buf_init(&m->k); -+ bch2_bkey_buf_reassemble(&m->k, c, k); -+ m->btree_id = btree_id; -+ m->data_opts = data_opts; -+ m->ctxt = ctxt; -+ m->stats = ctxt ? 
ctxt->stats : NULL; -+ -+ bch2_write_op_init(&m->op, c, io_opts); -+ m->op.pos = bkey_start_pos(k.k); -+ m->op.version = k.k->version; -+ m->op.target = data_opts.target; -+ m->op.write_point = wp; -+ m->op.nr_replicas = 0; -+ m->op.flags |= BCH_WRITE_PAGES_STABLE| -+ BCH_WRITE_PAGES_OWNED| -+ BCH_WRITE_DATA_ENCODED| -+ BCH_WRITE_MOVE| -+ m->data_opts.write_flags; -+ m->op.compression_opt = io_opts.background_compression ?: io_opts.compression; -+ m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref); -+ -+ i = 0; -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ bool locked; -+ -+ if (((1U << i) & m->data_opts.rewrite_ptrs)) { -+ BUG_ON(p.ptr.cached); -+ -+ if (crc_is_compressed(p.crc)) -+ reserve_sectors += k.k->size; -+ -+ m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); -+ } else if (!p.ptr.cached) { -+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); -+ } -+ -+ /* -+ * op->csum_type is normally initialized from the fs/file's -+ * current options - but if an extent is encrypted, we require -+ * that it stays encrypted: -+ */ -+ if (bch2_csum_type_is_encryption(p.crc.csum_type)) { -+ m->op.nonce = p.crc.nonce + p.crc.offset; -+ m->op.csum_type = p.crc.csum_type; -+ } -+ -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) -+ m->op.incompressible = true; -+ -+ if (c->opts.nocow_enabled) { -+ if (ctxt) { -+ move_ctxt_wait_event(ctxt, -+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, -+ PTR_BUCKET_POS(c, &p.ptr), 0)) || -+ !atomic_read(&ctxt->read_sectors)); -+ -+ if (!locked) -+ bch2_bucket_nocow_lock(&c->nocow_locks, -+ PTR_BUCKET_POS(c, &p.ptr), 0); -+ } else { -+ if (!bch2_bucket_nocow_trylock(&c->nocow_locks, -+ PTR_BUCKET_POS(c, &p.ptr), 0)) { -+ ret = -BCH_ERR_nocow_lock_blocked; -+ goto err; -+ } -+ } -+ ptrs_locked |= (1U << i); -+ } -+ -+ i++; -+ } -+ -+ if (reserve_sectors) { -+ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, -+ m->data_opts.extra_replicas -+ ? 
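
A note for reviewers of the deleted bch2_data_update_init() above: the disk reservation it takes is k.k->size sectors per extra replica requested, plus another k.k->size for each compressed pointer selected for rewrite, since compressed extents cannot be updated in place. A minimal user-space sketch of just that arithmetic, with hypothetical pointer flags standing in for the bkey_for_each_ptr_decode() walk:

#include <stdio.h>

/* Hypothetical stand-ins for the fields bch2_data_update_init() reads. */
struct demo_ptr { int rewrite; int compressed; };

int main(void)
{
    unsigned extent_sectors = 128;          /* k.k->size */
    unsigned extra_replicas = 1;            /* data_opts.extra_replicas */
    struct demo_ptr ptrs[3] = {
        { .rewrite = 1, .compressed = 1 },  /* needs extra reservation */
        { .rewrite = 0, .compressed = 0 },
        { .rewrite = 1, .compressed = 0 },
    };
    unsigned reserve_sectors = extent_sectors * extra_replicas;

    for (unsigned i = 0; i < 3; i++)
        if (ptrs[i].rewrite && ptrs[i].compressed)
            reserve_sectors += extent_sectors;

    /* 128 * 1 + 128 = 256 sectors reserved before the move is issued */
    printf("reserve_sectors = %u\n", reserve_sectors);
    return 0;
}
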
0 -+ : BCH_DISK_RESERVATION_NOFAIL); -+ if (ret) -+ goto err; -+ } -+ -+ m->op.nr_replicas += m->data_opts.extra_replicas; -+ m->op.nr_replicas_required = m->op.nr_replicas; -+ -+ BUG_ON(!m->op.nr_replicas); -+ -+ /* Special handling required: */ -+ if (bkey_extent_is_unwritten(k)) -+ return -BCH_ERR_unwritten_extent_update; -+ return 0; -+err: -+ i = 0; -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if ((1U << i) & ptrs_locked) -+ bch2_bucket_nocow_unlock(&c->nocow_locks, -+ PTR_BUCKET_POS(c, &p.ptr), 0); -+ percpu_ref_put(&bch_dev_bkey_exists(c, p.ptr.dev)->ref); -+ i++; -+ } -+ -+ bch2_bkey_buf_exit(&m->k, c); -+ bch2_bio_free_pages_pool(c, &m->op.wbio.bio); -+ return ret; -+} -+ -+void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ unsigned i = 0; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) { -+ opts->kill_ptrs |= 1U << i; -+ opts->rewrite_ptrs ^= 1U << i; -+ } -+ -+ i++; -+ } -+} -diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h -new file mode 100644 -index 000000000000..9dc17b9d8379 ---- /dev/null -+++ b/fs/bcachefs/data_update.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _BCACHEFS_DATA_UPDATE_H -+#define _BCACHEFS_DATA_UPDATE_H -+ -+#include "bkey_buf.h" -+#include "io_write_types.h" -+ -+struct moving_context; -+ -+struct data_update_opts { -+ unsigned rewrite_ptrs; -+ unsigned kill_ptrs; -+ u16 target; -+ u8 extra_replicas; -+ unsigned btree_insert_flags; -+ unsigned write_flags; -+}; -+ -+struct data_update { -+ /* extent being updated: */ -+ enum btree_id btree_id; -+ struct bkey_buf k; -+ struct data_update_opts data_opts; -+ struct moving_context *ctxt; -+ struct bch_move_stats *stats; -+ struct bch_write_op op; -+}; -+ -+int bch2_data_update_index_update(struct bch_write_op *); -+ -+void bch2_data_update_read_done(struct data_update *, -+ struct bch_extent_crc_unpacked); -+ -+void bch2_data_update_exit(struct data_update *); -+void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *); -+int bch2_data_update_init(struct btree_trans *, struct moving_context *, -+ struct data_update *, -+ struct write_point_specifier, -+ struct bch_io_opts, struct data_update_opts, -+ enum btree_id, struct bkey_s_c); -+void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *); -+ -+#endif /* _BCACHEFS_DATA_UPDATE_H */ -diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c -new file mode 100644 -index 000000000000..57c5128db173 ---- /dev/null -+++ b/fs/bcachefs/debug.c -@@ -0,0 +1,954 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Assorted bcachefs debug code -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
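
bch2_data_update_opts_normalize() at the end of the data_update.c hunk above moves any cached pointer out of rewrite_ptrs and into kill_ptrs, because a cached pointer is simply dropped rather than copied. A user-space sketch of the bitmask shuffle, with a plain cached[] array standing in for the pointer walk:

#include <stdio.h>

int main(void)
{
    /* Pointer i is cached iff cached[i]; masks index pointers by position. */
    int cached[4] = { 0, 1, 0, 1 };
    unsigned rewrite_ptrs = 0xb;    /* ask to rewrite ptrs 0, 1 and 3 */
    unsigned kill_ptrs = 0;

    for (unsigned i = 0; i < 4; i++)
        if ((rewrite_ptrs & (1U << i)) && cached[i]) {
            kill_ptrs    |= 1U << i;    /* drop it instead of rewriting */
            rewrite_ptrs ^= 1U << i;
        }

    /* rewrite_ptrs = 0x1 (ptr 0), kill_ptrs = 0xa (ptrs 1 and 3) */
    printf("rewrite=%#x kill=%#x\n", rewrite_ptrs, kill_ptrs);
    return 0;
}
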
-+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_cache.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "btree_locking.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "debug.h" -+#include "error.h" -+#include "extents.h" -+#include "fsck.h" -+#include "inode.h" -+#include "super.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+static struct dentry *bch_debug; -+ -+static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, -+ struct extent_ptr_decoded pick) -+{ -+ struct btree *v = c->verify_data; -+ struct btree_node *n_ondisk = c->verify_ondisk; -+ struct btree_node *n_sorted = c->verify_data->data; -+ struct bset *sorted, *inmemory = &b->data->keys; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ struct bio *bio; -+ bool failed = false, saw_error = false; -+ -+ if (!bch2_dev_get_ioref(ca, READ)) -+ return false; -+ -+ bio = bio_alloc_bioset(ca->disk_sb.bdev, -+ buf_pages(n_sorted, btree_bytes(c)), -+ REQ_OP_READ|REQ_META, -+ GFP_NOFS, -+ &c->btree_bio); -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bch2_bio_map(bio, n_sorted, btree_bytes(c)); -+ -+ submit_bio_wait(bio); -+ -+ bio_put(bio); -+ percpu_ref_put(&ca->io_ref); -+ -+ memcpy(n_ondisk, n_sorted, btree_bytes(c)); -+ -+ v->written = 0; -+ if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) -+ return false; -+ -+ n_sorted = c->verify_data->data; -+ sorted = &n_sorted->keys; -+ -+ if (inmemory->u64s != sorted->u64s || -+ memcmp(inmemory->start, -+ sorted->start, -+ vstruct_end(inmemory) - (void *) inmemory->start)) { -+ unsigned offset = 0, sectors; -+ struct bset *i; -+ unsigned j; -+ -+ console_lock(); -+ -+ printk(KERN_ERR "*** in memory:\n"); -+ bch2_dump_bset(c, b, inmemory, 0); -+ -+ printk(KERN_ERR "*** read back in:\n"); -+ bch2_dump_bset(c, v, sorted, 0); -+ -+ while (offset < v->written) { -+ if (!offset) { -+ i = &n_ondisk->keys; -+ sectors = vstruct_blocks(n_ondisk, c->block_bits) << -+ c->block_bits; -+ } else { -+ struct btree_node_entry *bne = -+ (void *) n_ondisk + (offset << 9); -+ i = &bne->keys; -+ -+ sectors = vstruct_blocks(bne, c->block_bits) << -+ c->block_bits; -+ } -+ -+ printk(KERN_ERR "*** on disk block %u:\n", offset); -+ bch2_dump_bset(c, b, i, offset); -+ -+ offset += sectors; -+ } -+ -+ for (j = 0; j < le16_to_cpu(inmemory->u64s); j++) -+ if (inmemory->_data[j] != sorted->_data[j]) -+ break; -+ -+ console_unlock(); -+ bch_err(c, "verify failed at key %u", j); -+ -+ failed = true; -+ } -+ -+ if (v->written != b->written) { -+ bch_err(c, "written wrong: expected %u, got %u", -+ b->written, v->written); -+ failed = true; -+ } -+ -+ return failed; -+} -+ -+void __bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ struct bkey_ptrs_c ptrs; -+ struct extent_ptr_decoded p; -+ const union bch_extent_entry *entry; -+ struct btree *v; -+ struct bset *inmemory = &b->data->keys; -+ struct bkey_packed *k; -+ bool failed = false; -+ -+ if (c->opts.nochanges) -+ return; -+ -+ bch2_btree_node_io_lock(b); -+ mutex_lock(&c->verify_lock); -+ -+ if (!c->verify_ondisk) { -+ c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); -+ if (!c->verify_ondisk) -+ goto out; -+ } -+ -+ if (!c->verify_data) { -+ c->verify_data = __bch2_btree_node_mem_alloc(c); -+ if (!c->verify_data) -+ goto out; -+ -+ list_del_init(&c->verify_data->list); -+ } -+ -+ BUG_ON(b->nsets != 1); -+ -+ for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k)) -+ if (k->type == KEY_TYPE_btree_ptr_v2) 
-+ ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0; -+ -+ v = c->verify_data; -+ bkey_copy(&v->key, &b->key); -+ v->c.level = b->c.level; -+ v->c.btree_id = b->c.btree_id; -+ bch2_btree_keys_init(v); -+ -+ ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)); -+ bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry) -+ failed |= bch2_btree_verify_replica(c, b, p); -+ -+ if (failed) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); -+ bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf); -+ printbuf_exit(&buf); -+ } -+out: -+ mutex_unlock(&c->verify_lock); -+ bch2_btree_node_io_unlock(b); -+} -+ -+void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c, -+ const struct btree *b) -+{ -+ struct btree_node *n_ondisk = NULL; -+ struct extent_ptr_decoded pick; -+ struct bch_dev *ca; -+ struct bio *bio = NULL; -+ unsigned offset = 0; -+ int ret; -+ -+ if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) { -+ prt_printf(out, "error getting device to read from: invalid device\n"); -+ return; -+ } -+ -+ ca = bch_dev_bkey_exists(c, pick.ptr.dev); -+ if (!bch2_dev_get_ioref(ca, READ)) { -+ prt_printf(out, "error getting device to read from: not online\n"); -+ return; -+ } -+ -+ n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); -+ if (!n_ondisk) { -+ prt_printf(out, "memory allocation failure\n"); -+ goto out; -+ } -+ -+ bio = bio_alloc_bioset(ca->disk_sb.bdev, -+ buf_pages(n_ondisk, btree_bytes(c)), -+ REQ_OP_READ|REQ_META, -+ GFP_NOFS, -+ &c->btree_bio); -+ bio->bi_iter.bi_sector = pick.ptr.offset; -+ bch2_bio_map(bio, n_ondisk, btree_bytes(c)); -+ -+ ret = submit_bio_wait(bio); -+ if (ret) { -+ prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret)); -+ goto out; -+ } -+ -+ while (offset < btree_sectors(c)) { -+ struct bset *i; -+ struct nonce nonce; -+ struct bch_csum csum; -+ struct bkey_packed *k; -+ unsigned sectors; -+ -+ if (!offset) { -+ i = &n_ondisk->keys; -+ -+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { -+ prt_printf(out, "unknown checksum type at offset %u: %llu\n", -+ offset, BSET_CSUM_TYPE(i)); -+ goto out; -+ } -+ -+ nonce = btree_nonce(i, offset << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk); -+ -+ if (bch2_crc_cmp(csum, n_ondisk->csum)) { -+ prt_printf(out, "invalid checksum\n"); -+ goto out; -+ } -+ -+ bset_encrypt(c, i, offset << 9); -+ -+ sectors = vstruct_sectors(n_ondisk, c->block_bits); -+ } else { -+ struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9); -+ -+ i = &bne->keys; -+ -+ if (i->seq != n_ondisk->keys.seq) -+ break; -+ -+ if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) { -+ prt_printf(out, "unknown checksum type at offset %u: %llu\n", -+ offset, BSET_CSUM_TYPE(i)); -+ goto out; -+ } -+ -+ nonce = btree_nonce(i, offset << 9); -+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); -+ -+ if (bch2_crc_cmp(csum, bne->csum)) { -+ prt_printf(out, "invalid checksum"); -+ goto out; -+ } -+ -+ bset_encrypt(c, i, offset << 9); -+ -+ sectors = vstruct_sectors(bne, c->block_bits); -+ } -+ -+ prt_printf(out, " offset %u version %u, journal seq %llu\n", -+ offset, -+ le16_to_cpu(i->version), -+ le64_to_cpu(i->journal_seq)); -+ offset += sectors; -+ -+ printbuf_indent_add(out, 4); -+ -+ for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) { -+ struct bkey u; -+ -+ bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u)); -+ prt_newline(out); -+ } -+ -+ printbuf_indent_sub(out, 4); -+ } 
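
Both bch2_btree_verify_replica() and bch2_btree_node_ondisk_to_text() above decode a btree node as the header bset followed by btree_node_entry records placed at 512-byte sector offsets, each advancing by its own vstruct size. A toy walk of that offset arithmetic with made-up bset sizes (no checksums, no real vstructs):

#include <stdio.h>

int main(void)
{
    /* sectors per bset, as vstruct_sectors()/vstruct_blocks() would report */
    unsigned bset_sectors[] = { 8, 4, 4, 2 };
    unsigned node_sectors = 18, offset = 0, i = 0;

    while (offset < node_sectors && i < 4) {
        /* byte position of this bset within the node buffer: offset << 9 */
        printf("bset %u at byte offset %u\n", i, offset << 9);
        offset += bset_sectors[i++];
    }
    return 0;
}
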
-+out: -+ if (bio) -+ bio_put(bio); -+ kvpfree(n_ondisk, btree_bytes(c)); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+ -+/* XXX: bch_fs refcounting */ -+ -+struct dump_iter { -+ struct bch_fs *c; -+ enum btree_id id; -+ struct bpos from; -+ struct bpos prev_node; -+ u64 iter; -+ -+ struct printbuf buf; -+ -+ char __user *ubuf; /* destination user buffer */ -+ size_t size; /* size of requested read */ -+ ssize_t ret; /* bytes read so far */ -+}; -+ -+static ssize_t flush_buf(struct dump_iter *i) -+{ -+ if (i->buf.pos) { -+ size_t bytes = min_t(size_t, i->buf.pos, i->size); -+ int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes); -+ -+ i->ret += copied; -+ i->ubuf += copied; -+ i->size -= copied; -+ i->buf.pos -= copied; -+ memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos); -+ -+ if (copied != bytes) -+ return -EFAULT; -+ } -+ -+ return i->size ? 0 : i->ret; -+} -+ -+static int bch2_dump_open(struct inode *inode, struct file *file) -+{ -+ struct btree_debug *bd = inode->i_private; -+ struct dump_iter *i; -+ -+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); -+ if (!i) -+ return -ENOMEM; -+ -+ file->private_data = i; -+ i->from = POS_MIN; -+ i->iter = 0; -+ i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); -+ i->id = bd->id; -+ i->buf = PRINTBUF; -+ -+ return 0; -+} -+ -+static int bch2_dump_release(struct inode *inode, struct file *file) -+{ -+ struct dump_iter *i = file->private_data; -+ -+ printbuf_exit(&i->buf); -+ kfree(i); -+ return 0; -+} -+ -+static ssize_t bch2_read_btree(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ ssize_t ret; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ ret = flush_buf(i); -+ if (ret) -+ return ret; -+ -+ trans = bch2_trans_get(i->c); -+ ret = for_each_btree_key2(trans, iter, i->id, i->from, -+ BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS, k, ({ -+ bch2_bkey_val_to_text(&i->buf, i->c, k); -+ prt_newline(&i->buf); -+ drop_locks_do(trans, flush_buf(i)); -+ })); -+ i->from = iter.pos; -+ -+ bch2_trans_put(trans); -+ -+ if (!ret) -+ ret = flush_buf(i); -+ -+ return ret ?: i->ret; -+} -+ -+static const struct file_operations btree_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree, -+}; -+ -+static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct btree *b; -+ ssize_t ret; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ ret = flush_buf(i); -+ if (ret) -+ return ret; -+ -+ if (bpos_eq(SPOS_MAX, i->from)) -+ return i->ret; -+ -+ trans = bch2_trans_get(i->c); -+retry: -+ bch2_trans_begin(trans); -+ -+ for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { -+ bch2_btree_node_to_text(&i->buf, i->c, b); -+ i->from = !bpos_eq(SPOS_MAX, b->key.k.p) -+ ? 
bpos_successor(b->key.k.p) -+ : b->key.k.p; -+ -+ ret = drop_locks_do(trans, flush_buf(i)); -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_put(trans); -+ -+ if (!ret) -+ ret = flush_buf(i); -+ -+ return ret ?: i->ret; -+} -+ -+static const struct file_operations btree_format_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_btree_formats, -+}; -+ -+static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ ssize_t ret; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ ret = flush_buf(i); -+ if (ret) -+ return ret; -+ -+ trans = bch2_trans_get(i->c); -+ -+ ret = for_each_btree_key2(trans, iter, i->id, i->from, -+ BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS, k, ({ -+ struct btree_path_level *l = &iter.path->l[0]; -+ struct bkey_packed *_k = -+ bch2_btree_node_iter_peek(&l->iter, l->b); -+ -+ if (bpos_gt(l->b->key.k.p, i->prev_node)) { -+ bch2_btree_node_to_text(&i->buf, i->c, l->b); -+ i->prev_node = l->b->key.k.p; -+ } -+ -+ bch2_bfloat_to_text(&i->buf, l->b, _k); -+ drop_locks_do(trans, flush_buf(i)); -+ })); -+ i->from = iter.pos; -+ -+ bch2_trans_put(trans); -+ -+ if (!ret) -+ ret = flush_buf(i); -+ -+ return ret ?: i->ret; -+} -+ -+static const struct file_operations bfloat_failed_debug_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_read_bfloat_failed, -+}; -+ -+static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, -+ struct btree *b) -+{ -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 32); -+ -+ prt_printf(out, "%px btree=%s l=%u ", -+ b, -+ bch2_btree_id_str(b->c.btree_id), -+ b->c.level); -+ prt_newline(out); -+ -+ printbuf_indent_add(out, 2); -+ -+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -+ prt_newline(out); -+ -+ prt_printf(out, "flags: "); -+ prt_tab(out); -+ prt_bitflags(out, bch2_btree_node_flags, b->flags); -+ prt_newline(out); -+ -+ prt_printf(out, "pcpu read locks: "); -+ prt_tab(out); -+ prt_printf(out, "%u", b->c.lock.readers != NULL); -+ prt_newline(out); -+ -+ prt_printf(out, "written:"); -+ prt_tab(out); -+ prt_printf(out, "%u", b->written); -+ prt_newline(out); -+ -+ prt_printf(out, "writes blocked:"); -+ prt_tab(out); -+ prt_printf(out, "%u", !list_empty_careful(&b->write_blocked)); -+ prt_newline(out); -+ -+ prt_printf(out, "will make reachable:"); -+ prt_tab(out); -+ prt_printf(out, "%lx", b->will_make_reachable); -+ prt_newline(out); -+ -+ prt_printf(out, "journal pin %px:", &b->writes[0].journal); -+ prt_tab(out); -+ prt_printf(out, "%llu", b->writes[0].journal.seq); -+ prt_newline(out); -+ -+ prt_printf(out, "journal pin %px:", &b->writes[1].journal); -+ prt_tab(out); -+ prt_printf(out, "%llu", b->writes[1].journal.seq); -+ prt_newline(out); -+ -+ printbuf_indent_sub(out, 2); -+} -+ -+static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct bch_fs *c = i->c; -+ bool done = false; -+ ssize_t ret = 0; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ do { -+ struct bucket_table *tbl; -+ struct rhash_head *pos; -+ struct btree *b; -+ -+ ret = flush_buf(i); -+ if 
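
flush_buf() above is the heart of every debugfs read in this file: it moves whatever the printbuf holds into the user buffer, returns 0 while the read still has room, and returns the running byte count once the request is satisfied, so the read loops alternate formatting one item with one flush. A user-space analogue with memcpy standing in for copy_to_user and hypothetical names throughout:

#include <stdio.h>
#include <string.h>

struct demo_iter {
    char buf[64]; size_t pos;   /* staged output, printbuf stand-in */
    char *ubuf; size_t size;    /* destination and room remaining */
    long ret;                   /* bytes copied out so far */
};

/* Returns 0 while the reader has room, else the total copied. */
static long demo_flush(struct demo_iter *i)
{
    size_t bytes = i->pos < i->size ? i->pos : i->size;

    memcpy(i->ubuf, i->buf, bytes);
    i->ret += (long) bytes; i->ubuf += bytes; i->size -= bytes;
    i->pos -= bytes;
    memmove(i->buf, i->buf + bytes, i->pos);
    return i->size ? 0 : i->ret;
}

int main(void)
{
    char out[8];
    struct demo_iter it = { .ubuf = out, .size = sizeof(out) };
    const char *items[] = { "one\n", "two\n", "three\n" };

    for (int n = 0; n < 3; n++) {
        strcpy(it.buf + it.pos, items[n]);  /* "format" one item */
        it.pos += strlen(items[n]);
        if (demo_flush(&it))                /* reader's buffer is full */
            break;
    }
    printf("copied %ld bytes: %.8s", it.ret, out);
    return 0;
}
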
(ret) -+ return ret; -+ -+ rcu_read_lock(); -+ i->buf.atomic++; -+ tbl = rht_dereference_rcu(c->btree_cache.table.tbl, -+ &c->btree_cache.table); -+ if (i->iter < tbl->size) { -+ rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) -+ bch2_cached_btree_node_to_text(&i->buf, c, b); -+ i->iter++; -+ } else { -+ done = true; -+ } -+ --i->buf.atomic; -+ rcu_read_unlock(); -+ } while (!done); -+ -+ if (i->buf.allocation_failure) -+ ret = -ENOMEM; -+ -+ if (!ret) -+ ret = flush_buf(i); -+ -+ return ret ?: i->ret; -+} -+ -+static const struct file_operations cached_btree_nodes_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_cached_btree_nodes_read, -+}; -+ -+#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS -+static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct bch_fs *c = i->c; -+ struct btree_trans *trans; -+ ssize_t ret = 0; -+ u32 seq; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+restart: -+ seqmutex_lock(&c->btree_trans_lock); -+ list_for_each_entry(trans, &c->btree_trans_list, list) { -+ if (trans->locking_wait.task->pid <= i->iter) -+ continue; -+ -+ closure_get(&trans->ref); -+ seq = seqmutex_seq(&c->btree_trans_lock); -+ seqmutex_unlock(&c->btree_trans_lock); -+ -+ ret = flush_buf(i); -+ if (ret) { -+ closure_put(&trans->ref); -+ goto unlocked; -+ } -+ -+ bch2_btree_trans_to_text(&i->buf, trans); -+ -+ prt_printf(&i->buf, "backtrace:"); -+ prt_newline(&i->buf); -+ printbuf_indent_add(&i->buf, 2); -+ bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task); -+ printbuf_indent_sub(&i->buf, 2); -+ prt_newline(&i->buf); -+ -+ i->iter = trans->locking_wait.task->pid; -+ -+ closure_put(&trans->ref); -+ -+ if (!seqmutex_relock(&c->btree_trans_lock, seq)) -+ goto restart; -+ } -+ seqmutex_unlock(&c->btree_trans_lock); -+unlocked: -+ if (i->buf.allocation_failure) -+ ret = -ENOMEM; -+ -+ if (!ret) -+ ret = flush_buf(i); -+ -+ return ret ?: i->ret; -+} -+ -+static const struct file_operations btree_transactions_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_btree_transactions_read, -+}; -+#endif /* CONFIG_BCACHEFS_DEBUG_TRANSACTIONS */ -+ -+static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct bch_fs *c = i->c; -+ bool done = false; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ do { -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ break; -+ -+ done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter); -+ i->iter++; -+ } while (!done); -+ -+ if (i->buf.allocation_failure) -+ return -ENOMEM; -+ -+ return i->ret; -+} -+ -+static const struct file_operations journal_pins_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_journal_pins_read, -+}; -+ -+static int lock_held_stats_open(struct inode *inode, struct file *file) -+{ -+ struct bch_fs *c = inode->i_private; -+ struct dump_iter *i; -+ -+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); -+ -+ if (!i) -+ return -ENOMEM; -+ -+ i->iter = 0; -+ i->c = c; -+ i->buf = PRINTBUF; -+ file->private_data = i; -+ -+ return 0; -+} -+ -+static int lock_held_stats_release(struct inode *inode, struct file *file) -+{ -+ struct dump_iter *i = file->private_data; -+ -+ printbuf_exit(&i->buf); -+ 
kfree(i); -+ -+ return 0; -+} -+ -+static ssize_t lock_held_stats_read(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct bch_fs *c = i->c; -+ int err; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ while (1) { -+ struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; -+ -+ err = flush_buf(i); -+ if (err) -+ return err; -+ -+ if (!i->size) -+ break; -+ -+ if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) || -+ !bch2_btree_transaction_fns[i->iter]) -+ break; -+ -+ prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]); -+ prt_newline(&i->buf); -+ printbuf_indent_add(&i->buf, 2); -+ -+ mutex_lock(&s->lock); -+ -+ prt_printf(&i->buf, "Max mem used: %u", s->max_mem); -+ prt_newline(&i->buf); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { -+ prt_printf(&i->buf, "Lock hold times:"); -+ prt_newline(&i->buf); -+ -+ printbuf_indent_add(&i->buf, 2); -+ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); -+ printbuf_indent_sub(&i->buf, 2); -+ } -+ -+ if (s->max_paths_text) { -+ prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths); -+ prt_newline(&i->buf); -+ -+ printbuf_indent_add(&i->buf, 2); -+ prt_str_indented(&i->buf, s->max_paths_text); -+ printbuf_indent_sub(&i->buf, 2); -+ } -+ -+ mutex_unlock(&s->lock); -+ -+ printbuf_indent_sub(&i->buf, 2); -+ prt_newline(&i->buf); -+ i->iter++; -+ } -+ -+ if (i->buf.allocation_failure) -+ return -ENOMEM; -+ -+ return i->ret; -+} -+ -+static const struct file_operations lock_held_stats_op = { -+ .owner = THIS_MODULE, -+ .open = lock_held_stats_open, -+ .release = lock_held_stats_release, -+ .read = lock_held_stats_read, -+}; -+ -+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, -+ size_t size, loff_t *ppos) -+{ -+ struct dump_iter *i = file->private_data; -+ struct bch_fs *c = i->c; -+ struct btree_trans *trans; -+ ssize_t ret = 0; -+ u32 seq; -+ -+ i->ubuf = buf; -+ i->size = size; -+ i->ret = 0; -+ -+ if (i->iter) -+ goto out; -+restart: -+ seqmutex_lock(&c->btree_trans_lock); -+ list_for_each_entry(trans, &c->btree_trans_list, list) { -+ if (trans->locking_wait.task->pid <= i->iter) -+ continue; -+ -+ closure_get(&trans->ref); -+ seq = seqmutex_seq(&c->btree_trans_lock); -+ seqmutex_unlock(&c->btree_trans_lock); -+ -+ ret = flush_buf(i); -+ if (ret) { -+ closure_put(&trans->ref); -+ goto out; -+ } -+ -+ bch2_check_for_deadlock(trans, &i->buf); -+ -+ i->iter = trans->locking_wait.task->pid; -+ -+ closure_put(&trans->ref); -+ -+ if (!seqmutex_relock(&c->btree_trans_lock, seq)) -+ goto restart; -+ } -+ seqmutex_unlock(&c->btree_trans_lock); -+out: -+ if (i->buf.allocation_failure) -+ ret = -ENOMEM; -+ -+ if (!ret) -+ ret = flush_buf(i); -+ -+ return ret ?: i->ret; -+} -+ -+static const struct file_operations btree_deadlock_ops = { -+ .owner = THIS_MODULE, -+ .open = bch2_dump_open, -+ .release = bch2_dump_release, -+ .read = bch2_btree_deadlock_read, -+}; -+ -+void bch2_fs_debug_exit(struct bch_fs *c) -+{ -+ if (!IS_ERR_OR_NULL(c->fs_debug_dir)) -+ debugfs_remove_recursive(c->fs_debug_dir); -+} -+ -+void bch2_fs_debug_init(struct bch_fs *c) -+{ -+ struct btree_debug *bd; -+ char name[100]; -+ -+ if (IS_ERR_OR_NULL(bch_debug)) -+ return; -+ -+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); -+ c->fs_debug_dir = debugfs_create_dir(name, bch_debug); -+ if (IS_ERR_OR_NULL(c->fs_debug_dir)) -+ return; -+ -+ debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir, 
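
bch2_btree_transactions_read() and bch2_btree_deadlock_read() above share a resumption trick: after dropping the seqmutex to flush output, they restart the walk of c->btree_trans_list from the head and skip every transaction whose pid is at or below the last one emitted (stored in i->iter). A sketch of that resume-by-key pattern over a plain array; the demo assumes ascending unique ids, which the real code does not require since it simply rescans:

#include <stdio.h>

int main(void)
{
    int pids[] = { 103, 215, 340, 512 };
    unsigned long last = 0;     /* i->iter: last pid already emitted */
    int emitted;

    do {    /* every pass restarts from the head, as after a lock drop */
        emitted = 0;
        for (int j = 0; j < 4; j++) {
            if ((unsigned long) pids[j] <= last)
                continue;       /* already shown on an earlier pass */
            printf("trans pid %d\n", pids[j]);
            last = (unsigned long) pids[j];
            emitted = 1;
            break;              /* pretend the lock must be dropped here */
        }
    } while (emitted);
    return 0;
}
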
-+ c->btree_debug, &cached_btree_nodes_ops); -+ -+#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS -+ debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir, -+ c->btree_debug, &btree_transactions_ops); -+#endif -+ -+ debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, -+ c->btree_debug, &journal_pins_ops); -+ -+ debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, -+ c, &lock_held_stats_op); -+ -+ debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir, -+ c->btree_debug, &btree_deadlock_ops); -+ -+ c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); -+ if (IS_ERR_OR_NULL(c->btree_debug_dir)) -+ return; -+ -+ for (bd = c->btree_debug; -+ bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); -+ bd++) { -+ bd->id = bd - c->btree_debug; -+ debugfs_create_file(bch2_btree_id_str(bd->id), -+ 0400, c->btree_debug_dir, bd, -+ &btree_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-formats", -+ bch2_btree_id_str(bd->id)); -+ -+ debugfs_create_file(name, 0400, c->btree_debug_dir, bd, -+ &btree_format_debug_ops); -+ -+ snprintf(name, sizeof(name), "%s-bfloat-failed", -+ bch2_btree_id_str(bd->id)); -+ -+ debugfs_create_file(name, 0400, c->btree_debug_dir, bd, -+ &bfloat_failed_debug_ops); -+ } -+} -+ -+#endif -+ -+void bch2_debug_exit(void) -+{ -+ if (!IS_ERR_OR_NULL(bch_debug)) -+ debugfs_remove_recursive(bch_debug); -+} -+ -+int __init bch2_debug_init(void) -+{ -+ int ret = 0; -+ -+ bch_debug = debugfs_create_dir("bcachefs", NULL); -+ return ret; -+} -diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h -new file mode 100644 -index 000000000000..2c37143b5fd1 ---- /dev/null -+++ b/fs/bcachefs/debug.h -@@ -0,0 +1,32 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DEBUG_H -+#define _BCACHEFS_DEBUG_H -+ -+#include "bcachefs.h" -+ -+struct bio; -+struct btree; -+struct bch_fs; -+ -+void __bch2_btree_verify(struct bch_fs *, struct btree *); -+void bch2_btree_node_ondisk_to_text(struct printbuf *, struct bch_fs *, -+ const struct btree *); -+ -+static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) -+{ -+ if (bch2_verify_btree_ondisk) -+ __bch2_btree_verify(c, b); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+void bch2_fs_debug_exit(struct bch_fs *); -+void bch2_fs_debug_init(struct bch_fs *); -+#else -+static inline void bch2_fs_debug_exit(struct bch_fs *c) {} -+static inline void bch2_fs_debug_init(struct bch_fs *c) {} -+#endif -+ -+void bch2_debug_exit(void); -+int bch2_debug_init(void); -+ -+#endif /* _BCACHEFS_DEBUG_H */ -diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c -new file mode 100644 -index 000000000000..1a0f2d571569 ---- /dev/null -+++ b/fs/bcachefs/dirent.c -@@ -0,0 +1,577 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_buf.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "dirent.h" -+#include "fs.h" -+#include "keylist.h" -+#include "str_hash.h" -+#include "subvolume.h" -+ -+#include -+ -+static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) -+{ -+ unsigned bkey_u64s = bkey_val_u64s(d.k); -+ unsigned bkey_bytes = bkey_u64s * sizeof(u64); -+ u64 last_u64 = ((u64*)d.v)[bkey_u64s - 1]; -+#if CPU_BIG_ENDIAN -+ unsigned trailing_nuls = last_u64 ? __builtin_ctzll(last_u64) / 8 : 64 / 8; -+#else -+ unsigned trailing_nuls = last_u64 ? 
__builtin_clzll(last_u64) / 8 : 64 / 8; -+#endif -+ -+ return bkey_bytes - -+ offsetof(struct bch_dirent, d_name) - -+ trailing_nuls; -+} -+ -+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d) -+{ -+ return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); -+} -+ -+static u64 bch2_dirent_hash(const struct bch_hash_info *info, -+ const struct qstr *name) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, name->name, name->len); -+ -+ /* [0,2) reserved for dots */ -+ return max_t(u64, bch2_str_hash_end(&ctx, info), 2); -+} -+ -+static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_dirent_hash(info, key); -+} -+ -+static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ struct qstr name = bch2_dirent_get_name(d); -+ -+ return bch2_dirent_hash(info, &name); -+} -+ -+static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ const struct qstr l_name = bch2_dirent_get_name(l); -+ const struct qstr *r_name = _r; -+ -+ return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len); -+} -+ -+static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); -+ struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); -+ const struct qstr l_name = bch2_dirent_get_name(l); -+ const struct qstr r_name = bch2_dirent_get_name(r); -+ -+ return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len); -+} -+ -+static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ -+ if (d.v->d_type == DT_SUBVOL) -+ return le32_to_cpu(d.v->d_parent_subvol) == inum.subvol; -+ return true; -+} -+ -+const struct bch_hash_desc bch2_dirent_hash_desc = { -+ .btree_id = BTREE_ID_dirents, -+ .key_type = KEY_TYPE_dirent, -+ .hash_key = dirent_hash_key, -+ .hash_bkey = dirent_hash_bkey, -+ .cmp_key = dirent_cmp_key, -+ .cmp_bkey = dirent_cmp_bkey, -+ .is_visible = dirent_is_visible, -+}; -+ -+int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ struct qstr d_name = bch2_dirent_get_name(d); -+ int ret = 0; -+ -+ bkey_fsck_err_on(!d_name.len, c, err, -+ dirent_empty_name, -+ "empty name"); -+ -+ bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, -+ dirent_val_too_big, -+ "value too big (%zu > %u)", -+ bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); -+ -+ /* -+ * Check new keys don't exceed the max length -+ * (older keys may be larger.) 
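
bch2_dirent_name_bytes() above recovers the name length from the NUL padding in the value's final u64: on a little-endian host the padding bytes are the most significant ones, so counting leading zero bits gives the padding size (big-endian counts trailing zeros for the same reason). A runnable little-endian sketch, assuming GCC/Clang builtins:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
    /* One u64 of dirent payload: "abc" plus five NUL padding bytes. */
    uint64_t last_u64 = 0;
    memcpy(&last_u64, "abc\0\0\0\0\0", 8);

    /* Little-endian: the padding NULs occupy the high-order bytes. */
    unsigned trailing_nuls = last_u64
        ? (unsigned) __builtin_clzll(last_u64) / 8
        : 64 / 8;

    printf("trailing NULs: %u, name bytes: %u\n",
           trailing_nuls, 8 - trailing_nuls);   /* 5 and 3 */
    return 0;
}
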
-+ */ -+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err, -+ dirent_name_too_long, -+ "dirent name too big (%u > %u)", -+ d_name.len, BCH_NAME_MAX); -+ -+ bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, -+ dirent_name_embedded_nul, -+ "dirent has stray data after name's NUL"); -+ -+ bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || -+ (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, -+ dirent_name_dot_or_dotdot, -+ "invalid name"); -+ -+ bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, -+ dirent_name_has_slash, -+ "name with /"); -+ -+ bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && -+ le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, -+ dirent_to_itself, -+ "dirent points to own directory"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); -+ struct qstr d_name = bch2_dirent_get_name(d); -+ -+ prt_printf(out, "%.*s -> %llu type %s", -+ d_name.len, -+ d_name.name, -+ d.v->d_type != DT_SUBVOL -+ ? le64_to_cpu(d.v->d_inum) -+ : le32_to_cpu(d.v->d_child_subvol), -+ bch2_d_type_str(d.v->d_type)); -+} -+ -+static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, -+ subvol_inum dir, u8 type, -+ const struct qstr *name, u64 dst) -+{ -+ struct bkey_i_dirent *dirent; -+ unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len); -+ -+ if (name->len > BCH_NAME_MAX) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ BUG_ON(u64s > U8_MAX); -+ -+ dirent = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(dirent)) -+ return dirent; -+ -+ bkey_dirent_init(&dirent->k_i); -+ dirent->k.u64s = u64s; -+ -+ if (type != DT_SUBVOL) { -+ dirent->v.d_inum = cpu_to_le64(dst); -+ } else { -+ dirent->v.d_parent_subvol = cpu_to_le32(dir.subvol); -+ dirent->v.d_child_subvol = cpu_to_le32(dst); -+ } -+ -+ dirent->v.d_type = type; -+ -+ memcpy(dirent->v.d_name, name->name, name->len); -+ memset(dirent->v.d_name + name->len, 0, -+ bkey_val_bytes(&dirent->k) - -+ offsetof(struct bch_dirent, d_name) - -+ name->len); -+ -+ EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); -+ -+ return dirent; -+} -+ -+int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, -+ const struct bch_hash_info *hash_info, -+ u8 type, const struct qstr *name, u64 dst_inum, -+ u64 *dir_offset, int flags) -+{ -+ struct bkey_i_dirent *dirent; -+ int ret; -+ -+ dirent = dirent_create_key(trans, dir, type, name, dst_inum); -+ ret = PTR_ERR_OR_ZERO(dirent); -+ if (ret) -+ return ret; -+ -+ ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, -+ dir, &dirent->k_i, flags); -+ *dir_offset = dirent->k.p.offset; -+ -+ return ret; -+} -+ -+static void dirent_copy_target(struct bkey_i_dirent *dst, -+ struct bkey_s_c_dirent src) -+{ -+ dst->v.d_inum = src.v->d_inum; -+ dst->v.d_type = src.v->d_type; -+} -+ -+int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, -+ struct bkey_s_c_dirent d, subvol_inum *target) -+{ -+ struct bch_subvolume s; -+ int ret = 0; -+ -+ if (d.v->d_type == DT_SUBVOL && -+ le32_to_cpu(d.v->d_parent_subvol) != dir.subvol) -+ return 1; -+ -+ if (likely(d.v->d_type != DT_SUBVOL)) { -+ target->subvol = dir.subvol; -+ target->inum = le64_to_cpu(d.v->d_inum); -+ } else { -+ target->subvol = le32_to_cpu(d.v->d_child_subvol); -+ -+ ret = bch2_subvolume_get(trans, target->subvol, true, BTREE_ITER_CACHED, &s); -+ -+ target->inum = 
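
The bkey_fsck_err_on() chain in bch2_dirent_invalid() above boils down to a few byte tests on the stored name. A stand-alone restatement of just the name rules, as a hypothetical helper; DEMO_NAME_MAX mirrors BCH_NAME_MAX, which is 512 in current trees:

#include <stdio.h>
#include <string.h>

#define DEMO_NAME_MAX 512

/* Mirrors the name checks in bch2_dirent_invalid(); 0 means valid. */
static int demo_dirent_name_ok(const char *name, size_t len)
{
    if (!len)
        return -1;      /* empty name */
    if (len > DEMO_NAME_MAX)
        return -2;      /* name too long (only rejected on commit) */
    if (strnlen(name, len) != len)
        return -3;      /* stray data after the name's NUL */
    if ((len == 1 && name[0] == '.') ||
        (len == 2 && name[0] == '.' && name[1] == '.'))
        return -4;      /* "." and ".." are reserved */
    if (memchr(name, '/', len))
        return -5;      /* names may not contain '/' */
    return 0;
}

int main(void)
{
    printf("%d %d %d\n",
           demo_dirent_name_ok("ok.txt", 6),   /* 0 */
           demo_dirent_name_ok("..", 2),       /* -4 */
           demo_dirent_name_ok("a/b", 3));     /* -5 */
    return 0;
}
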
le64_to_cpu(s.inode); -+ } -+ -+ return ret; -+} -+ -+int bch2_dirent_rename(struct btree_trans *trans, -+ subvol_inum src_dir, struct bch_hash_info *src_hash, -+ subvol_inum dst_dir, struct bch_hash_info *dst_hash, -+ const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset, -+ const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, -+ enum bch_rename_mode mode) -+{ -+ struct btree_iter src_iter = { NULL }; -+ struct btree_iter dst_iter = { NULL }; -+ struct bkey_s_c old_src, old_dst = bkey_s_c_null; -+ struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; -+ struct bpos dst_pos = -+ POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name)); -+ unsigned src_type = 0, dst_type = 0, src_update_flags = 0; -+ int ret = 0; -+ -+ if (src_dir.subvol != dst_dir.subvol) -+ return -EXDEV; -+ -+ memset(src_inum, 0, sizeof(*src_inum)); -+ memset(dst_inum, 0, sizeof(*dst_inum)); -+ -+ /* Lookup src: */ -+ ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc, -+ src_hash, src_dir, src_name, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto out; -+ -+ old_src = bch2_btree_iter_peek_slot(&src_iter); -+ ret = bkey_err(old_src); -+ if (ret) -+ goto out; -+ -+ ret = bch2_dirent_read_target(trans, src_dir, -+ bkey_s_c_to_dirent(old_src), src_inum); -+ if (ret) -+ goto out; -+ -+ src_type = bkey_s_c_to_dirent(old_src).v->d_type; -+ -+ if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE) -+ return -EOPNOTSUPP; -+ -+ -+ /* Lookup dst: */ -+ if (mode == BCH_RENAME) { -+ /* -+ * Note that we're _not_ checking if the target already exists - -+ * we're relying on the VFS to do that check for us for -+ * correctness: -+ */ -+ ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name); -+ if (ret) -+ goto out; -+ } else { -+ ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc, -+ dst_hash, dst_dir, dst_name, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto out; -+ -+ old_dst = bch2_btree_iter_peek_slot(&dst_iter); -+ ret = bkey_err(old_dst); -+ if (ret) -+ goto out; -+ -+ ret = bch2_dirent_read_target(trans, dst_dir, -+ bkey_s_c_to_dirent(old_dst), dst_inum); -+ if (ret) -+ goto out; -+ -+ dst_type = bkey_s_c_to_dirent(old_dst).v->d_type; -+ -+ if (dst_type == DT_SUBVOL) -+ return -EOPNOTSUPP; -+ } -+ -+ if (mode != BCH_RENAME_EXCHANGE) -+ *src_offset = dst_iter.pos.offset; -+ -+ /* Create new dst key: */ -+ new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_dst); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); -+ new_dst->k.p = dst_iter.pos; -+ -+ /* Create new src key: */ -+ if (mode == BCH_RENAME_EXCHANGE) { -+ new_src = dirent_create_key(trans, src_dir, 0, src_name, 0); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); -+ new_src->k.p = src_iter.pos; -+ } else { -+ new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); -+ ret = PTR_ERR_OR_ZERO(new_src); -+ if (ret) -+ goto out; -+ -+ bkey_init(&new_src->k); -+ new_src->k.p = src_iter.pos; -+ -+ if (bkey_le(dst_pos, src_iter.pos) && -+ bkey_lt(src_iter.pos, dst_iter.pos)) { -+ /* -+ * We have a hash collision for the new dst key, -+ * and new_src - the key we're deleting - is between -+ * new_dst's hashed slot and the slot we're going to be -+ * inserting it into - oops. 
This will break the hash -+ * table if we don't deal with it: -+ */ -+ if (mode == BCH_RENAME) { -+ /* -+ * If we're not overwriting, we can just insert -+ * new_dst at the src position: -+ */ -+ new_src = new_dst; -+ new_src->k.p = src_iter.pos; -+ goto out_set_src; -+ } else { -+ /* If we're overwriting, we can't insert new_dst -+ * at a different slot because it has to -+ * overwrite old_dst - just make sure to use a -+ * whiteout when deleting src: -+ */ -+ new_src->k.type = KEY_TYPE_hash_whiteout; -+ } -+ } else { -+ /* Check if we need a whiteout to delete src: */ -+ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, -+ src_hash, &src_iter); -+ if (ret < 0) -+ goto out; -+ -+ if (ret) -+ new_src->k.type = KEY_TYPE_hash_whiteout; -+ } -+ } -+ -+ ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); -+ if (ret) -+ goto out; -+out_set_src: -+ -+ /* -+ * If we're deleting a subvolume, we need to really delete the dirent, -+ * not just emit a whiteout in the current snapshot: -+ */ -+ if (src_type == DT_SUBVOL) { -+ bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot); -+ ret = bch2_btree_iter_traverse(&src_iter); -+ if (ret) -+ goto out; -+ -+ new_src->k.p = src_iter.pos; -+ src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE; -+ } -+ -+ ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags); -+ if (ret) -+ goto out; -+ -+ if (mode == BCH_RENAME_EXCHANGE) -+ *src_offset = new_src->k.p.offset; -+ *dst_offset = new_dst->k.p.offset; -+out: -+ bch2_trans_iter_exit(trans, &src_iter); -+ bch2_trans_iter_exit(trans, &dst_iter); -+ return ret; -+} -+ -+int __bch2_dirent_lookup_trans(struct btree_trans *trans, -+ struct btree_iter *iter, -+ subvol_inum dir, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name, subvol_inum *inum, -+ unsigned flags) -+{ -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ u32 snapshot; -+ int ret; -+ -+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); -+ if (ret) -+ return ret; -+ -+ ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, -+ hash_info, dir, name, flags); -+ if (ret) -+ return ret; -+ -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ d = bkey_s_c_to_dirent(k); -+ -+ ret = bch2_dirent_read_target(trans, dir, d, inum); -+ if (ret > 0) -+ ret = -ENOENT; -+err: -+ if (ret) -+ bch2_trans_iter_exit(trans, iter); -+ -+ return ret; -+} -+ -+u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, -+ const struct bch_hash_info *hash_info, -+ const struct qstr *name, subvol_inum *inum) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, -+ name, inum, 0); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ if (!ret) -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u32 snapshot; -+ int ret; -+ -+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); -+ if (ret) -+ return ret; -+ -+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, -+ SPOS(dir.inum, 0, snapshot), -+ POS(dir.inum, U64_MAX), 0, k, ret) -+ if (k.k->type == KEY_TYPE_dirent) { -+ ret = -ENOTEMPTY; -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+} -+ -+int bch2_readdir(struct bch_fs *c, 
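
The whiteout handling in bch2_dirent_rename() above exists because dirents live in a linearly-probed hash table: deleting an entry that later keys probed past would cut their probe chain, so the delete is recorded as a KEY_TYPE_hash_whiteout instead. A tiny open-addressing table, with hypothetical names, showing a tombstone keeping a colliding entry reachable:

#include <stdio.h>
#include <string.h>

#define N 8
static const char *slot[N];             /* NULL = never used */
static const char TOMB[] = "<whiteout>";

static unsigned hash(const char *s) { return (unsigned) s[0] % N; }

static void insert(const char *s)
{
    for (unsigned i = hash(s); ; i = (i + 1) % N)
        if (!slot[i] || slot[i] == TOMB) { slot[i] = s; return; }
}

static int lookup(const char *s)
{
    /* probing stops only at a slot that was never used */
    for (unsigned i = hash(s); slot[i]; i = (i + 1) % N)
        if (slot[i] != TOMB && !strcmp(slot[i], s))
            return (int) i;
    return -1;
}

int main(void)
{
    insert("ant"); insert("ape");   /* "ape" collides, lands one past "ant" */
    slot[hash("ant")] = TOMB;       /* delete "ant" via a whiteout... */
    printf("ape at slot %d\n", lookup("ape")); /* ...so "ape" stays visible */
    return 0;
}
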
subvol_inum inum, struct dir_context *ctx) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent dirent; -+ subvol_inum target; -+ u32 snapshot; -+ struct bkey_buf sk; -+ struct qstr name; -+ int ret; -+ -+ bch2_bkey_buf_init(&sk); -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, -+ SPOS(inum.inum, ctx->pos, snapshot), -+ POS(inum.inum, U64_MAX), 0, k, ret) { -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ dirent = bkey_s_c_to_dirent(k); -+ -+ ret = bch2_dirent_read_target(trans, inum, dirent, &target); -+ if (ret < 0) -+ break; -+ if (ret) -+ continue; -+ -+ /* dir_emit() can fault and block: */ -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ dirent = bkey_i_to_s_c_dirent(sk.k); -+ bch2_trans_unlock(trans); -+ -+ name = bch2_dirent_get_name(dirent); -+ -+ ctx->pos = dirent.k->p.offset; -+ if (!dir_emit(ctx, name.name, -+ name.len, -+ target.inum, -+ vfs_d_type(dirent.v->d_type))) -+ break; -+ ctx->pos = dirent.k->p.offset + 1; -+ -+ /* -+ * read_target looks up subvolumes, we can overflow paths if the -+ * directory has many subvolumes in it -+ */ -+ ret = btree_trans_too_many_iters(trans); -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_put(trans); -+ bch2_bkey_buf_exit(&sk, c); -+ -+ return ret; -+} -diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h -new file mode 100644 -index 000000000000..cd262bf4d9c5 ---- /dev/null -+++ b/fs/bcachefs/dirent.h -@@ -0,0 +1,70 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DIRENT_H -+#define _BCACHEFS_DIRENT_H -+ -+#include "str_hash.h" -+ -+enum bkey_invalid_flags; -+extern const struct bch_hash_desc bch2_dirent_hash_desc; -+ -+int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_dirent ((struct bkey_ops) { \ -+ .key_invalid = bch2_dirent_invalid, \ -+ .val_to_text = bch2_dirent_to_text, \ -+ .min_val_size = 16, \ -+}) -+ -+struct qstr; -+struct file; -+struct dir_context; -+struct bch_fs; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); -+ -+static inline unsigned dirent_val_u64s(unsigned len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, -+ sizeof(u64)); -+} -+ -+int bch2_dirent_read_target(struct btree_trans *, subvol_inum, -+ struct bkey_s_c_dirent, subvol_inum *); -+ -+int bch2_dirent_create(struct btree_trans *, subvol_inum, -+ const struct bch_hash_info *, u8, -+ const struct qstr *, u64, u64 *, int); -+ -+static inline unsigned vfs_d_type(unsigned type) -+{ -+ return type == DT_SUBVOL ? 
DT_DIR : type; -+} -+ -+enum bch_rename_mode { -+ BCH_RENAME, -+ BCH_RENAME_OVERWRITE, -+ BCH_RENAME_EXCHANGE, -+}; -+ -+int bch2_dirent_rename(struct btree_trans *, -+ subvol_inum, struct bch_hash_info *, -+ subvol_inum, struct bch_hash_info *, -+ const struct qstr *, subvol_inum *, u64 *, -+ const struct qstr *, subvol_inum *, u64 *, -+ enum bch_rename_mode); -+ -+int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, -+ subvol_inum, const struct bch_hash_info *, -+ const struct qstr *, subvol_inum *, unsigned); -+u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum, -+ const struct bch_hash_info *, -+ const struct qstr *, subvol_inum *); -+ -+int bch2_empty_dir_trans(struct btree_trans *, subvol_inum); -+int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *); -+ -+#endif /* _BCACHEFS_DIRENT_H */ -diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c -new file mode 100644 -index 000000000000..d613695abf9f ---- /dev/null -+++ b/fs/bcachefs/disk_groups.c -@@ -0,0 +1,620 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "disk_groups.h" -+#include "sb-members.h" -+#include "super-io.h" -+ -+#include -+ -+static int group_cmp(const void *_l, const void *_r) -+{ -+ const struct bch_disk_group *l = _l; -+ const struct bch_disk_group *r = _r; -+ -+ return ((BCH_GROUP_DELETED(l) > BCH_GROUP_DELETED(r)) - -+ (BCH_GROUP_DELETED(l) < BCH_GROUP_DELETED(r))) ?: -+ ((BCH_GROUP_PARENT(l) > BCH_GROUP_PARENT(r)) - -+ (BCH_GROUP_PARENT(l) < BCH_GROUP_PARENT(r))) ?: -+ strncmp(l->label, r->label, sizeof(l->label)); -+} -+ -+static int bch2_sb_disk_groups_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g, *sorted = NULL; -+ unsigned nr_groups = disk_groups_nr(groups); -+ unsigned i, len; -+ int ret = 0; -+ -+ for (i = 0; i < sb->nr_devices; i++) { -+ struct bch_member m = bch2_sb_member_get(sb, i); -+ unsigned group_id; -+ -+ if (!BCH_MEMBER_GROUP(&m)) -+ continue; -+ -+ group_id = BCH_MEMBER_GROUP(&m) - 1; -+ -+ if (group_id >= nr_groups) { -+ prt_printf(err, "disk %u has invalid label %u (have %u)", -+ i, group_id, nr_groups); -+ return -BCH_ERR_invalid_sb_disk_groups; -+ } -+ -+ if (BCH_GROUP_DELETED(&groups->entries[group_id])) { -+ prt_printf(err, "disk %u has deleted label %u", i, group_id); -+ return -BCH_ERR_invalid_sb_disk_groups; -+ } -+ } -+ -+ if (!nr_groups) -+ return 0; -+ -+ for (i = 0; i < nr_groups; i++) { -+ g = groups->entries + i; -+ -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ len = strnlen(g->label, sizeof(g->label)); -+ if (!len) { -+ prt_printf(err, "label %u empty", i); -+ return -BCH_ERR_invalid_sb_disk_groups; -+ } -+ } -+ -+ sorted = kmalloc_array(nr_groups, sizeof(*sorted), GFP_KERNEL); -+ if (!sorted) -+ return -BCH_ERR_ENOMEM_disk_groups_validate; -+ -+ memcpy(sorted, groups->entries, nr_groups * sizeof(*sorted)); -+ sort(sorted, nr_groups, sizeof(*sorted), group_cmp, NULL); -+ -+ for (g = sorted; g + 1 < sorted + nr_groups; g++) -+ if (!BCH_GROUP_DELETED(g) && -+ !group_cmp(&g[0], &g[1])) { -+ prt_printf(err, "duplicate label %llu.%.*s", -+ BCH_GROUP_PARENT(g), -+ (int) sizeof(g->label), g->label); -+ ret = -BCH_ERR_invalid_sb_disk_groups; -+ goto err; -+ } -+err: -+ kfree(sorted); -+ return ret; -+} -+ -+void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct bch_disk_groups_cpu *g; -+ struct bch_dev *ca; -+ int i; -+ unsigned iter; -+ 
-+ out->atomic++; -+ rcu_read_lock(); -+ -+ g = rcu_dereference(c->disk_groups); -+ if (!g) -+ goto out; -+ -+ for (i = 0; i < g->nr; i++) { -+ if (i) -+ prt_printf(out, " "); -+ -+ if (g->entries[i].deleted) { -+ prt_printf(out, "[deleted]"); -+ continue; -+ } -+ -+ prt_printf(out, "[parent %d devs", g->entries[i].parent); -+ for_each_member_device_rcu(ca, c, iter, &g->entries[i].devs) -+ prt_printf(out, " %s", ca->name); -+ prt_printf(out, "]"); -+ } -+ -+out: -+ rcu_read_unlock(); -+ out->atomic--; -+} -+ -+static void bch2_sb_disk_groups_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ field_to_type(f, disk_groups); -+ struct bch_disk_group *g; -+ unsigned nr_groups = disk_groups_nr(groups); -+ -+ for (g = groups->entries; -+ g < groups->entries + nr_groups; -+ g++) { -+ if (g != groups->entries) -+ prt_printf(out, " "); -+ -+ if (BCH_GROUP_DELETED(g)) -+ prt_printf(out, "[deleted]"); -+ else -+ prt_printf(out, "[parent %llu name %s]", -+ BCH_GROUP_PARENT(g), g->label); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { -+ .validate = bch2_sb_disk_groups_validate, -+ .to_text = bch2_sb_disk_groups_to_text -+}; -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_disk_groups *groups; -+ struct bch_disk_groups_cpu *cpu_g, *old_g; -+ unsigned i, g, nr_groups; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ groups = bch2_sb_field_get(c->disk_sb.sb, disk_groups); -+ nr_groups = disk_groups_nr(groups); -+ -+ if (!groups) -+ return 0; -+ -+ cpu_g = kzalloc(struct_size(cpu_g, entries, nr_groups), GFP_KERNEL); -+ if (!cpu_g) -+ return -BCH_ERR_ENOMEM_disk_groups_to_cpu; -+ -+ cpu_g->nr = nr_groups; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct bch_disk_group *src = &groups->entries[i]; -+ struct bch_disk_group_cpu *dst = &cpu_g->entries[i]; -+ -+ dst->deleted = BCH_GROUP_DELETED(src); -+ dst->parent = BCH_GROUP_PARENT(src); -+ memcpy(dst->label, src->label, sizeof(dst->label)); -+ } -+ -+ for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { -+ struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, i); -+ struct bch_disk_group_cpu *dst; -+ -+ if (!bch2_member_exists(&m)) -+ continue; -+ -+ g = BCH_MEMBER_GROUP(&m); -+ while (g) { -+ dst = &cpu_g->entries[g - 1]; -+ __set_bit(i, dst->devs.d); -+ g = dst->parent; -+ } -+ } -+ -+ old_g = rcu_dereference_protected(c->disk_groups, -+ lockdep_is_held(&c->sb_lock)); -+ rcu_assign_pointer(c->disk_groups, cpu_g); -+ if (old_g) -+ kfree_rcu(old_g, rcu); -+ -+ return 0; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *c, unsigned target) -+{ -+ struct target t = target_decode(target); -+ struct bch_devs_mask *devs; -+ -+ rcu_read_lock(); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ devs = NULL; -+ break; -+ case TARGET_DEV: { -+ struct bch_dev *ca = t.dev < c->sb.nr_devices -+ ? rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ devs = ca ? &ca->self : NULL; -+ break; -+ } -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); -+ -+ devs = g && t.group < g->nr && !g->entries[t.group].deleted -+ ? 
&g->entries[t.group].devs -+ : NULL; -+ break; -+ } -+ default: -+ BUG(); -+ } -+ -+ rcu_read_unlock(); -+ -+ return devs; -+} -+ -+bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target) -+{ -+ struct target t = target_decode(target); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ return false; -+ case TARGET_DEV: -+ return dev == t.dev; -+ case TARGET_GROUP: { -+ struct bch_disk_groups_cpu *g; -+ const struct bch_devs_mask *m; -+ bool ret; -+ -+ rcu_read_lock(); -+ g = rcu_dereference(c->disk_groups); -+ m = g && t.group < g->nr && !g->entries[t.group].deleted -+ ? &g->entries[t.group].devs -+ : NULL; -+ -+ ret = m ? test_bit(dev, m->d) : false; -+ rcu_read_unlock(); -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+static int __bch2_disk_group_find(struct bch_sb_field_disk_groups *groups, -+ unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; i < nr_groups; i++) { -+ struct bch_disk_group *g = groups->entries + i; -+ -+ if (BCH_GROUP_DELETED(g)) -+ continue; -+ -+ if (!BCH_GROUP_DELETED(g) && -+ BCH_GROUP_PARENT(g) == parent && -+ strnlen(g->label, sizeof(g->label)) == namelen && -+ !memcmp(name, g->label, namelen)) -+ return i; -+ } -+ -+ return -1; -+} -+ -+static int __bch2_disk_group_add(struct bch_sb_handle *sb, unsigned parent, -+ const char *name, unsigned namelen) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_field_get(sb->sb, disk_groups); -+ unsigned i, nr_groups = disk_groups_nr(groups); -+ struct bch_disk_group *g; -+ -+ if (!namelen || namelen > BCH_SB_LABEL_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; -+ i < nr_groups && !BCH_GROUP_DELETED(&groups->entries[i]); -+ i++) -+ ; -+ -+ if (i == nr_groups) { -+ unsigned u64s = -+ (sizeof(struct bch_sb_field_disk_groups) + -+ sizeof(struct bch_disk_group) * (nr_groups + 1)) / -+ sizeof(u64); -+ -+ groups = bch2_sb_field_resize(sb, disk_groups, u64s); -+ if (!groups) -+ return -BCH_ERR_ENOSPC_disk_label_add; -+ -+ nr_groups = disk_groups_nr(groups); -+ } -+ -+ BUG_ON(i >= nr_groups); -+ -+ g = &groups->entries[i]; -+ -+ memcpy(g->label, name, namelen); -+ if (namelen < sizeof(g->label)) -+ g->label[namelen] = '\0'; -+ SET_BCH_GROUP_DELETED(g, 0); -+ SET_BCH_GROUP_PARENT(g, parent); -+ SET_BCH_GROUP_DATA_ALLOWED(g, ~0); -+ -+ return i; -+} -+ -+int bch2_disk_path_find(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_field_get(sb->sb, disk_groups); -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ v = __bch2_disk_group_find(groups, v + 1, name, len); -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) -+{ -+ struct bch_sb_field_disk_groups *groups; -+ unsigned parent = 0; -+ int v = -1; -+ -+ do { -+ const char *next = strchrnul(name, '.'); -+ unsigned len = next - name; -+ -+ if (*next == '.') -+ next++; -+ -+ groups = bch2_sb_field_get(sb->sb, disk_groups); -+ -+ v = __bch2_disk_group_find(groups, parent, name, len); -+ if (v < 0) -+ v = __bch2_disk_group_add(sb, parent, name, len); -+ if (v < 0) -+ return v; -+ -+ parent = v + 1; -+ name = next; -+ } while (*name && v >= 0); -+ -+ return v; -+} -+ -+void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) -+{ -+ struct 
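
bch2_disk_path_find() above resolves a dotted label such as "ssd.fast" one component at a time: strchrnul() (a GNU/kernel extension) delimits each segment, and every lookup is scoped to the parent returned by the previous one. A sketch of just the tokenizing walk, with the group lookup replaced by a stub and a portable chrnul() fallback:

#include <stdio.h>
#include <string.h>

/* Portable stand-in for strchrnul(). */
static const char *chrnul(const char *s, int c)
{
    const char *p = strchr(s, c);
    return p ? p : s + strlen(s);
}

int main(void)
{
    const char *name = "ssd.fast.scratch";
    int parent = 0;     /* root scope; the real code passes v + 1 */

    while (*name) {
        const char *next = chrnul(name, '.');
        int len = (int) (next - name);

        if (*next == '.')
            next++;
        /* real code: v = __bch2_disk_group_find(groups, parent, name, len) */
        printf("lookup \"%.*s\" under parent %d\n", len, name, parent);
        parent++;       /* stub: pretend each lookup succeeded */
        name = next;
    }
    return 0;
}
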
bch_disk_groups_cpu *groups; -+ struct bch_disk_group_cpu *g; -+ unsigned nr = 0; -+ u16 path[32]; -+ -+ out->atomic++; -+ rcu_read_lock(); -+ groups = rcu_dereference(c->disk_groups); -+ if (!groups) -+ goto invalid; -+ -+ while (1) { -+ if (nr == ARRAY_SIZE(path)) -+ goto invalid; -+ -+ if (v >= groups->nr) -+ goto invalid; -+ -+ g = groups->entries + v; -+ -+ if (g->deleted) -+ goto invalid; -+ -+ path[nr++] = v; -+ -+ if (!g->parent) -+ break; -+ -+ v = g->parent - 1; -+ } -+ -+ while (nr) { -+ v = path[--nr]; -+ g = groups->entries + v; -+ -+ prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); -+ if (nr) -+ prt_printf(out, "."); -+ } -+out: -+ rcu_read_unlock(); -+ out->atomic--; -+ return; -+invalid: -+ prt_printf(out, "invalid label %u", v); -+ goto out; -+} -+ -+void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) -+{ -+ struct bch_sb_field_disk_groups *groups = -+ bch2_sb_field_get(sb, disk_groups); -+ struct bch_disk_group *g; -+ unsigned nr = 0; -+ u16 path[32]; -+ -+ while (1) { -+ if (nr == ARRAY_SIZE(path)) -+ goto inval; -+ -+ if (v >= disk_groups_nr(groups)) -+ goto inval; -+ -+ g = groups->entries + v; -+ -+ if (BCH_GROUP_DELETED(g)) -+ goto inval; -+ -+ path[nr++] = v; -+ -+ if (!BCH_GROUP_PARENT(g)) -+ break; -+ -+ v = BCH_GROUP_PARENT(g) - 1; -+ } -+ -+ while (nr) { -+ v = path[--nr]; -+ g = groups->entries + v; -+ -+ prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); -+ if (nr) -+ prt_printf(out, "."); -+ } -+ return; -+inval: -+ prt_printf(out, "invalid label %u", v); -+} -+ -+int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) -+{ -+ struct bch_member *mi; -+ int ret, v = -1; -+ -+ if (!strlen(name) || !strcmp(name, "none")) -+ return 0; -+ -+ v = bch2_disk_path_find_or_create(&c->disk_sb, name); -+ if (v < 0) -+ return v; -+ -+ ret = bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ return ret; -+ -+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ SET_BCH_MEMBER_GROUP(mi, v + 1); -+ return 0; -+} -+ -+int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) -+{ -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ ret = __bch2_dev_group_set(c, ca, name) ?: -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, -+ struct printbuf *err) -+{ -+ struct bch_dev *ca; -+ int g; -+ -+ if (!val) -+ return -EINVAL; -+ -+ if (!c) -+ return 0; -+ -+ if (!strlen(val) || !strcmp(val, "none")) { -+ *res = 0; -+ return 0; -+ } -+ -+ /* Is it a device? */ -+ ca = bch2_dev_lookup(c, val); -+ if (!IS_ERR(ca)) { -+ *res = dev_to_target(ca->dev_idx); -+ percpu_ref_put(&ca->ref); -+ return 0; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ g = bch2_disk_path_find(&c->disk_sb, val); -+ mutex_unlock(&c->sb_lock); -+ -+ if (g >= 0) { -+ *res = group_to_target(g); -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ -+void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) -+{ -+ struct target t = target_decode(v); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ prt_printf(out, "none"); -+ break; -+ case TARGET_DEV: { -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ ca = t.dev < c->sb.nr_devices -+ ? 
rcu_dereference(c->devs[t.dev]) -+ : NULL; -+ -+ if (ca && percpu_ref_tryget(&ca->io_ref)) { -+ prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); -+ percpu_ref_put(&ca->io_ref); -+ } else if (ca) { -+ prt_printf(out, "offline device %u", t.dev); -+ } else { -+ prt_printf(out, "invalid device %u", t.dev); -+ } -+ -+ rcu_read_unlock(); -+ break; -+ } -+ case TARGET_GROUP: -+ bch2_disk_path_to_text(out, c, t.group); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) -+{ -+ struct target t = target_decode(v); -+ -+ switch (t.type) { -+ case TARGET_NULL: -+ prt_printf(out, "none"); -+ break; -+ case TARGET_DEV: { -+ struct bch_member m = bch2_sb_member_get(sb, t.dev); -+ -+ if (bch2_dev_exists(sb, t.dev)) { -+ prt_printf(out, "Device "); -+ pr_uuid(out, m.uuid.b); -+ prt_printf(out, " (%u)", t.dev); -+ } else { -+ prt_printf(out, "Bad device %u", t.dev); -+ } -+ break; -+ } -+ case TARGET_GROUP: -+ bch2_disk_path_to_text_sb(out, sb, t.group); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_opt_target_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_sb *sb, -+ u64 v) -+{ -+ if (c) -+ bch2_target_to_text(out, c, v); -+ else -+ bch2_target_to_text_sb(out, sb, v); -+} -diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h -new file mode 100644 -index 000000000000..441826fff224 ---- /dev/null -+++ b/fs/bcachefs/disk_groups.h -@@ -0,0 +1,111 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DISK_GROUPS_H -+#define _BCACHEFS_DISK_GROUPS_H -+ -+#include "disk_groups_types.h" -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_disk_groups; -+ -+static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) -+{ -+ return groups -+ ? 
(vstruct_end(&groups->field) - -+ (void *) &groups->entries[0]) / sizeof(struct bch_disk_group) -+ : 0; -+} -+ -+struct target { -+ enum { -+ TARGET_NULL, -+ TARGET_DEV, -+ TARGET_GROUP, -+ } type; -+ union { -+ unsigned dev; -+ unsigned group; -+ }; -+}; -+ -+#define TARGET_DEV_START 1 -+#define TARGET_GROUP_START (256 + TARGET_DEV_START) -+ -+static inline u16 dev_to_target(unsigned dev) -+{ -+ return TARGET_DEV_START + dev; -+} -+ -+static inline u16 group_to_target(unsigned group) -+{ -+ return TARGET_GROUP_START + group; -+} -+ -+static inline struct target target_decode(unsigned target) -+{ -+ if (target >= TARGET_GROUP_START) -+ return (struct target) { -+ .type = TARGET_GROUP, -+ .group = target - TARGET_GROUP_START -+ }; -+ -+ if (target >= TARGET_DEV_START) -+ return (struct target) { -+ .type = TARGET_DEV, -+ .group = target - TARGET_DEV_START -+ }; -+ -+ return (struct target) { .type = TARGET_NULL }; -+} -+ -+const struct bch_devs_mask *bch2_target_to_mask(struct bch_fs *, unsigned); -+ -+static inline struct bch_devs_mask target_rw_devs(struct bch_fs *c, -+ enum bch_data_type data_type, -+ u16 target) -+{ -+ struct bch_devs_mask devs = c->rw_devs[data_type]; -+ const struct bch_devs_mask *t = bch2_target_to_mask(c, target); -+ -+ if (t) -+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX); -+ return devs; -+} -+ -+static inline bool bch2_target_accepts_data(struct bch_fs *c, -+ enum bch_data_type data_type, -+ u16 target) -+{ -+ struct bch_devs_mask rw_devs = target_rw_devs(c, data_type, target); -+ return !bitmap_empty(rw_devs.d, BCH_SB_MEMBERS_MAX); -+} -+ -+bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned); -+ -+int bch2_disk_path_find(struct bch_sb_handle *, const char *); -+ -+/* Exported for userspace bcachefs-tools: */ -+int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -+ -+void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned); -+void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned); -+ -+void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned); -+ -+int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); -+void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); -+ -+#define bch2_opt_target (struct bch_opt_fn) { \ -+ .parse = bch2_opt_target_parse, \ -+ .to_text = bch2_opt_target_to_text, \ -+} -+ -+int bch2_sb_disk_groups_to_cpu(struct bch_fs *); -+ -+int __bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); -+int bch2_dev_group_set(struct bch_fs *, struct bch_dev *, const char *); -+ -+const char *bch2_sb_validate_disk_groups(struct bch_sb *, -+ struct bch_sb_field *); -+ -+void bch2_disk_groups_to_text(struct printbuf *, struct bch_fs *); -+ -+#endif /* _BCACHEFS_DISK_GROUPS_H */ -diff --git a/fs/bcachefs/disk_groups_types.h b/fs/bcachefs/disk_groups_types.h -new file mode 100644 -index 000000000000..a54ef085b13d ---- /dev/null -+++ b/fs/bcachefs/disk_groups_types.h -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H -+#define _BCACHEFS_DISK_GROUPS_TYPES_H -+ -+struct bch_disk_group_cpu { -+ bool deleted; -+ u16 parent; -+ u8 label[BCH_SB_LABEL_SIZE]; -+ struct bch_devs_mask devs; -+}; -+ -+struct bch_disk_groups_cpu { -+ struct rcu_head rcu; -+ unsigned nr; -+ struct bch_disk_group_cpu entries[] __counted_by(nr); -+}; -+ -+#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */ -diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c -new file mode 100644 -index 
000000000000..875f7c5a6fca ---- /dev/null -+++ b/fs/bcachefs/ec.c -@@ -0,0 +1,1969 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+/* erasure coding */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "backpointers.h" -+#include "bkey_buf.h" -+#include "bset.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_write_buffer.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "io_read.h" -+#include "keylist.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "super-io.h" -+#include "util.h" -+ -+#include <linux/sort.h> -+ -+#ifdef __KERNEL__ -+ -+#include <linux/raid/pq.h> -+#include <linux/raid/xor.h> -+ -+static void raid5_recov(unsigned disks, unsigned failed_idx, -+ size_t size, void **data) -+{ -+ unsigned i = 2, nr; -+ -+ BUG_ON(failed_idx >= disks); -+ -+ swap(data[0], data[failed_idx]); -+ memcpy(data[0], data[1], size); -+ -+ while (i < disks) { -+ nr = min_t(unsigned, disks - i, MAX_XOR_BLOCKS); -+ xor_blocks(nr, size, data[0], data + i); -+ i += nr; -+ } -+ -+ swap(data[0], data[failed_idx]); -+} -+ -+static void raid_gen(int nd, int np, size_t size, void **v) -+{ -+ if (np >= 1) -+ raid5_recov(nd + np, nd, size, v); -+ if (np >= 2) -+ raid6_call.gen_syndrome(nd + np, size, v); -+ BUG_ON(np > 2); -+} -+ -+static void raid_rec(int nr, int *ir, int nd, int np, size_t size, void **v) -+{ -+ switch (nr) { -+ case 0: -+ break; -+ case 1: -+ if (ir[0] < nd + 1) -+ raid5_recov(nd + 1, ir[0], size, v); -+ else -+ raid6_call.gen_syndrome(nd + np, size, v); -+ break; -+ case 2: -+ if (ir[1] < nd) { -+ /* data+data failure. */ -+ raid6_2data_recov(nd + np, size, ir[0], ir[1], v); -+ } else if (ir[0] < nd) { -+ /* data + p/q failure */ -+ -+ if (ir[1] == nd) /* data + p failure */ -+ raid6_datap_recov(nd + np, size, ir[0], v); -+ else { /* data + q failure */ -+ raid5_recov(nd + 1, ir[0], size, v); -+ raid6_call.gen_syndrome(nd + np, size, v); -+ } -+ } else { -+ raid_gen(nd, np, size, v); -+ } -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+#else -+ -+#include <raid/raid.h> -+ -+#endif -+ -+struct ec_bio { -+ struct bch_dev *ca; -+ struct ec_stripe_buf *buf; -+ size_t idx; -+ struct bio bio; -+}; -+ -+/* Stripes btree keys: */ -+ -+int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || -+ bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, -+ stripe_pos_bad, -+ "stripe at bad pos"); -+ -+ bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, -+ stripe_val_size_bad, -+ "incorrect value size (%zu < %u)", -+ bkey_val_u64s(k.k), stripe_val_u64s(s)); -+ -+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err); -+fsck_err: -+ return ret; -+} -+ -+void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; -+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant; -+ -+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", -+ s->algorithm, -+ le16_to_cpu(s->sectors), -+ nr_data, -+ s->nr_redundant, -+ s->csum_type, -+ 1U << s->csum_granularity_bits); -+ -+ for (i = 0; i < s->nr_blocks; i++) { -+ const struct bch_extent_ptr *ptr = s->ptrs + i; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ u32 offset; -+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); -+ -+ prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); -+ if (i < nr_data) -+ 
prt_printf(out, "#%u", stripe_blockcount_get(s, i)); -+ prt_printf(out, " gen %u", ptr->gen); -+ if (ptr_stale(ca, ptr)) -+ prt_printf(out, " stale"); -+ } -+} -+ -+/* returns blocknr in stripe that we matched: */ -+static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, -+ struct bkey_s_c k, unsigned *block) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ unsigned i, nr_data = s->nr_blocks - s->nr_redundant; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ for (i = 0; i < nr_data; i++) -+ if (__bch2_ptr_matches_stripe(&s->ptrs[i], ptr, -+ le16_to_cpu(s->sectors))) { -+ *block = i; -+ return ptr; -+ } -+ -+ return NULL; -+} -+ -+static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ const union bch_extent_entry *entry; -+ -+ extent_for_each_entry(e, entry) -+ if (extent_entry_type(entry) == -+ BCH_EXTENT_ENTRY_stripe_ptr && -+ entry->stripe_ptr.idx == idx) -+ return true; -+ -+ break; -+ } -+ } -+ -+ return false; -+} -+ -+/* Stripe bufs: */ -+ -+static void ec_stripe_buf_exit(struct ec_stripe_buf *buf) -+{ -+ if (buf->key.k.type == KEY_TYPE_stripe) { -+ struct bkey_i_stripe *s = bkey_i_to_stripe(&buf->key); -+ unsigned i; -+ -+ for (i = 0; i < s->v.nr_blocks; i++) { -+ kvpfree(buf->data[i], buf->size << 9); -+ buf->data[i] = NULL; -+ } -+ } -+} -+ -+/* XXX: this is a non-mempoolified memory allocation: */ -+static int ec_stripe_buf_init(struct ec_stripe_buf *buf, -+ unsigned offset, unsigned size) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned csum_granularity = 1U << v->csum_granularity_bits; -+ unsigned end = offset + size; -+ unsigned i; -+ -+ BUG_ON(end > le16_to_cpu(v->sectors)); -+ -+ offset = round_down(offset, csum_granularity); -+ end = min_t(unsigned, le16_to_cpu(v->sectors), -+ round_up(end, csum_granularity)); -+ -+ buf->offset = offset; -+ buf->size = end - offset; -+ -+ memset(buf->valid, 0xFF, sizeof(buf->valid)); -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL); -+ if (!buf->data[i]) -+ goto err; -+ } -+ -+ return 0; -+err: -+ ec_stripe_buf_exit(buf); -+ return -BCH_ERR_ENOMEM_stripe_buf; -+} -+ -+/* Checksumming: */ -+ -+static struct bch_csum ec_block_checksum(struct ec_stripe_buf *buf, -+ unsigned block, unsigned offset) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned end = buf->offset + buf->size; -+ unsigned len = min(csum_granularity, end - offset); -+ -+ BUG_ON(offset >= end); -+ BUG_ON(offset < buf->offset); -+ BUG_ON(offset & (csum_granularity - 1)); -+ BUG_ON(offset + len != le16_to_cpu(v->sectors) && -+ (len & (csum_granularity - 1))); -+ -+ return bch2_checksum(NULL, v->csum_type, -+ null_nonce(), -+ buf->data[block] + ((offset - buf->offset) << 9), -+ len << 9); -+} -+ -+static void ec_generate_checksums(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned i, j, csums_per_device = stripe_csums_per_device(v); -+ -+ if (!v->csum_type) -+ return; -+ -+ BUG_ON(buf->offset); -+ BUG_ON(buf->size != le16_to_cpu(v->sectors)); -+ -+ for (i = 0; i < v->nr_blocks; i++) -+ for (j = 0; j < csums_per_device; j++) -+ stripe_csum_set(v, i, j, -+ ec_block_checksum(buf, i, j << v->csum_granularity_bits)); -+} -+ -+static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) 
-+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned csum_granularity = 1 << v->csum_granularity_bits; -+ unsigned i; -+ -+ if (!v->csum_type) -+ return; -+ -+ for (i = 0; i < v->nr_blocks; i++) { -+ unsigned offset = buf->offset; -+ unsigned end = buf->offset + buf->size; -+ -+ if (!test_bit(i, buf->valid)) -+ continue; -+ -+ while (offset < end) { -+ unsigned j = offset >> v->csum_granularity_bits; -+ unsigned len = min(csum_granularity, end - offset); -+ struct bch_csum want = stripe_csum_get(v, i, j); -+ struct bch_csum got = ec_block_checksum(buf, i, offset); -+ -+ if (bch2_crc_cmp(want, got)) { -+ struct printbuf err = PRINTBUF; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); -+ -+ prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", -+ want.hi, want.lo, -+ got.hi, got.lo, -+ bch2_csum_types[v->csum_type]); -+ prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); -+ bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); -+ bch_err_ratelimited(ca, "%s", err.buf); -+ printbuf_exit(&err); -+ -+ clear_bit(i, buf->valid); -+ -+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); -+ break; -+ } -+ -+ offset += len; -+ } -+ } -+} -+ -+/* Erasure coding: */ -+ -+static void ec_generate_ec(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = le16_to_cpu(v->sectors) << 9; -+ -+ raid_gen(nr_data, v->nr_redundant, bytes, buf->data); -+} -+ -+static unsigned ec_nr_failed(struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ -+ return v->nr_blocks - bitmap_weight(buf->valid, v->nr_blocks); -+} -+ -+static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned i, failed[BCH_BKEY_PTRS_MAX], nr_failed = 0; -+ unsigned nr_data = v->nr_blocks - v->nr_redundant; -+ unsigned bytes = buf->size << 9; -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ bch_err_ratelimited(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ return -1; -+ } -+ -+ for (i = 0; i < nr_data; i++) -+ if (!test_bit(i, buf->valid)) -+ failed[nr_failed++] = i; -+ -+ raid_rec(nr_failed, failed, nr_data, v->nr_redundant, bytes, buf->data); -+ return 0; -+} -+ -+/* IO: */ -+ -+static void ec_block_endio(struct bio *bio) -+{ -+ struct ec_bio *ec_bio = container_of(bio, struct ec_bio, bio); -+ struct bch_stripe *v = &bkey_i_to_stripe(&ec_bio->buf->key)->v; -+ struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx]; -+ struct bch_dev *ca = ec_bio->ca; -+ struct closure *cl = bio->bi_private; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, -+ bio_data_dir(bio) -+ ? BCH_MEMBER_ERROR_write -+ : BCH_MEMBER_ERROR_read, -+ "erasure coding %s error: %s", -+ bio_data_dir(bio) ? "write" : "read", -+ bch2_blk_status_to_str(bio->bi_status))) -+ clear_bit(ec_bio->idx, ec_bio->buf->valid); -+ -+ if (ptr_stale(ca, ptr)) { -+ bch_err_ratelimited(ca->fs, -+ "error %s stripe: stale pointer after io", -+ bio_data_dir(bio) == READ ? 
"reading from" : "writing to"); -+ clear_bit(ec_bio->idx, ec_bio->buf->valid); -+ } -+ -+ bio_put(&ec_bio->bio); -+ percpu_ref_put(&ca->io_ref); -+ closure_put(cl); -+} -+ -+static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, -+ blk_opf_t opf, unsigned idx, struct closure *cl) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v; -+ unsigned offset = 0, bytes = buf->size << 9; -+ struct bch_extent_ptr *ptr = &v->ptrs[idx]; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); -+ enum bch_data_type data_type = idx < v->nr_blocks - v->nr_redundant -+ ? BCH_DATA_user -+ : BCH_DATA_parity; -+ int rw = op_is_write(opf); -+ -+ if (ptr_stale(ca, ptr)) { -+ bch_err_ratelimited(c, -+ "error %s stripe: stale pointer", -+ rw == READ ? "reading from" : "writing to"); -+ clear_bit(idx, buf->valid); -+ return; -+ } -+ -+ if (!bch2_dev_get_ioref(ca, rw)) { -+ clear_bit(idx, buf->valid); -+ return; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size); -+ -+ while (offset < bytes) { -+ unsigned nr_iovecs = min_t(size_t, BIO_MAX_VECS, -+ DIV_ROUND_UP(bytes, PAGE_SIZE)); -+ unsigned b = min_t(size_t, bytes - offset, -+ nr_iovecs << PAGE_SHIFT); -+ struct ec_bio *ec_bio; -+ -+ ec_bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, -+ nr_iovecs, -+ opf, -+ GFP_KERNEL, -+ &c->ec_bioset), -+ struct ec_bio, bio); -+ -+ ec_bio->ca = ca; -+ ec_bio->buf = buf; -+ ec_bio->idx = idx; -+ -+ ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); -+ ec_bio->bio.bi_end_io = ec_block_endio; -+ ec_bio->bio.bi_private = cl; -+ -+ bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); -+ -+ closure_get(cl); -+ percpu_ref_get(&ca->io_ref); -+ -+ submit_bio(&ec_bio->bio); -+ -+ offset += b; -+ } -+ -+ percpu_ref_put(&ca->io_ref); -+} -+ -+static int get_stripe_key_trans(struct btree_trans *trans, u64 idx, -+ struct ec_stripe_buf *stripe) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, -+ POS(0, idx), BTREE_ITER_SLOTS); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ if (k.k->type != KEY_TYPE_stripe) { -+ ret = -ENOENT; -+ goto err; -+ } -+ bkey_reassemble(&stripe->key, k); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* recovery read path: */ -+int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio) -+{ -+ struct bch_fs *c = trans->c; -+ struct ec_stripe_buf *buf; -+ struct closure cl; -+ struct bch_stripe *v; -+ unsigned i, offset; -+ int ret = 0; -+ -+ closure_init_stack(&cl); -+ -+ BUG_ON(!rbio->pick.has_ec); -+ -+ buf = kzalloc(sizeof(*buf), GFP_NOFS); -+ if (!buf) -+ return -BCH_ERR_ENOMEM_ec_read_extent; -+ -+ ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf)); -+ if (ret) { -+ bch_err_ratelimited(c, -+ "error doing reconstruct read: error %i looking up stripe", ret); -+ kfree(buf); -+ return -EIO; -+ } -+ -+ v = &bkey_i_to_stripe(&buf->key)->v; -+ -+ if (!bch2_ptr_matches_stripe(v, rbio->pick)) { -+ bch_err_ratelimited(c, -+ "error doing reconstruct read: pointer doesn't match stripe"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ offset = rbio->bio.bi_iter.bi_sector - v->ptrs[rbio->pick.ec.block].offset; -+ if (offset + bio_sectors(&rbio->bio) > le16_to_cpu(v->sectors)) { -+ bch_err_ratelimited(c, -+ "error doing reconstruct read: read is bigger than stripe"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio)); -+ if (ret) -+ goto err; -+ -+ for (i = 
0; i < v->nr_blocks; i++) -+ ec_block_io(c, buf, REQ_OP_READ, i, &cl); -+ -+ closure_sync(&cl); -+ -+ if (ec_nr_failed(buf) > v->nr_redundant) { -+ bch_err_ratelimited(c, -+ "error doing reconstruct read: unable to read enough blocks"); -+ ret = -EIO; -+ goto err; -+ } -+ -+ ec_validate_checksums(c, buf); -+ -+ ret = ec_do_recov(c, buf); -+ if (ret) -+ goto err; -+ -+ memcpy_to_bio(&rbio->bio, rbio->bio.bi_iter, -+ buf->data[rbio->pick.ec.block] + ((offset - buf->offset) << 9)); -+err: -+ ec_stripe_buf_exit(buf); -+ kfree(buf); -+ return ret; -+} -+ -+/* stripe bucket accounting: */ -+ -+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) -+{ -+ ec_stripes_heap n, *h = &c->ec_stripes_heap; -+ -+ if (idx >= h->size) { -+ if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp)) -+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; -+ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ if (n.size > h->size) { -+ memcpy(n.data, h->data, h->used * sizeof(h->data[0])); -+ n.used = h->used; -+ swap(*h, n); -+ } -+ mutex_unlock(&c->ec_stripes_heap_lock); -+ -+ free_heap(&n); -+ } -+ -+ if (!genradix_ptr_alloc(&c->stripes, idx, gfp)) -+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; -+ -+ if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && -+ !genradix_ptr_alloc(&c->gc_stripes, idx, gfp)) -+ return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; -+ -+ return 0; -+} -+ -+static int ec_stripe_mem_alloc(struct btree_trans *trans, -+ struct btree_iter *iter) -+{ -+ return allocate_dropping_locks_errcode(trans, -+ __ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp)); -+} -+ -+/* -+ * Hash table of open stripes: -+ * Stripes that are being created or modified are kept in a hash table, so that -+ * stripe deletion can skip them. -+ */ -+ -+static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx) -+{ -+ unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new))); -+ struct ec_stripe_new *s; -+ -+ hlist_for_each_entry(s, &c->ec_stripes_new[hash], hash) -+ if (s->idx == idx) -+ return true; -+ return false; -+} -+ -+static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx) -+{ -+ bool ret = false; -+ -+ spin_lock(&c->ec_stripes_new_lock); -+ ret = __bch2_stripe_is_open(c, idx); -+ spin_unlock(&c->ec_stripes_new_lock); -+ -+ return ret; -+} -+ -+static bool bch2_try_open_stripe(struct bch_fs *c, -+ struct ec_stripe_new *s, -+ u64 idx) -+{ -+ bool ret; -+ -+ spin_lock(&c->ec_stripes_new_lock); -+ ret = !__bch2_stripe_is_open(c, idx); -+ if (ret) { -+ unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new))); -+ -+ s->idx = idx; -+ hlist_add_head(&s->hash, &c->ec_stripes_new[hash]); -+ } -+ spin_unlock(&c->ec_stripes_new_lock); -+ -+ return ret; -+} -+ -+static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s) -+{ -+ BUG_ON(!s->idx); -+ -+ spin_lock(&c->ec_stripes_new_lock); -+ hlist_del_init(&s->hash); -+ spin_unlock(&c->ec_stripes_new_lock); -+ -+ s->idx = 0; -+} -+ -+/* Heap of all existing stripes, ordered by blocks_nonempty */ -+ -+static u64 stripe_idx_to_delete(struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ -+ lockdep_assert_held(&c->ec_stripes_heap_lock); -+ -+ if (h->used && -+ h->data[0].blocks_nonempty == 0 && -+ !bch2_stripe_is_open(c, h->data[0].idx)) -+ return h->data[0].idx; -+ -+ return 0; -+} -+ -+static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, -+ struct ec_stripe_heap_entry l, -+ struct ec_stripe_heap_entry r) -+{ -+ return ((l.blocks_nonempty > r.blocks_nonempty) - -+ (l.blocks_nonempty < r.blocks_nonempty)); -+} -+ 
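-+/* -+ * The comparator above orders stripes by blocks_nonempty, emptiest first, -+ * which is why stripe_idx_to_delete() only ever needs to inspect -+ * h->data[0]; the backpointer helpers below keep each stripe's heap_idx -+ * in sync as heap entries sift up and down. -+ */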
-+static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, -+ size_t i) -+{ -+ struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); -+ -+ genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i; -+} -+ -+static void heap_verify_backpointer(struct bch_fs *c, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m = genradix_ptr(&c->stripes, idx); -+ -+ BUG_ON(m->heap_idx >= h->used); -+ BUG_ON(h->data[m->heap_idx].idx != idx); -+} -+ -+void bch2_stripes_heap_del(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ heap_verify_backpointer(c, idx); -+ -+ heap_del(&c->ec_stripes_heap, m->heap_idx, -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ mutex_unlock(&c->ec_stripes_heap_lock); -+} -+ -+void bch2_stripes_heap_insert(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ BUG_ON(heap_full(&c->ec_stripes_heap)); -+ -+ heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { -+ .idx = idx, -+ .blocks_nonempty = m->blocks_nonempty, -+ }), -+ ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+ mutex_unlock(&c->ec_stripes_heap_lock); -+} -+ -+void bch2_stripes_heap_update(struct bch_fs *c, -+ struct stripe *m, size_t idx) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ bool do_deletes; -+ size_t i; -+ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ heap_verify_backpointer(c, idx); -+ -+ h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty; -+ -+ i = m->heap_idx; -+ heap_sift_up(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ heap_sift_down(h, i, ec_stripes_heap_cmp, -+ ec_stripes_heap_set_backpointer); -+ -+ heap_verify_backpointer(c, idx); -+ -+ do_deletes = stripe_idx_to_delete(c) != 0; -+ mutex_unlock(&c->ec_stripes_heap_lock); -+ -+ if (do_deletes) -+ bch2_do_stripe_deletes(c); -+} -+ -+/* stripe deletion */ -+ -+static int ec_stripe_delete(struct btree_trans *trans, u64 idx) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_stripe s; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, POS(0, idx), -+ BTREE_ITER_INTENT); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != KEY_TYPE_stripe) { -+ bch2_fs_inconsistent(c, "attempting to delete nonexistent stripe %llu", idx); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ s = bkey_s_c_to_stripe(k); -+ for (unsigned i = 0; i < s.v->nr_blocks; i++) -+ if (stripe_blockcount_get(s.v, i)) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ bch2_fs_inconsistent(c, "attempting to delete nonempty stripe %s", buf.buf); -+ printbuf_exit(&buf); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = bch2_btree_delete_at(trans, &iter, 0); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static void ec_stripe_delete_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, ec_stripe_delete_work); -+ struct btree_trans *trans = bch2_trans_get(c); -+ int ret; -+ u64 idx; -+ -+ while (1) { -+ mutex_lock(&c->ec_stripes_heap_lock); -+ idx = stripe_idx_to_delete(c); -+ mutex_unlock(&c->ec_stripes_heap_lock); -+ -+ if (!idx) -+ break; -+ -+ ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, -+ ec_stripe_delete(trans, idx)); -+ if (ret) { -+ bch_err_fn(c, ret); -+ break; -+ } -+ } -+ -+ bch2_trans_put(trans); -+ -+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete); 
-+} -+ -+void bch2_do_stripe_deletes(struct bch_fs *c) -+{ -+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) && -+ !queue_work(c->write_ref_wq, &c->ec_stripe_delete_work)) -+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete); -+} -+ -+/* stripe creation: */ -+ -+static int ec_stripe_key_update(struct btree_trans *trans, -+ struct bkey_i_stripe *new, -+ bool create) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, -+ new->k.p, BTREE_ITER_INTENT); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != (create ? KEY_TYPE_deleted : KEY_TYPE_stripe)) { -+ bch2_fs_inconsistent(c, "error %s stripe: got existing key type %s", -+ create ? "creating" : "updating", -+ bch2_bkey_types[k.k->type]); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (k.k->type == KEY_TYPE_stripe) { -+ const struct bch_stripe *old = bkey_s_c_to_stripe(k).v; -+ unsigned i; -+ -+ if (old->nr_blocks != new->v.nr_blocks) { -+ bch_err(c, "error updating stripe: nr_blocks does not match"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ for (i = 0; i < new->v.nr_blocks; i++) { -+ unsigned v = stripe_blockcount_get(old, i); -+ -+ BUG_ON(v && -+ (old->ptrs[i].dev != new->v.ptrs[i].dev || -+ old->ptrs[i].gen != new->v.ptrs[i].gen || -+ old->ptrs[i].offset != new->v.ptrs[i].offset)); -+ -+ stripe_blockcount_set(&new->v, i, v); -+ } -+ } -+ -+ ret = bch2_trans_update(trans, &iter, &new->k_i, 0); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int ec_stripe_update_extent(struct btree_trans *trans, -+ struct bpos bucket, u8 gen, -+ struct ec_stripe_buf *s, -+ struct bpos *bp_pos) -+{ -+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; -+ struct bch_fs *c = trans->c; -+ struct bch_backpointer bp; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ const struct bch_extent_ptr *ptr_c; -+ struct bch_extent_ptr *ptr, *ec_ptr = NULL; -+ struct bch_extent_stripe_ptr stripe_ptr; -+ struct bkey_i *n; -+ int ret, dev, block; -+ -+ ret = bch2_get_next_backpointer(trans, bucket, gen, -+ bp_pos, &bp, BTREE_ITER_CACHED); -+ if (ret) -+ return ret; -+ if (bpos_eq(*bp_pos, SPOS_MAX)) -+ return 0; -+ -+ if (bp.level) { -+ struct printbuf buf = PRINTBUF; -+ struct btree_iter node_iter; -+ struct btree *b; -+ -+ b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp); -+ bch2_trans_iter_exit(trans, &node_iter); -+ -+ if (!b) -+ return 0; -+ -+ prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); -+ bch2_backpointer_to_text(&buf, &bp); -+ -+ bch2_fs_inconsistent(c, "%s", buf.buf); -+ printbuf_exit(&buf); -+ return -EIO; -+ } -+ -+ k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_INTENT); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ if (!k.k) { -+ /* -+ * extent no longer exists - we could flush the btree -+ * write buffer and retry to verify, but no need: -+ */ -+ return 0; -+ } -+ -+ if (extent_has_stripe_ptr(k, s->key.k.p.offset)) -+ goto out; -+ -+ ptr_c = bkey_matches_stripe(v, k, &block); -+ /* -+ * It doesn't generally make sense to erasure code cached ptrs: -+ * XXX: should we be incrementing a counter? 
-+ */ -+ if (!ptr_c || ptr_c->cached) -+ goto out; -+ -+ dev = v->ptrs[block].dev; -+ -+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(stripe_ptr)); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ goto out; -+ -+ bkey_reassemble(n, k); -+ -+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev); -+ ec_ptr = bch2_bkey_has_device(bkey_i_to_s(n), dev); -+ BUG_ON(!ec_ptr); -+ -+ stripe_ptr = (struct bch_extent_stripe_ptr) { -+ .type = 1 << BCH_EXTENT_ENTRY_stripe_ptr, -+ .block = block, -+ .redundancy = v->nr_redundant, -+ .idx = s->key.k.p.offset, -+ }; -+ -+ __extent_entry_insert(n, -+ (union bch_extent_entry *) ec_ptr, -+ (union bch_extent_entry *) &stripe_ptr); -+ -+ ret = bch2_trans_update(trans, &iter, n, 0); -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_buf *s, -+ unsigned block) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; -+ struct bch_extent_ptr bucket = v->ptrs[block]; -+ struct bpos bucket_pos = PTR_BUCKET_POS(c, &bucket); -+ struct bpos bp_pos = POS_MIN; -+ int ret = 0; -+ -+ while (1) { -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL, -+ ec_stripe_update_extent(trans, bucket_pos, bucket.gen, -+ s, &bp_pos)); -+ if (ret) -+ break; -+ if (bkey_eq(bp_pos, POS_MAX)) -+ break; -+ -+ bp_pos = bpos_nosnap_successor(bp_pos); -+ } -+ -+ return ret; -+} -+ -+static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; -+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant; -+ int ret = 0; -+ -+ ret = bch2_btree_write_buffer_flush(trans); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < nr_data; i++) { -+ ret = ec_stripe_update_bucket(trans, s, i); -+ if (ret) -+ break; -+ } -+err: -+ bch2_trans_put(trans); -+ -+ return ret; -+} -+ -+static void zero_out_rest_of_ec_bucket(struct bch_fs *c, -+ struct ec_stripe_new *s, -+ unsigned block, -+ struct open_bucket *ob) -+{ -+ struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); -+ unsigned offset = ca->mi.bucket_size - ob->sectors_free; -+ int ret; -+ -+ if (!bch2_dev_get_ioref(ca, WRITE)) { -+ s->err = -BCH_ERR_erofs_no_writes; -+ return; -+ } -+ -+ memset(s->new_stripe.data[block] + (offset << 9), -+ 0, -+ ob->sectors_free << 9); -+ -+ ret = blkdev_issue_zeroout(ca->disk_sb.bdev, -+ ob->bucket * ca->mi.bucket_size + offset, -+ ob->sectors_free, -+ GFP_KERNEL, 0); -+ -+ percpu_ref_put(&ca->io_ref); -+ -+ if (ret) -+ s->err = ret; -+} -+ -+void bch2_ec_stripe_new_free(struct bch_fs *c, struct ec_stripe_new *s) -+{ -+ if (s->idx) -+ bch2_stripe_close(c, s); -+ kfree(s); -+} -+ -+/* -+ * data buckets of new stripe all written: create the stripe -+ */ -+static void ec_stripe_create(struct ec_stripe_new *s) -+{ -+ struct bch_fs *c = s->c; -+ struct open_bucket *ob; -+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; -+ unsigned i, nr_data = v->nr_blocks - v->nr_redundant; -+ int ret; -+ -+ BUG_ON(s->h->s == s); -+ -+ closure_sync(&s->iodone); -+ -+ if (!s->err) { -+ for (i = 0; i < nr_data; i++) -+ if (s->blocks[i]) { -+ ob = c->open_buckets + s->blocks[i]; -+ -+ if (ob->sectors_free) -+ zero_out_rest_of_ec_bucket(c, s, i, ob); -+ } -+ } -+ -+ if (s->err) { -+ if (!bch2_err_matches(s->err, EROFS)) -+ bch_err(c, "error creating stripe: error writing data buckets"); -+ goto err; -+ } -+ -+ if 
(s->have_existing_stripe) { -+ ec_validate_checksums(c, &s->existing_stripe); -+ -+ if (ec_do_recov(c, &s->existing_stripe)) { -+ bch_err(c, "error creating stripe: error reading existing stripe"); -+ goto err; -+ } -+ -+ for (i = 0; i < nr_data; i++) -+ if (stripe_blockcount_get(&bkey_i_to_stripe(&s->existing_stripe.key)->v, i)) -+ swap(s->new_stripe.data[i], -+ s->existing_stripe.data[i]); -+ -+ ec_stripe_buf_exit(&s->existing_stripe); -+ } -+ -+ BUG_ON(!s->allocated); -+ BUG_ON(!s->idx); -+ -+ ec_generate_ec(&s->new_stripe); -+ -+ ec_generate_checksums(&s->new_stripe); -+ -+ /* write p/q: */ -+ for (i = nr_data; i < v->nr_blocks; i++) -+ ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone); -+ closure_sync(&s->iodone); -+ -+ if (ec_nr_failed(&s->new_stripe)) { -+ bch_err(c, "error creating stripe: error writing redundancy buckets"); -+ goto err; -+ } -+ -+ ret = bch2_trans_do(c, &s->res, NULL, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL, -+ ec_stripe_key_update(trans, -+ bkey_i_to_stripe(&s->new_stripe.key), -+ !s->have_existing_stripe)); -+ if (ret) { -+ bch_err(c, "error creating stripe: error creating stripe key"); -+ goto err; -+ } -+ -+ ret = ec_stripe_update_extents(c, &s->new_stripe); -+ if (ret) { -+ bch_err_msg(c, ret, "creating stripe: error updating pointers"); -+ goto err; -+ } -+err: -+ bch2_disk_reservation_put(c, &s->res); -+ -+ for (i = 0; i < v->nr_blocks; i++) -+ if (s->blocks[i]) { -+ ob = c->open_buckets + s->blocks[i]; -+ -+ if (i < nr_data) { -+ ob->ec = NULL; -+ __bch2_open_bucket_put(c, ob); -+ } else { -+ bch2_open_bucket_put(c, ob); -+ } -+ } -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_del(&s->list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ wake_up(&c->ec_stripe_new_wait); -+ -+ ec_stripe_buf_exit(&s->existing_stripe); -+ ec_stripe_buf_exit(&s->new_stripe); -+ closure_debug_destroy(&s->iodone); -+ -+ ec_stripe_new_put(c, s, STRIPE_REF_stripe); -+} -+ -+static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c) -+{ -+ struct ec_stripe_new *s; -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_for_each_entry(s, &c->ec_stripe_new_list, list) -+ if (!atomic_read(&s->ref[STRIPE_REF_io])) -+ goto out; -+ s = NULL; -+out: -+ mutex_unlock(&c->ec_stripe_new_lock); -+ -+ return s; -+} -+ -+static void ec_stripe_create_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, -+ struct bch_fs, ec_stripe_create_work); -+ struct ec_stripe_new *s; -+ -+ while ((s = get_pending_stripe(c))) -+ ec_stripe_create(s); -+ -+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); -+} -+ -+void bch2_ec_do_stripe_creates(struct bch_fs *c) -+{ -+ bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create); -+ -+ if (!queue_work(system_long_wq, &c->ec_stripe_create_work)) -+ bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); -+} -+ -+static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s = h->s; -+ -+ BUG_ON(!s->allocated && !s->err); -+ -+ h->s = NULL; -+ s->pending = true; -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_add(&s->list, &c->ec_stripe_new_list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ -+ ec_stripe_new_put(c, s, STRIPE_REF_io); -+} -+ -+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) -+{ -+ struct ec_stripe_new *s = ob->ec; -+ -+ s->err = -EIO; -+} -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) -+{ -+ struct open_bucket *ob = ec_open_bucket(c, &wp->ptrs); -+ struct bch_dev *ca; -+ unsigned offset; -+ -+ if (!ob) -+ return NULL; 
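-+ -+ /* -+ * The write point has an open erasure coded bucket: return the -+ * corresponding position in the new stripe's buffer, so data written -+ * to the bucket is also buffered for parity generation. -+ */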
-+ -+ BUG_ON(!ob->ec->new_stripe.data[ob->ec_idx]); -+ -+ ca = bch_dev_bkey_exists(c, ob->dev); -+ offset = ca->mi.bucket_size - ob->sectors_free; -+ -+ return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9); -+} -+ -+static int unsigned_cmp(const void *_l, const void *_r) -+{ -+ unsigned l = *((const unsigned *) _l); -+ unsigned r = *((const unsigned *) _r); -+ -+ return cmp_int(l, r); -+} -+ -+/* pick most common bucket size: */ -+static unsigned pick_blocksize(struct bch_fs *c, -+ struct bch_devs_mask *devs) -+{ -+ struct bch_dev *ca; -+ unsigned i, nr = 0, sizes[BCH_SB_MEMBERS_MAX]; -+ struct { -+ unsigned nr, size; -+ } cur = { 0, 0 }, best = { 0, 0 }; -+ -+ for_each_member_device_rcu(ca, c, i, devs) -+ sizes[nr++] = ca->mi.bucket_size; -+ -+ sort(sizes, nr, sizeof(unsigned), unsigned_cmp, NULL); -+ -+ for (i = 0; i < nr; i++) { -+ if (sizes[i] != cur.size) { -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ cur.nr = 0; -+ cur.size = sizes[i]; -+ } -+ -+ cur.nr++; -+ } -+ -+ if (cur.nr > best.nr) -+ best = cur; -+ -+ return best.size; -+} -+ -+static bool may_create_new_stripe(struct bch_fs *c) -+{ -+ return false; -+} -+ -+static void ec_stripe_key_init(struct bch_fs *c, -+ struct bkey_i *k, -+ unsigned nr_data, -+ unsigned nr_parity, -+ unsigned stripe_size) -+{ -+ struct bkey_i_stripe *s = bkey_stripe_init(k); -+ unsigned u64s; -+ -+ s->v.sectors = cpu_to_le16(stripe_size); -+ s->v.algorithm = 0; -+ s->v.nr_blocks = nr_data + nr_parity; -+ s->v.nr_redundant = nr_parity; -+ s->v.csum_granularity_bits = ilog2(c->opts.encoded_extent_max >> 9); -+ s->v.csum_type = BCH_CSUM_crc32c; -+ s->v.pad = 0; -+ -+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) { -+ BUG_ON(1 << s->v.csum_granularity_bits >= -+ le16_to_cpu(s->v.sectors) || -+ s->v.csum_granularity_bits == U8_MAX); -+ s->v.csum_granularity_bits++; -+ } -+ -+ set_bkey_val_u64s(&s->k, u64s); -+} -+ -+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) -+{ -+ struct ec_stripe_new *s; -+ -+ lockdep_assert_held(&h->lock); -+ -+ s = kzalloc(sizeof(*s), GFP_KERNEL); -+ if (!s) -+ return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; -+ -+ mutex_init(&s->lock); -+ closure_init(&s->iodone, NULL); -+ atomic_set(&s->ref[STRIPE_REF_stripe], 1); -+ atomic_set(&s->ref[STRIPE_REF_io], 1); -+ s->c = c; -+ s->h = h; -+ s->nr_data = min_t(unsigned, h->nr_active_devs, -+ BCH_BKEY_PTRS_MAX) - h->redundancy; -+ s->nr_parity = h->redundancy; -+ -+ ec_stripe_key_init(c, &s->new_stripe.key, -+ s->nr_data, s->nr_parity, h->blocksize); -+ -+ h->s = s; -+ return 0; -+} -+ -+static struct ec_stripe_head * -+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target, -+ unsigned algo, unsigned redundancy, -+ enum bch_watermark watermark) -+{ -+ struct ec_stripe_head *h; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ h = kzalloc(sizeof(*h), GFP_KERNEL); -+ if (!h) -+ return NULL; -+ -+ mutex_init(&h->lock); -+ BUG_ON(!mutex_trylock(&h->lock)); -+ -+ h->target = target; -+ h->algo = algo; -+ h->redundancy = redundancy; -+ h->watermark = watermark; -+ -+ rcu_read_lock(); -+ h->devs = target_rw_devs(c, BCH_DATA_user, target); -+ -+ for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (!ca->mi.durability) -+ __clear_bit(i, h->devs.d); -+ -+ h->blocksize = pick_blocksize(c, &h->devs); -+ -+ for_each_member_device_rcu(ca, c, i, &h->devs) -+ if (ca->mi.bucket_size == h->blocksize) -+ h->nr_active_devs++; -+ -+ rcu_read_unlock(); -+ list_add(&h->list, &c->ec_stripe_head_list); -+ return h; -+} -+ -+void bch2_ec_stripe_head_put(struct bch_fs *c, 
struct ec_stripe_head *h) -+{ -+ if (h->s && -+ h->s->allocated && -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->nr_data) == h->s->nr_data) -+ ec_stripe_set_pending(c, h); -+ -+ mutex_unlock(&h->lock); -+} -+ -+static struct ec_stripe_head * -+__bch2_ec_stripe_head_get(struct btree_trans *trans, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy, -+ enum bch_watermark watermark) -+{ -+ struct bch_fs *c = trans->c; -+ struct ec_stripe_head *h; -+ int ret; -+ -+ if (!redundancy) -+ return NULL; -+ -+ ret = bch2_trans_mutex_lock(trans, &c->ec_stripe_head_lock); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (test_bit(BCH_FS_GOING_RO, &c->flags)) { -+ h = ERR_PTR(-BCH_ERR_erofs_no_writes); -+ goto found; -+ } -+ -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) -+ if (h->target == target && -+ h->algo == algo && -+ h->redundancy == redundancy && -+ h->watermark == watermark) { -+ ret = bch2_trans_mutex_lock(trans, &h->lock); -+ if (ret) -+ h = ERR_PTR(ret); -+ goto found; -+ } -+ -+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark); -+found: -+ mutex_unlock(&c->ec_stripe_head_lock); -+ return h; -+} -+ -+static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, -+ enum bch_watermark watermark, struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_devs_mask devs = h->devs; -+ struct open_bucket *ob; -+ struct open_buckets buckets; -+ struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; -+ unsigned i, j, nr_have_parity = 0, nr_have_data = 0; -+ bool have_cache = true; -+ int ret = 0; -+ -+ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); -+ BUG_ON(v->nr_redundant != h->s->nr_parity); -+ -+ for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { -+ __clear_bit(v->ptrs[i].dev, devs.d); -+ if (i < h->s->nr_data) -+ nr_have_data++; -+ else -+ nr_have_parity++; -+ } -+ -+ BUG_ON(nr_have_data > h->s->nr_data); -+ BUG_ON(nr_have_parity > h->s->nr_parity); -+ -+ buckets.nr = 0; -+ if (nr_have_parity < h->s->nr_parity) { -+ ret = bch2_bucket_alloc_set_trans(trans, &buckets, -+ &h->parity_stripe, -+ &devs, -+ h->s->nr_parity, -+ &nr_have_parity, -+ &have_cache, 0, -+ BCH_DATA_parity, -+ watermark, -+ cl); -+ -+ open_bucket_for_each(c, &buckets, ob, i) { -+ j = find_next_zero_bit(h->s->blocks_gotten, -+ h->s->nr_data + h->s->nr_parity, -+ h->s->nr_data); -+ BUG_ON(j >= h->s->nr_data + h->s->nr_parity); -+ -+ h->s->blocks[j] = buckets.v[i]; -+ v->ptrs[j] = bch2_ob_ptr(c, ob); -+ __set_bit(j, h->s->blocks_gotten); -+ } -+ -+ if (ret) -+ return ret; -+ } -+ -+ buckets.nr = 0; -+ if (nr_have_data < h->s->nr_data) { -+ ret = bch2_bucket_alloc_set_trans(trans, &buckets, -+ &h->block_stripe, -+ &devs, -+ h->s->nr_data, -+ &nr_have_data, -+ &have_cache, 0, -+ BCH_DATA_user, -+ watermark, -+ cl); -+ -+ open_bucket_for_each(c, &buckets, ob, i) { -+ j = find_next_zero_bit(h->s->blocks_gotten, -+ h->s->nr_data, 0); -+ BUG_ON(j >= h->s->nr_data); -+ -+ h->s->blocks[j] = buckets.v[i]; -+ v->ptrs[j] = bch2_ob_ptr(c, ob); -+ __set_bit(j, h->s->blocks_gotten); -+ } -+ -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* XXX: doesn't obey target: */ -+static s64 get_existing_stripe(struct bch_fs *c, -+ struct ec_stripe_head *head) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m; -+ size_t heap_idx; -+ u64 stripe_idx; -+ s64 ret = -1; -+ -+ if (may_create_new_stripe(c)) -+ return -1; -+ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ for (heap_idx = 0; heap_idx < h->used; heap_idx++) { -+ /* No 
blocks worth reusing, stripe will just be deleted: */ -+ if (!h->data[heap_idx].blocks_nonempty) -+ continue; -+ -+ stripe_idx = h->data[heap_idx].idx; -+ -+ m = genradix_ptr(&c->stripes, stripe_idx); -+ -+ if (m->algorithm == head->algo && -+ m->nr_redundant == head->redundancy && -+ m->sectors == head->blocksize && -+ m->blocks_nonempty < m->nr_blocks - m->nr_redundant && -+ bch2_try_open_stripe(c, head->s, stripe_idx)) { -+ ret = stripe_idx; -+ break; -+ } -+ } -+ mutex_unlock(&c->ec_stripes_heap_lock); -+ return ret; -+} -+ -+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; -+ struct bch_stripe *existing_v; -+ unsigned i; -+ s64 idx; -+ int ret; -+ -+ /* -+ * If we can't allocate a new stripe, and there's no stripes with empty -+ * blocks for us to reuse, that means we have to wait on copygc: -+ */ -+ idx = get_existing_stripe(c, h); -+ if (idx < 0) -+ return -BCH_ERR_stripe_alloc_blocked; -+ -+ ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); -+ if (ret) { -+ bch2_stripe_close(c, h->s); -+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret)); -+ return ret; -+ } -+ -+ existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; -+ -+ BUG_ON(existing_v->nr_redundant != h->s->nr_parity); -+ h->s->nr_data = existing_v->nr_blocks - -+ existing_v->nr_redundant; -+ -+ ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); -+ if (ret) { -+ bch2_stripe_close(c, h->s); -+ return ret; -+ } -+ -+ BUG_ON(h->s->existing_stripe.size != h->blocksize); -+ BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); -+ -+ /* -+ * Free buckets we initially allocated - they might conflict with -+ * blocks from the stripe we're reusing: -+ */ -+ for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { -+ bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); -+ h->s->blocks[i] = 0; -+ } -+ memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); -+ memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); -+ -+ for (i = 0; i < existing_v->nr_blocks; i++) { -+ if (stripe_blockcount_get(existing_v, i)) { -+ __set_bit(i, h->s->blocks_gotten); -+ __set_bit(i, h->s->blocks_allocated); -+ } -+ -+ ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); -+ } -+ -+ bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); -+ h->s->have_existing_stripe = true; -+ -+ return 0; -+} -+ -+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bpos min_pos = POS(0, 1); -+ struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); -+ int ret; -+ -+ if (!h->s->res.sectors) { -+ ret = bch2_disk_reservation_get(c, &h->s->res, -+ h->blocksize, -+ h->s->nr_parity, -+ BCH_DISK_RESERVATION_NOFAIL); -+ if (ret) -+ return ret; -+ } -+ -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (bkey_gt(k.k->p, POS(0, U32_MAX))) { -+ if (start_pos.offset) { -+ start_pos = min_pos; -+ bch2_btree_iter_set_pos(&iter, start_pos); -+ continue; -+ } -+ -+ ret = -BCH_ERR_ENOSPC_stripe_create; -+ break; -+ } -+ -+ if (bkey_deleted(k.k) && -+ bch2_try_open_stripe(c, h->s, k.k->p.offset)) -+ break; -+ } -+ -+ c->ec_stripe_hint 
= iter.pos.offset; -+ -+ if (ret) -+ goto err; -+ -+ ret = ec_stripe_mem_alloc(trans, &iter); -+ if (ret) { -+ bch2_stripe_close(c, h->s); -+ goto err; -+ } -+ -+ h->s->new_stripe.key.k.p = iter.pos; -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+err: -+ bch2_disk_reservation_put(c, &h->s->res); -+ goto out; -+} -+ -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, -+ unsigned target, -+ unsigned algo, -+ unsigned redundancy, -+ enum bch_watermark watermark, -+ struct closure *cl) -+{ -+ struct bch_fs *c = trans->c; -+ struct ec_stripe_head *h; -+ bool waiting = false; -+ int ret; -+ -+ h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark); -+ if (!h) -+ bch_err(c, "no stripe head"); -+ if (IS_ERR_OR_NULL(h)) -+ return h; -+ -+ if (!h->s) { -+ ret = ec_new_stripe_alloc(c, h); -+ if (ret) { -+ bch_err(c, "failed to allocate new stripe"); -+ goto err; -+ } -+ } -+ -+ if (h->s->allocated) -+ goto allocated; -+ -+ if (h->s->have_existing_stripe) -+ goto alloc_existing; -+ -+ /* First, try to allocate a full stripe: */ -+ ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: -+ __bch2_ec_stripe_head_reserve(trans, h); -+ if (!ret) -+ goto allocate_buf; -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || -+ bch2_err_matches(ret, ENOMEM)) -+ goto err; -+ -+ /* -+ * Not enough buckets available for a full stripe: we must reuse an -+ * existing stripe: -+ */ -+ while (1) { -+ ret = __bch2_ec_stripe_head_reuse(trans, h); -+ if (!ret) -+ break; -+ if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) -+ goto err; -+ -+ if (watermark == BCH_WATERMARK_copygc) { -+ ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: -+ __bch2_ec_stripe_head_reserve(trans, h); -+ if (ret) -+ goto err; -+ goto allocate_buf; -+ } -+ -+ /* XXX freelist_wait? 
*/ -+ closure_wait(&c->freelist_wait, cl); -+ waiting = true; -+ } -+ -+ if (waiting) -+ closure_wake_up(&c->freelist_wait); -+alloc_existing: -+ /* -+ * Retry allocating buckets, with the watermark for this -+ * particular write: -+ */ -+ ret = new_stripe_alloc_buckets(trans, h, watermark, cl); -+ if (ret) -+ goto err; -+ -+allocate_buf: -+ ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); -+ if (ret) -+ goto err; -+ -+ h->s->allocated = true; -+allocated: -+ BUG_ON(!h->s->idx); -+ BUG_ON(!h->s->new_stripe.data[0]); -+ BUG_ON(trans->restarted); -+ return h; -+err: -+ bch2_ec_stripe_head_put(c, h); -+ return ERR_PTR(ret); -+} -+ -+static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct ec_stripe_head *h; -+ struct open_bucket *ob; -+ unsigned i; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ mutex_lock(&h->lock); -+ if (!h->s) -+ goto unlock; -+ -+ if (!ca) -+ goto found; -+ -+ for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { -+ if (!h->s->blocks[i]) -+ continue; -+ -+ ob = c->open_buckets + h->s->blocks[i]; -+ if (ob->dev == ca->dev_idx) -+ goto found; -+ } -+ goto unlock; -+found: -+ h->s->err = -BCH_ERR_erofs_no_writes; -+ ec_stripe_set_pending(c, h); -+unlock: -+ mutex_unlock(&h->lock); -+ } -+ mutex_unlock(&c->ec_stripe_head_lock); -+} -+ -+void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) -+{ -+ __bch2_ec_stop(c, ca); -+} -+ -+void bch2_fs_ec_stop(struct bch_fs *c) -+{ -+ __bch2_ec_stop(c, NULL); -+} -+ -+static bool bch2_fs_ec_flush_done(struct bch_fs *c) -+{ -+ bool ret; -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ ret = list_empty(&c->ec_stripe_new_list); -+ mutex_unlock(&c->ec_stripe_new_lock); -+ -+ return ret; -+} -+ -+void bch2_fs_ec_flush(struct bch_fs *c) -+{ -+ wait_event(c->ec_stripe_new_wait, bch2_fs_ec_flush_done(c)); -+} -+ -+int bch2_stripes_read(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ const struct bch_stripe *s; -+ struct stripe *m; -+ unsigned i; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, -+ BTREE_ITER_PREFETCH, k, ret) { -+ if (k.k->type != KEY_TYPE_stripe) -+ continue; -+ -+ ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); -+ if (ret) -+ break; -+ -+ s = bkey_s_c_to_stripe(k).v; -+ -+ m = genradix_ptr(&c->stripes, k.k->p.offset); -+ m->sectors = le16_to_cpu(s->sectors); -+ m->algorithm = s->algorithm; -+ m->nr_blocks = s->nr_blocks; -+ m->nr_redundant = s->nr_redundant; -+ m->blocks_nonempty = 0; -+ -+ for (i = 0; i < s->nr_blocks; i++) -+ m->blocks_nonempty += !!stripe_blockcount_get(s, i); -+ -+ bch2_stripes_heap_insert(c, m, k.k->p.offset); -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ bch2_trans_put(trans); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ -+ return ret; -+} -+ -+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ ec_stripes_heap *h = &c->ec_stripes_heap; -+ struct stripe *m; -+ size_t i; -+ -+ mutex_lock(&c->ec_stripes_heap_lock); -+ for (i = 0; i < min_t(size_t, h->used, 50); i++) { -+ m = genradix_ptr(&c->stripes, h->data[i].idx); -+ -+ prt_printf(out, "%zu %u/%u+%u", h->data[i].idx, -+ h->data[i].blocks_nonempty, -+ m->nr_blocks - m->nr_redundant, -+ m->nr_redundant); -+ if (bch2_stripe_is_open(c, h->data[i].idx)) -+ prt_str(out, " open"); -+ prt_newline(out); -+ } -+ mutex_unlock(&c->ec_stripes_heap_lock); -+} -+ -+void bch2_new_stripes_to_text(struct printbuf 
*out, struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ struct ec_stripe_new *s; -+ -+ mutex_lock(&c->ec_stripe_head_lock); -+ list_for_each_entry(h, &c->ec_stripe_head_list, list) { -+ prt_printf(out, "target %u algo %u redundancy %u %s:\n", -+ h->target, h->algo, h->redundancy, -+ bch2_watermarks[h->watermark]); -+ -+ if (h->s) -+ prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", -+ h->s->idx, h->s->nr_data, h->s->nr_parity, -+ bitmap_weight(h->s->blocks_allocated, -+ h->s->nr_data)); -+ } -+ mutex_unlock(&c->ec_stripe_head_lock); -+ -+ prt_printf(out, "in flight:\n"); -+ -+ mutex_lock(&c->ec_stripe_new_lock); -+ list_for_each_entry(s, &c->ec_stripe_new_list, list) { -+ prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", -+ s->idx, s->nr_data, s->nr_parity, -+ atomic_read(&s->ref[STRIPE_REF_io]), -+ atomic_read(&s->ref[STRIPE_REF_stripe]), -+ bch2_watermarks[s->h->watermark]); -+ } -+ mutex_unlock(&c->ec_stripe_new_lock); -+} -+ -+void bch2_fs_ec_exit(struct bch_fs *c) -+{ -+ struct ec_stripe_head *h; -+ unsigned i; -+ -+ while (1) { -+ mutex_lock(&c->ec_stripe_head_lock); -+ h = list_first_entry_or_null(&c->ec_stripe_head_list, -+ struct ec_stripe_head, list); -+ if (h) -+ list_del(&h->list); -+ mutex_unlock(&c->ec_stripe_head_lock); -+ if (!h) -+ break; -+ -+ if (h->s) { -+ for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) -+ BUG_ON(h->s->blocks[i]); -+ -+ kfree(h->s); -+ } -+ kfree(h); -+ } -+ -+ BUG_ON(!list_empty(&c->ec_stripe_new_list)); -+ -+ free_heap(&c->ec_stripes_heap); -+ genradix_free(&c->stripes); -+ bioset_exit(&c->ec_bioset); -+} -+ -+void bch2_fs_ec_init_early(struct bch_fs *c) -+{ -+ spin_lock_init(&c->ec_stripes_new_lock); -+ mutex_init(&c->ec_stripes_heap_lock); -+ -+ INIT_LIST_HEAD(&c->ec_stripe_head_list); -+ mutex_init(&c->ec_stripe_head_lock); -+ -+ INIT_LIST_HEAD(&c->ec_stripe_new_list); -+ mutex_init(&c->ec_stripe_new_lock); -+ init_waitqueue_head(&c->ec_stripe_new_wait); -+ -+ INIT_WORK(&c->ec_stripe_create_work, ec_stripe_create_work); -+ INIT_WORK(&c->ec_stripe_delete_work, ec_stripe_delete_work); -+} -+ -+int bch2_fs_ec_init(struct bch_fs *c) -+{ -+ return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio), -+ BIOSET_NEED_BVECS); -+} -diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h -new file mode 100644 -index 000000000000..7d0237c9819f ---- /dev/null -+++ b/fs/bcachefs/ec.h -@@ -0,0 +1,260 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_H -+#define _BCACHEFS_EC_H -+ -+#include "ec_types.h" -+#include "buckets_types.h" -+#include "extents_types.h" -+ -+enum bkey_invalid_flags; -+ -+int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+#define bch2_bkey_ops_stripe ((struct bkey_ops) { \ -+ .key_invalid = bch2_stripe_invalid, \ -+ .val_to_text = bch2_stripe_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .trans_trigger = bch2_trans_mark_stripe, \ -+ .atomic_trigger = bch2_mark_stripe, \ -+ .min_val_size = 8, \ -+}) -+ -+static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(le16_to_cpu(s->sectors), -+ 1 << s->csum_granularity_bits); -+} -+ -+static inline unsigned stripe_csum_offset(const struct bch_stripe *s, -+ unsigned dev, unsigned csum_idx) -+{ -+ unsigned csum_bytes = bch_crc_bytes[s->csum_type]; -+ -+ return sizeof(struct bch_stripe) + -+ sizeof(struct bch_extent_ptr) * s->nr_blocks + -+ (dev * 
stripe_csums_per_device(s) + csum_idx) * csum_bytes; -+} -+ -+static inline unsigned stripe_blockcount_offset(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return stripe_csum_offset(s, s->nr_blocks, 0) + -+ sizeof(u16) * idx; -+} -+ -+static inline unsigned stripe_blockcount_get(const struct bch_stripe *s, -+ unsigned idx) -+{ -+ return le16_to_cpup((void *) s + stripe_blockcount_offset(s, idx)); -+} -+ -+static inline void stripe_blockcount_set(struct bch_stripe *s, -+ unsigned idx, unsigned v) -+{ -+ __le16 *p = (void *) s + stripe_blockcount_offset(s, idx); -+ -+ *p = cpu_to_le16(v); -+} -+ -+static inline unsigned stripe_val_u64s(const struct bch_stripe *s) -+{ -+ return DIV_ROUND_UP(stripe_blockcount_offset(s, s->nr_blocks), -+ sizeof(u64)); -+} -+ -+static inline void *stripe_csum(struct bch_stripe *s, -+ unsigned block, unsigned csum_idx) -+{ -+ EBUG_ON(block >= s->nr_blocks); -+ EBUG_ON(csum_idx >= stripe_csums_per_device(s)); -+ -+ return (void *) s + stripe_csum_offset(s, block, csum_idx); -+} -+ -+static inline struct bch_csum stripe_csum_get(struct bch_stripe *s, -+ unsigned block, unsigned csum_idx) -+{ -+ struct bch_csum csum = { 0 }; -+ -+ memcpy(&csum, stripe_csum(s, block, csum_idx), bch_crc_bytes[s->csum_type]); -+ return csum; -+} -+ -+static inline void stripe_csum_set(struct bch_stripe *s, -+ unsigned block, unsigned csum_idx, -+ struct bch_csum csum) -+{ -+ memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]); -+} -+ -+static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr, -+ const struct bch_extent_ptr *data_ptr, -+ unsigned sectors) -+{ -+ return data_ptr->dev == stripe_ptr->dev && -+ data_ptr->gen == stripe_ptr->gen && -+ data_ptr->offset >= stripe_ptr->offset && -+ data_ptr->offset < stripe_ptr->offset + sectors; -+} -+ -+static inline bool bch2_ptr_matches_stripe(const struct bch_stripe *s, -+ struct extent_ptr_decoded p) -+{ -+ unsigned nr_data = s->nr_blocks - s->nr_redundant; -+ -+ BUG_ON(!p.has_ec); -+ -+ if (p.ec.block >= nr_data) -+ return false; -+ -+ return __bch2_ptr_matches_stripe(&s->ptrs[p.ec.block], &p.ptr, -+ le16_to_cpu(s->sectors)); -+} -+ -+static inline bool bch2_ptr_matches_stripe_m(const struct gc_stripe *m, -+ struct extent_ptr_decoded p) -+{ -+ unsigned nr_data = m->nr_blocks - m->nr_redundant; -+ -+ BUG_ON(!p.has_ec); -+ -+ if (p.ec.block >= nr_data) -+ return false; -+ -+ return __bch2_ptr_matches_stripe(&m->ptrs[p.ec.block], &p.ptr, -+ m->sectors); -+} -+ -+struct bch_read_bio; -+ -+struct ec_stripe_buf { -+ /* might not be buffering the entire stripe: */ -+ unsigned offset; -+ unsigned size; -+ unsigned long valid[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; -+ -+ void *data[BCH_BKEY_PTRS_MAX]; -+ -+ __BKEY_PADDED(key, 255); -+}; -+ -+struct ec_stripe_head; -+ -+enum ec_stripe_ref { -+ STRIPE_REF_io, -+ STRIPE_REF_stripe, -+ STRIPE_REF_NR -+}; -+ -+struct ec_stripe_new { -+ struct bch_fs *c; -+ struct ec_stripe_head *h; -+ struct mutex lock; -+ struct list_head list; -+ -+ struct hlist_node hash; -+ u64 idx; -+ -+ struct closure iodone; -+ -+ atomic_t ref[STRIPE_REF_NR]; -+ -+ int err; -+ -+ u8 nr_data; -+ u8 nr_parity; -+ bool allocated; -+ bool pending; -+ bool have_existing_stripe; -+ -+ unsigned long blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; -+ unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; -+ open_bucket_idx_t blocks[BCH_BKEY_PTRS_MAX]; -+ struct disk_reservation res; -+ -+ struct ec_stripe_buf new_stripe; -+ struct ec_stripe_buf existing_stripe; -+}; -+ 
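The two reference counts in ec_stripe_new drive its lifecycle: per the put logic defined further down, dropping the last STRIPE_REF_io ref kicks off stripe creation via bch2_ec_do_stripe_creates(), while the STRIPE_REF_stripe ref keeps the struct itself alive until bch2_ec_stripe_new_free(). A minimal sketch of how a writer might use that API; the helper below and its body are illustrative assumptions, not part of this patch:

/*
 * Illustrative sketch only, not from the original patch: hold the io
 * ref across a write to the stripe's buckets, so that dropping the
 * last io ref (via ec_stripe_new_put()) triggers
 * bch2_ec_do_stripe_creates() and the stripe key gets written out.
 */
static void example_write_to_new_stripe(struct bch_fs *c,
					struct ec_stripe_new *s)
{
	/* pin the stripe for the duration of the I/O: */
	ec_stripe_new_get(s, STRIPE_REF_io);

	/* ... submit data/parity writes to s->new_stripe here ... */

	/*
	 * if this was the last io ref, stripe creation work is queued;
	 * the struct itself stays alive until the last STRIPE_REF_stripe
	 * ref is dropped:
	 */
	ec_stripe_new_put(c, s, STRIPE_REF_io);
}

Splitting the io and stripe refs this way lets creation start as soon as all writes land, while other holders (e.g. the in-flight list walked by bch2_new_stripes_to_text() above) can still safely reference the struct.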
-+struct ec_stripe_head { -+ struct list_head list; -+ struct mutex lock; -+ -+ unsigned target; -+ unsigned algo; -+ unsigned redundancy; -+ enum bch_watermark watermark; -+ -+ struct bch_devs_mask devs; -+ unsigned nr_active_devs; -+ -+ unsigned blocksize; -+ -+ struct dev_stripe_state block_stripe; -+ struct dev_stripe_state parity_stripe; -+ -+ struct ec_stripe_new *s; -+}; -+ -+int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *); -+ -+void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); -+ -+void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *); -+ -+int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *); -+ -+void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *); -+struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *, -+ unsigned, unsigned, unsigned, -+ enum bch_watermark, struct closure *); -+ -+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); -+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); -+ -+void bch2_do_stripe_deletes(struct bch_fs *); -+void bch2_ec_do_stripe_creates(struct bch_fs *); -+void bch2_ec_stripe_new_free(struct bch_fs *, struct ec_stripe_new *); -+ -+static inline void ec_stripe_new_get(struct ec_stripe_new *s, -+ enum ec_stripe_ref ref) -+{ -+ atomic_inc(&s->ref[ref]); -+} -+ -+static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, -+ enum ec_stripe_ref ref) -+{ -+ BUG_ON(atomic_read(&s->ref[ref]) <= 0); -+ -+ if (atomic_dec_and_test(&s->ref[ref])) -+ switch (ref) { -+ case STRIPE_REF_stripe: -+ bch2_ec_stripe_new_free(c, s); -+ break; -+ case STRIPE_REF_io: -+ bch2_ec_do_stripe_creates(c); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); -+void bch2_fs_ec_stop(struct bch_fs *); -+void bch2_fs_ec_flush(struct bch_fs *); -+ -+int bch2_stripes_read(struct bch_fs *); -+ -+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *); -+void bch2_new_stripes_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_ec_exit(struct bch_fs *); -+void bch2_fs_ec_init_early(struct bch_fs *); -+int bch2_fs_ec_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_EC_H */ -diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h -new file mode 100644 -index 000000000000..e2b02a82de32 ---- /dev/null -+++ b/fs/bcachefs/ec_types.h -@@ -0,0 +1,41 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EC_TYPES_H -+#define _BCACHEFS_EC_TYPES_H -+ -+#include "bcachefs_format.h" -+ -+struct bch_replicas_padded { -+ struct bch_replicas_entry e; -+ u8 pad[BCH_BKEY_PTRS_MAX]; -+}; -+ -+struct stripe { -+ size_t heap_idx; -+ u16 sectors; -+ u8 algorithm; -+ u8 nr_blocks; -+ u8 nr_redundant; -+ u8 blocks_nonempty; -+}; -+ -+struct gc_stripe { -+ u16 sectors; -+ -+ u8 nr_blocks; -+ u8 nr_redundant; -+ -+ unsigned alive:1; /* does a corresponding key exist in stripes btree? 
*/ -+ u16 block_sectors[BCH_BKEY_PTRS_MAX]; -+ struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX]; -+ -+ struct bch_replicas_padded r; -+}; -+ -+struct ec_stripe_heap_entry { -+ size_t idx; -+ unsigned blocks_nonempty; -+}; -+ -+typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; -+ -+#endif /* _BCACHEFS_EC_TYPES_H */ -diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c -new file mode 100644 -index 000000000000..d260ff9bbfeb ---- /dev/null -+++ b/fs/bcachefs/errcode.c -@@ -0,0 +1,68 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "errcode.h" -+ -+#include -+ -+static const char * const bch2_errcode_strs[] = { -+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = #err, -+ BCH_ERRCODES() -+#undef x -+ NULL -+}; -+ -+static unsigned bch2_errcode_parents[] = { -+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class, -+ BCH_ERRCODES() -+#undef x -+}; -+ -+const char *bch2_err_str(int err) -+{ -+ const char *errstr; -+ -+ err = abs(err); -+ -+ BUG_ON(err >= BCH_ERR_MAX); -+ -+ if (err >= BCH_ERR_START) -+ errstr = bch2_errcode_strs[err - BCH_ERR_START]; -+ else if (err) -+ errstr = errname(err); -+ else -+ errstr = "(No error)"; -+ return errstr ?: "(Invalid error)"; -+} -+ -+bool __bch2_err_matches(int err, int class) -+{ -+ err = abs(err); -+ class = abs(class); -+ -+ BUG_ON(err >= BCH_ERR_MAX); -+ BUG_ON(class >= BCH_ERR_MAX); -+ -+ while (err >= BCH_ERR_START && err != class) -+ err = bch2_errcode_parents[err - BCH_ERR_START]; -+ -+ return err == class; -+} -+ -+int __bch2_err_class(int err) -+{ -+ err = -err; -+ BUG_ON((unsigned) err >= BCH_ERR_MAX); -+ -+ while (err >= BCH_ERR_START && bch2_errcode_parents[err - BCH_ERR_START]) -+ err = bch2_errcode_parents[err - BCH_ERR_START]; -+ -+ return -err; -+} -+ -+const char *bch2_blk_status_to_str(blk_status_t status) -+{ -+ if (status == BLK_STS_REMOVED) -+ return "device removed"; -+ return blk_status_to_str(status); -+} -diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h -new file mode 100644 -index 000000000000..68a1a96bb7ca ---- /dev/null -+++ b/fs/bcachefs/errcode.h -@@ -0,0 +1,269 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ERRCODE_H -+#define _BCACHEFS_ERRCODE_H -+ -+#define BCH_ERRCODES() \ -+ x(ERANGE, ERANGE_option_too_small) \ -+ x(ERANGE, ERANGE_option_too_big) \ -+ x(ENOMEM, ENOMEM_stripe_buf) \ -+ x(ENOMEM, ENOMEM_replicas_table) \ -+ x(ENOMEM, ENOMEM_cpu_replicas) \ -+ x(ENOMEM, ENOMEM_replicas_gc) \ -+ x(ENOMEM, ENOMEM_disk_groups_validate) \ -+ x(ENOMEM, ENOMEM_disk_groups_to_cpu) \ -+ x(ENOMEM, ENOMEM_mark_snapshot) \ -+ x(ENOMEM, ENOMEM_mark_stripe) \ -+ x(ENOMEM, ENOMEM_mark_stripe_ptr) \ -+ x(ENOMEM, ENOMEM_btree_key_cache_create) \ -+ x(ENOMEM, ENOMEM_btree_key_cache_fill) \ -+ x(ENOMEM, ENOMEM_btree_key_cache_insert) \ -+ x(ENOMEM, ENOMEM_trans_kmalloc) \ -+ x(ENOMEM, ENOMEM_trans_log_msg) \ -+ x(ENOMEM, ENOMEM_do_encrypt) \ -+ x(ENOMEM, ENOMEM_ec_read_extent) \ -+ x(ENOMEM, ENOMEM_ec_stripe_mem_alloc) \ -+ x(ENOMEM, ENOMEM_ec_new_stripe_alloc) \ -+ x(ENOMEM, ENOMEM_fs_btree_cache_init) \ -+ x(ENOMEM, ENOMEM_fs_btree_key_cache_init) \ -+ x(ENOMEM, ENOMEM_fs_counters_init) \ -+ x(ENOMEM, ENOMEM_fs_btree_write_buffer_init) \ -+ x(ENOMEM, ENOMEM_io_clock_init) \ -+ x(ENOMEM, ENOMEM_blacklist_table_init) \ -+ x(ENOMEM, ENOMEM_sb_realloc_injected) \ -+ x(ENOMEM, ENOMEM_sb_bio_realloc) \ -+ x(ENOMEM, ENOMEM_sb_buf_realloc) \ -+ x(ENOMEM, ENOMEM_sb_journal_validate) \ -+ x(ENOMEM, ENOMEM_sb_journal_v2_validate) \ -+ x(ENOMEM, ENOMEM_journal_entry_add) 
\ -+ x(ENOMEM, ENOMEM_journal_read_buf_realloc) \ -+ x(ENOMEM, ENOMEM_btree_interior_update_worker_init)\ -+ x(ENOMEM, ENOMEM_btree_interior_update_pool_init) \ -+ x(ENOMEM, ENOMEM_bio_read_init) \ -+ x(ENOMEM, ENOMEM_bio_read_split_init) \ -+ x(ENOMEM, ENOMEM_bio_write_init) \ -+ x(ENOMEM, ENOMEM_bio_bounce_pages_init) \ -+ x(ENOMEM, ENOMEM_writepage_bioset_init) \ -+ x(ENOMEM, ENOMEM_dio_read_bioset_init) \ -+ x(ENOMEM, ENOMEM_dio_write_bioset_init) \ -+ x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \ -+ x(ENOMEM, ENOMEM_promote_table_init) \ -+ x(ENOMEM, ENOMEM_compression_bounce_read_init) \ -+ x(ENOMEM, ENOMEM_compression_bounce_write_init) \ -+ x(ENOMEM, ENOMEM_compression_workspace_init) \ -+ x(ENOMEM, ENOMEM_decompression_workspace_init) \ -+ x(ENOMEM, ENOMEM_bucket_gens) \ -+ x(ENOMEM, ENOMEM_buckets_nouse) \ -+ x(ENOMEM, ENOMEM_usage_init) \ -+ x(ENOMEM, ENOMEM_btree_node_read_all_replicas) \ -+ x(ENOMEM, ENOMEM_btree_node_reclaim) \ -+ x(ENOMEM, ENOMEM_btree_node_mem_alloc) \ -+ x(ENOMEM, ENOMEM_btree_cache_cannibalize_lock) \ -+ x(ENOMEM, ENOMEM_buckets_waiting_for_journal_init)\ -+ x(ENOMEM, ENOMEM_buckets_waiting_for_journal_set) \ -+ x(ENOMEM, ENOMEM_set_nr_journal_buckets) \ -+ x(ENOMEM, ENOMEM_dev_journal_init) \ -+ x(ENOMEM, ENOMEM_journal_pin_fifo) \ -+ x(ENOMEM, ENOMEM_journal_buf) \ -+ x(ENOMEM, ENOMEM_gc_start) \ -+ x(ENOMEM, ENOMEM_gc_alloc_start) \ -+ x(ENOMEM, ENOMEM_gc_reflink_start) \ -+ x(ENOMEM, ENOMEM_gc_gens) \ -+ x(ENOMEM, ENOMEM_gc_repair_key) \ -+ x(ENOMEM, ENOMEM_fsck_extent_ends_at) \ -+ x(ENOMEM, ENOMEM_fsck_add_nlink) \ -+ x(ENOMEM, ENOMEM_journal_key_insert) \ -+ x(ENOMEM, ENOMEM_journal_keys_sort) \ -+ x(ENOMEM, ENOMEM_journal_replay) \ -+ x(ENOMEM, ENOMEM_read_superblock_clean) \ -+ x(ENOMEM, ENOMEM_fs_alloc) \ -+ x(ENOMEM, ENOMEM_fs_name_alloc) \ -+ x(ENOMEM, ENOMEM_fs_other_alloc) \ -+ x(ENOMEM, ENOMEM_dev_alloc) \ -+ x(ENOSPC, ENOSPC_disk_reservation) \ -+ x(ENOSPC, ENOSPC_bucket_alloc) \ -+ x(ENOSPC, ENOSPC_disk_label_add) \ -+ x(ENOSPC, ENOSPC_stripe_create) \ -+ x(ENOSPC, ENOSPC_inode_create) \ -+ x(ENOSPC, ENOSPC_str_hash_create) \ -+ x(ENOSPC, ENOSPC_snapshot_create) \ -+ x(ENOSPC, ENOSPC_subvolume_create) \ -+ x(ENOSPC, ENOSPC_sb) \ -+ x(ENOSPC, ENOSPC_sb_journal) \ -+ x(ENOSPC, ENOSPC_sb_journal_seq_blacklist) \ -+ x(ENOSPC, ENOSPC_sb_quota) \ -+ x(ENOSPC, ENOSPC_sb_replicas) \ -+ x(ENOSPC, ENOSPC_sb_members) \ -+ x(ENOSPC, ENOSPC_sb_members_v2) \ -+ x(ENOSPC, ENOSPC_sb_crypt) \ -+ x(ENOSPC, ENOSPC_btree_slot) \ -+ x(ENOSPC, ENOSPC_snapshot_tree) \ -+ x(ENOENT, ENOENT_bkey_type_mismatch) \ -+ x(ENOENT, ENOENT_str_hash_lookup) \ -+ x(ENOENT, ENOENT_str_hash_set_must_replace) \ -+ x(ENOENT, ENOENT_inode) \ -+ x(ENOENT, ENOENT_not_subvol) \ -+ x(ENOENT, ENOENT_not_directory) \ -+ x(ENOENT, ENOENT_directory_dead) \ -+ x(ENOENT, ENOENT_subvolume) \ -+ x(ENOENT, ENOENT_snapshot_tree) \ -+ x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ -+ x(ENOENT, ENOENT_dev_not_found) \ -+ x(ENOENT, ENOENT_dev_idx_not_found) \ -+ x(0, open_buckets_empty) \ -+ x(0, freelist_empty) \ -+ x(BCH_ERR_freelist_empty, no_buckets_found) \ -+ x(0, transaction_restart) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \ -+ x(BCH_ERR_transaction_restart, 
transaction_restart_too_many_iters) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_fill_mem_alloc_fail)\ -+ x(BCH_ERR_transaction_restart, transaction_restart_mem_realloced) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\ -+ x(BCH_ERR_transaction_restart, transaction_restart_deadlock_recursion_limit)\ -+ x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\ -+ x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \ -+ x(BCH_ERR_transaction_restart, transaction_restart_nested) \ -+ x(0, no_btree_node) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_relock) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_drop) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_lock_root) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_up) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_down) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_init) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_cached) \ -+ x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \ -+ x(0, btree_insert_fail) \ -+ x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \ -+ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \ -+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ -+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ -+ x(BCH_ERR_btree_insert_fail, btree_insert_need_flush_buffer) \ -+ x(0, backpointer_to_overwritten_btree_node) \ -+ x(0, lock_fail_root_changed) \ -+ x(0, journal_reclaim_would_deadlock) \ -+ x(EINVAL, fsck) \ -+ x(BCH_ERR_fsck, fsck_fix) \ -+ x(BCH_ERR_fsck, fsck_ignore) \ -+ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ -+ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ -+ x(BCH_ERR_fsck, fsck_repair_impossible) \ -+ x(0, restart_recovery) \ -+ x(0, unwritten_extent_update) \ -+ x(EINVAL, device_state_not_allowed) \ -+ x(EINVAL, member_info_missing) \ -+ x(EINVAL, mismatched_block_size) \ -+ x(EINVAL, block_size_too_small) \ -+ x(EINVAL, bucket_size_too_small) \ -+ x(EINVAL, device_size_too_small) \ -+ x(EINVAL, device_not_a_member_of_filesystem) \ -+ x(EINVAL, device_has_been_removed) \ -+ x(EINVAL, device_already_online) \ -+ x(EINVAL, insufficient_devices_to_start) \ -+ x(EINVAL, invalid) \ -+ x(EINVAL, internal_fsck_err) \ -+ x(EROFS, erofs_trans_commit) \ -+ x(EROFS, erofs_no_writes) \ -+ x(EROFS, erofs_journal_err) \ -+ x(EROFS, erofs_sb_err) \ -+ x(EROFS, erofs_unfixed_errors) \ -+ x(EROFS, erofs_norecovery) \ -+ x(EROFS, erofs_nochanges) \ -+ x(EROFS, insufficient_devices) \ -+ x(0, operation_blocked) \ -+ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ -+ x(BCH_ERR_operation_blocked, journal_res_get_blocked) \ -+ x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \ -+ x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \ -+ 
x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \ -+ x(BCH_ERR_invalid, invalid_sb) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_magic) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_version) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_features) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_too_big) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_csum_type) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_csum) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_block_size) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_uuid) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_too_many_members) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_dev_idx) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_time_precision) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_field_size) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_layout) \ -+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_type) \ -+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_nr_superblocks) \ -+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_superblocks_overlap) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_members_missing) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_members) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_replicas) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_journal) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_clean) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_errors) \ -+ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ -+ x(BCH_ERR_invalid, invalid_bkey) \ -+ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ -+ x(EIO, btree_node_read_err) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_bad_node) \ -+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_incompatible) \ -+ x(0, nopromote) \ -+ x(BCH_ERR_nopromote, nopromote_may_not) \ -+ x(BCH_ERR_nopromote, nopromote_already_promoted) \ -+ x(BCH_ERR_nopromote, nopromote_unwritten) \ -+ x(BCH_ERR_nopromote, nopromote_congested) \ -+ x(BCH_ERR_nopromote, nopromote_in_flight) \ -+ x(BCH_ERR_nopromote, nopromote_enomem) -+ -+enum bch_errcode { -+ BCH_ERR_START = 2048, -+#define x(class, err) BCH_ERR_##err, -+ BCH_ERRCODES() -+#undef x -+ BCH_ERR_MAX -+}; -+ -+const char *bch2_err_str(int); -+bool __bch2_err_matches(int, int); -+ -+static inline bool _bch2_err_matches(int err, int class) -+{ -+ return err < 0 && __bch2_err_matches(err, class); -+} -+ -+#define bch2_err_matches(_err, _class) \ -+({ \ -+ BUILD_BUG_ON(!__builtin_constant_p(_class)); \ -+ unlikely(_bch2_err_matches(_err, _class)); \ -+}) -+ -+int __bch2_err_class(int); -+ -+static inline long bch2_err_class(long err) -+{ -+ return err < 0 ? 
__bch2_err_class(err) : err; -+} -+ -+#define BLK_STS_REMOVED ((__force blk_status_t)128) -+ -+const char *bch2_blk_status_to_str(blk_status_t); -+ -+#endif /* _BCACHFES_ERRCODE_H */ -diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c -new file mode 100644 -index 000000000000..7b28d37922fd ---- /dev/null -+++ b/fs/bcachefs/error.c -@@ -0,0 +1,299 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "error.h" -+#include "super.h" -+ -+#define FSCK_ERR_RATELIMIT_NR 10 -+ -+bool bch2_inconsistent_error(struct bch_fs *c) -+{ -+ set_bit(BCH_FS_ERROR, &c->flags); -+ -+ switch (c->opts.errors) { -+ case BCH_ON_ERROR_continue: -+ return false; -+ case BCH_ON_ERROR_ro: -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "inconsistency detected - emergency read only"); -+ return true; -+ case BCH_ON_ERROR_panic: -+ panic(bch2_fmt(c, "panic after error")); -+ return true; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_topology_error(struct bch_fs *c) -+{ -+ set_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags); -+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) -+ bch2_inconsistent_error(c); -+} -+ -+void bch2_fatal_error(struct bch_fs *c) -+{ -+ if (bch2_fs_emergency_read_only(c)) -+ bch_err(c, "fatal error - emergency read only"); -+} -+ -+void bch2_io_error_work(struct work_struct *work) -+{ -+ struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); -+ struct bch_fs *c = ca->fs; -+ bool dev; -+ -+ down_write(&c->state_lock); -+ dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, -+ BCH_FORCE_IF_DEGRADED); -+ if (dev -+ ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, -+ BCH_FORCE_IF_DEGRADED) -+ : bch2_fs_emergency_read_only(c)) -+ bch_err(ca, -+ "too many IO errors, setting %s RO", -+ dev ? "device" : "filesystem"); -+ up_write(&c->state_lock); -+} -+ -+void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) -+{ -+ atomic64_inc(&ca->errors[type]); -+ //queue_work(system_long_wq, &ca->io_error_work); -+} -+ -+enum ask_yn { -+ YN_NO, -+ YN_YES, -+ YN_ALLNO, -+ YN_ALLYES, -+}; -+ -+#ifdef __KERNEL__ -+#define bch2_fsck_ask_yn() YN_NO -+#else -+ -+#include "tools-util.h" -+ -+enum ask_yn bch2_fsck_ask_yn(void) -+{ -+ char *buf = NULL; -+ size_t buflen = 0; -+ bool ret; -+ -+ while (true) { -+ fputs(" (y,n, or Y,N for all errors of this type) ", stdout); -+ fflush(stdout); -+ -+ if (getline(&buf, &buflen, stdin) < 0) -+ die("error reading from standard input"); -+ -+ strim(buf); -+ if (strlen(buf) != 1) -+ continue; -+ -+ switch (buf[0]) { -+ case 'n': -+ return YN_NO; -+ case 'y': -+ return YN_YES; -+ case 'N': -+ return YN_ALLNO; -+ case 'Y': -+ return YN_ALLYES; -+ } -+ } -+ -+ free(buf); -+ return ret; -+} -+ -+#endif -+ -+static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) -+{ -+ struct fsck_err_state *s; -+ -+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) -+ return NULL; -+ -+ list_for_each_entry(s, &c->fsck_error_msgs, list) -+ if (s->fmt == fmt) { -+ /* -+ * move it to the head of the list: repeated fsck errors -+ * are common -+ */ -+ list_move(&s->list, &c->fsck_error_msgs); -+ return s; -+ } -+ -+ s = kzalloc(sizeof(*s), GFP_NOFS); -+ if (!s) { -+ if (!c->fsck_alloc_msgs_err) -+ bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); -+ c->fsck_alloc_msgs_err = true; -+ return NULL; -+ } -+ -+ INIT_LIST_HEAD(&s->list); -+ s->fmt = fmt; -+ list_add(&s->list, &c->fsck_error_msgs); -+ return s; -+} -+ -+int bch2_fsck_err(struct bch_fs *c, -+ enum bch_fsck_flags flags, -+ enum bch_sb_error_id err, -+ const char *fmt, ...) 
-+{ -+ struct fsck_err_state *s = NULL; -+ va_list args; -+ bool print = true, suppressing = false, inconsistent = false; -+ struct printbuf buf = PRINTBUF, *out = &buf; -+ int ret = -BCH_ERR_fsck_ignore; -+ -+ bch2_sb_error_count(c, err); -+ -+ va_start(args, fmt); -+ prt_vprintf(out, fmt, args); -+ va_end(args); -+ -+ mutex_lock(&c->fsck_error_msgs_lock); -+ s = fsck_err_get(c, fmt); -+ if (s) { -+ /* -+ * We may be called multiple times for the same error on -+ * transaction restart - this memoizes instead of asking the user -+ * multiple times for the same error: -+ */ -+ if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { -+ ret = s->ret; -+ mutex_unlock(&c->fsck_error_msgs_lock); -+ printbuf_exit(&buf); -+ return ret; -+ } -+ -+ kfree(s->last_msg); -+ s->last_msg = kstrdup(buf.buf, GFP_KERNEL); -+ -+ if (c->opts.ratelimit_errors && -+ !(flags & FSCK_NO_RATELIMIT) && -+ s->nr >= FSCK_ERR_RATELIMIT_NR) { -+ if (s->nr == FSCK_ERR_RATELIMIT_NR) -+ suppressing = true; -+ else -+ print = false; -+ } -+ -+ s->nr++; -+ } -+ -+#ifdef BCACHEFS_LOG_PREFIX -+ if (!strncmp(fmt, "bcachefs:", 9)) -+ prt_printf(out, bch2_log_msg(c, "")); -+#endif -+ -+ if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { -+ if (c->opts.errors != BCH_ON_ERROR_continue || -+ !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { -+ prt_str(out, ", shutting down"); -+ inconsistent = true; -+ ret = -BCH_ERR_fsck_errors_not_fixed; -+ } else if (flags & FSCK_CAN_FIX) { -+ prt_str(out, ", fixing"); -+ ret = -BCH_ERR_fsck_fix; -+ } else { -+ prt_str(out, ", continuing"); -+ ret = -BCH_ERR_fsck_ignore; -+ } -+ } else if (c->opts.fix_errors == FSCK_FIX_exit) { -+ prt_str(out, ", exiting"); -+ ret = -BCH_ERR_fsck_errors_not_fixed; -+ } else if (flags & FSCK_CAN_FIX) { -+ int fix = s && s->fix -+ ? s->fix -+ : c->opts.fix_errors; -+ -+ if (fix == FSCK_FIX_ask) { -+ int ask; -+ -+ prt_str(out, ": fix?"); -+ bch2_print_string_as_lines(KERN_ERR, out->buf); -+ print = false; -+ -+ ask = bch2_fsck_ask_yn(); -+ -+ if (ask >= YN_ALLNO && s) -+ s->fix = ask == YN_ALLNO -+ ? FSCK_FIX_no -+ : FSCK_FIX_yes; -+ -+ ret = ask & 1 -+ ? 
-BCH_ERR_fsck_fix -+ : -BCH_ERR_fsck_ignore; -+ } else if (fix == FSCK_FIX_yes || -+ (c->opts.nochanges && -+ !(flags & FSCK_CAN_IGNORE))) { -+ prt_str(out, ", fixing"); -+ ret = -BCH_ERR_fsck_fix; -+ } else { -+ prt_str(out, ", not fixing"); -+ } -+ } else if (flags & FSCK_NEED_FSCK) { -+ prt_str(out, " (run fsck to correct)"); -+ } else { -+ prt_str(out, " (repair unimplemented)"); -+ } -+ -+ if (ret == -BCH_ERR_fsck_ignore && -+ (c->opts.fix_errors == FSCK_FIX_exit || -+ !(flags & FSCK_CAN_IGNORE))) -+ ret = -BCH_ERR_fsck_errors_not_fixed; -+ -+ if (print) -+ bch2_print_string_as_lines(KERN_ERR, out->buf); -+ -+ if (!test_bit(BCH_FS_FSCK_DONE, &c->flags) && -+ (ret != -BCH_ERR_fsck_fix && -+ ret != -BCH_ERR_fsck_ignore)) -+ bch_err(c, "Unable to continue, halting"); -+ else if (suppressing) -+ bch_err(c, "Ratelimiting new instances of previous error"); -+ -+ if (s) -+ s->ret = ret; -+ -+ mutex_unlock(&c->fsck_error_msgs_lock); -+ -+ printbuf_exit(&buf); -+ -+ if (inconsistent) -+ bch2_inconsistent_error(c); -+ -+ if (ret == -BCH_ERR_fsck_fix) { -+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags); -+ } else { -+ set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); -+ set_bit(BCH_FS_ERROR, &c->flags); -+ } -+ -+ return ret; -+} -+ -+void bch2_flush_fsck_errs(struct bch_fs *c) -+{ -+ struct fsck_err_state *s, *n; -+ -+ mutex_lock(&c->fsck_error_msgs_lock); -+ -+ list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { -+ if (s->ratelimited && s->last_msg) -+ bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); -+ -+ list_del(&s->list); -+ kfree(s->last_msg); -+ kfree(s); -+ } -+ -+ mutex_unlock(&c->fsck_error_msgs_lock); -+} -diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h -new file mode 100644 -index 000000000000..d167d65986e0 ---- /dev/null -+++ b/fs/bcachefs/error.h -@@ -0,0 +1,242 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_ERROR_H -+#define _BCACHEFS_ERROR_H -+ -+#include -+#include -+#include "sb-errors.h" -+ -+struct bch_dev; -+struct bch_fs; -+struct work_struct; -+ -+/* -+ * XXX: separate out errors that indicate on disk data is inconsistent, and flag -+ * superblock as such -+ */ -+ -+/* Error messages: */ -+ -+/* -+ * Inconsistency errors: The on disk data is inconsistent. If these occur during -+ * initial recovery, they don't indicate a bug in the running code - we walk all -+ * the metadata before modifying anything. If they occur at runtime, they -+ * indicate either a bug in the running code or (less likely) data is being -+ * silently corrupted under us. -+ * -+ * XXX: audit all inconsistent errors and make sure they're all recoverable, in -+ * BCH_ON_ERROR_CONTINUE mode -+ */ -+ -+bool bch2_inconsistent_error(struct bch_fs *); -+ -+void bch2_topology_error(struct bch_fs *); -+ -+#define bch2_fs_inconsistent(c, ...) \ -+({ \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_inconsistent_error(c); \ -+}) -+ -+#define bch2_fs_inconsistent_on(cond, c, ...) \ -+({ \ -+ bool _ret = unlikely(!!(cond)); \ -+ \ -+ if (_ret) \ -+ bch2_fs_inconsistent(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Later we might want to mark only the particular device inconsistent, not the -+ * entire filesystem: -+ */ -+ -+#define bch2_dev_inconsistent(ca, ...) \ -+do { \ -+ bch_err(ca, __VA_ARGS__); \ -+ bch2_inconsistent_error((ca)->fs); \ -+} while (0) -+ -+#define bch2_dev_inconsistent_on(cond, ca, ...) 
\ -+({ \ -+ bool _ret = unlikely(!!(cond)); \ -+ \ -+ if (_ret) \ -+ bch2_dev_inconsistent(ca, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * When a transaction update discovers or is causing a fs inconsistency, it's -+ * helpful to also dump the pending updates: -+ */ -+#define bch2_trans_inconsistent(trans, ...) \ -+({ \ -+ bch_err(trans->c, __VA_ARGS__); \ -+ bch2_dump_trans_updates(trans); \ -+ bch2_inconsistent_error(trans->c); \ -+}) -+ -+#define bch2_trans_inconsistent_on(cond, trans, ...) \ -+({ \ -+ bool _ret = unlikely(!!(cond)); \ -+ \ -+ if (_ret) \ -+ bch2_trans_inconsistent(trans, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * Fsck errors: inconsistency errors we detect at mount time, and should ideally -+ * be able to repair: -+ */ -+ -+struct fsck_err_state { -+ struct list_head list; -+ const char *fmt; -+ u64 nr; -+ bool ratelimited; -+ int ret; -+ int fix; -+ char *last_msg; -+}; -+ -+enum bch_fsck_flags { -+ FSCK_CAN_FIX = 1 << 0, -+ FSCK_CAN_IGNORE = 1 << 1, -+ FSCK_NEED_FSCK = 1 << 2, -+ FSCK_NO_RATELIMIT = 1 << 3, -+}; -+ -+#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) -+ -+__printf(4, 5) __cold -+int bch2_fsck_err(struct bch_fs *, -+ enum bch_fsck_flags, -+ enum bch_sb_error_id, -+ const char *, ...); -+void bch2_flush_fsck_errs(struct bch_fs *); -+ -+#define __fsck_err(c, _flags, _err_type, ...) \ -+({ \ -+ int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \ -+ __VA_ARGS__); \ -+ \ -+ if (_ret != -BCH_ERR_fsck_fix && \ -+ _ret != -BCH_ERR_fsck_ignore) { \ -+ ret = _ret; \ -+ goto fsck_err; \ -+ } \ -+ \ -+ _ret == -BCH_ERR_fsck_fix; \ -+}) -+ -+/* These macros return true if error should be fixed: */ -+ -+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -+ -+#define __fsck_err_on(cond, c, _flags, _err_type, ...) \ -+ (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false) -+ -+#define need_fsck_err_on(cond, c, _err_type, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) -+ -+#define need_fsck_err(c, _err_type, ...) \ -+ __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) -+ -+#define mustfix_fsck_err(c, _err_type, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) -+ -+#define mustfix_fsck_err_on(cond, c, _err_type, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) -+ -+#define fsck_err(c, _err_type, ...) \ -+ __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -+ -+#define fsck_err_on(cond, c, _err_type, ...) \ -+ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -+ -+static inline void bch2_bkey_fsck_err(struct bch_fs *c, -+ struct printbuf *err_msg, -+ enum bch_sb_error_id err_type, -+ const char *fmt, ...) -+{ -+ va_list args; -+ -+ va_start(args, fmt); -+ prt_vprintf(err_msg, fmt, args); -+ va_end(args); -+ -+} -+ -+#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ -+do { \ -+ prt_printf(_err_msg, __VA_ARGS__); \ -+ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ -+ ret = -BCH_ERR_invalid_bkey; \ -+ goto fsck_err; \ -+} while (0) -+ -+#define bkey_fsck_err_on(cond, ...) \ -+do { \ -+ if (unlikely(cond)) \ -+ bkey_fsck_err(__VA_ARGS__); \ -+} while (0) -+ -+/* -+ * Fatal errors: these don't indicate a bug, but we can't continue running in RW -+ * mode - pretty much just due to metadata IO errors: -+ */ -+ -+void bch2_fatal_error(struct bch_fs *); -+ -+#define bch2_fs_fatal_error(c, ...) 
\ -+do { \ -+ bch_err(c, __VA_ARGS__); \ -+ bch2_fatal_error(c); \ -+} while (0) -+ -+#define bch2_fs_fatal_err_on(cond, c, ...) \ -+({ \ -+ bool _ret = unlikely(!!(cond)); \ -+ \ -+ if (_ret) \ -+ bch2_fs_fatal_error(c, __VA_ARGS__); \ -+ _ret; \ -+}) -+ -+/* -+ * IO errors: either recoverable metadata IO (because we have replicas), or data -+ * IO - we need to log it and print out a message, but we don't (necessarily) -+ * want to shut down the fs: -+ */ -+ -+void bch2_io_error_work(struct work_struct *); -+ -+/* Does the error handling without logging a message */ -+void bch2_io_error(struct bch_dev *, enum bch_member_error_type); -+ -+#define bch2_dev_io_err_on(cond, ca, _type, ...) \ -+({ \ -+ bool _ret = (cond); \ -+ \ -+ if (_ret) { \ -+ bch_err_dev_ratelimited(ca, __VA_ARGS__); \ -+ bch2_io_error(ca, _type); \ -+ } \ -+ _ret; \ -+}) -+ -+#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \ -+({ \ -+ bool _ret = (cond); \ -+ \ -+ if (_ret) { \ -+ bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \ -+ bch2_io_error(ca, _type); \ -+ } \ -+ _ret; \ -+}) -+ -+#endif /* _BCACHEFS_ERROR_H */ -diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c -new file mode 100644 -index 000000000000..21af6fb8cecf ---- /dev/null -+++ b/fs/bcachefs/extent_update.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "debug.h" -+#include "extents.h" -+#include "extent_update.h" -+ -+/* -+ * This counts the number of iterators to the alloc & ec btrees we'll need -+ * inserting/removing this extent: -+ */ -+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ unsigned ret = 0, lru = 0; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ /* Might also be updating LRU btree */ -+ if (entry->ptr.cached) -+ lru++; -+ -+ fallthrough; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ ret++; -+ } -+ } -+ -+ /* -+ * Updating keys in the alloc btree may also update keys in the -+ * freespace or discard btrees: -+ */ -+ return lru + ret * 2; -+} -+ -+static int count_iters_for_insert(struct btree_trans *trans, -+ struct bkey_s_c k, -+ unsigned offset, -+ struct bpos *end, -+ unsigned *nr_iters, -+ unsigned max_iters) -+{ -+ int ret = 0, ret2 = 0; -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k); -+ -+ if (*nr_iters >= max_iters) { -+ *end = bpos_min(*end, k.k->p); -+ ret = 1; -+ } -+ -+ break; -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ u64 idx = le64_to_cpu(p.v->idx); -+ unsigned sectors = bpos_min(*end, p.k->p).offset - -+ bkey_start_offset(p.k); -+ struct btree_iter iter; -+ struct bkey_s_c r_k; -+ -+ for_each_btree_key_norestart(trans, iter, -+ BTREE_ID_reflink, POS(0, idx + offset), -+ BTREE_ITER_SLOTS, r_k, ret2) { -+ if (bkey_ge(bkey_start_pos(r_k.k), POS(0, idx + sectors))) -+ break; -+ -+ /* extent_update_to_keys(), for the reflink_v update */ -+ *nr_iters += 1; -+ -+ *nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k); -+ -+ if (*nr_iters >= max_iters) { -+ struct bpos pos = bkey_start_pos(k.k); -+ pos.offset += min_t(u64, k.k->size, -+ r_k.k->p.offset - idx); -+ -+ *end = bpos_min(*end, pos); -+ ret = 
1; -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ break; -+ } -+ } -+ -+ return ret2 ?: ret; -+} -+ -+#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) -+ -+int bch2_extent_atomic_end(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *insert, -+ struct bpos *end) -+{ -+ struct btree_iter copy; -+ struct bkey_s_c k; -+ unsigned nr_iters = 0; -+ int ret; -+ -+ ret = bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ *end = insert->k.p; -+ -+ /* extent_update_to_keys(): */ -+ nr_iters += 1; -+ -+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end, -+ &nr_iters, EXTENT_ITERS_MAX / 2); -+ if (ret < 0) -+ return ret; -+ -+ bch2_trans_copy_iter(©, iter); -+ -+ for_each_btree_key_upto_continue_norestart(copy, insert->k.p, 0, k, ret) { -+ unsigned offset = 0; -+ -+ if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) -+ offset = bkey_start_offset(&insert->k) - -+ bkey_start_offset(k.k); -+ -+ /* extent_handle_overwrites(): */ -+ switch (bch2_extent_overlap(&insert->k, k.k)) { -+ case BCH_EXTENT_OVERLAP_ALL: -+ case BCH_EXTENT_OVERLAP_FRONT: -+ nr_iters += 1; -+ break; -+ case BCH_EXTENT_OVERLAP_BACK: -+ case BCH_EXTENT_OVERLAP_MIDDLE: -+ nr_iters += 2; -+ break; -+ } -+ -+ ret = count_iters_for_insert(trans, k, offset, end, -+ &nr_iters, EXTENT_ITERS_MAX); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, ©); -+ return ret < 0 ? ret : 0; -+} -+ -+int bch2_extent_trim_atomic(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *k) -+{ -+ struct bpos end; -+ int ret; -+ -+ ret = bch2_extent_atomic_end(trans, iter, k, &end); -+ if (ret) -+ return ret; -+ -+ bch2_cut_back(end, k); -+ return 0; -+} -diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h -new file mode 100644 -index 000000000000..6f5cf449361a ---- /dev/null -+++ b/fs/bcachefs/extent_update.h -@@ -0,0 +1,12 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENT_UPDATE_H -+#define _BCACHEFS_EXTENT_UPDATE_H -+ -+#include "bcachefs.h" -+ -+int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, struct bpos *); -+int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *); -+ -+#endif /* _BCACHEFS_EXTENT_UPDATE_H */ -diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c -new file mode 100644 -index 000000000000..a864de231b69 ---- /dev/null -+++ b/fs/bcachefs/extents.c -@@ -0,0 +1,1516 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2010 Kent Overstreet -+ * -+ * Code for managing the extent btree and dynamically updating the writeback -+ * dirty sector count. 
-+ */ -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "btree_io.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "compress.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "extents.h" -+#include "inode.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super.h" -+#include "super-io.h" -+#include "trace.h" -+#include "util.h" -+ -+static unsigned bch2_crc_field_size_max[] = { -+ [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX, -+ [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX, -+}; -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *, -+ struct bch_extent_crc_unpacked, -+ enum bch_extent_entry_type); -+ -+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, -+ unsigned dev) -+{ -+ struct bch_dev_io_failures *i; -+ -+ for (i = f->devs; i < f->devs + f->nr; i++) -+ if (i->dev == dev) -+ return i; -+ -+ return NULL; -+} -+ -+void bch2_mark_io_failure(struct bch_io_failures *failed, -+ struct extent_ptr_decoded *p) -+{ -+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); -+ -+ if (!f) { -+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); -+ -+ f = &failed->devs[failed->nr++]; -+ f->dev = p->ptr.dev; -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else if (p->idx != f->idx) { -+ f->idx = p->idx; -+ f->nr_failed = 1; -+ f->nr_retries = 0; -+ } else { -+ f->nr_failed++; -+ } -+} -+ -+/* -+ * returns true if p1 is better than p2: -+ */ -+static inline bool ptr_better(struct bch_fs *c, -+ const struct extent_ptr_decoded p1, -+ const struct extent_ptr_decoded p2) -+{ -+ if (likely(!p1.idx && !p2.idx)) { -+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); -+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); -+ -+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]); -+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]); -+ -+ /* Pick at random, biased in favor of the faster device: */ -+ -+ return bch2_rand_range(l1 + l2) > l1; -+ } -+ -+ if (bch2_force_reconstruct_read) -+ return p1.idx > p2.idx; -+ -+ return p1.idx < p2.idx; -+} -+ -+/* -+ * This picks a non-stale pointer, preferably from a device other than @avoid. -+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to -+ * other devices, it will still pick a pointer from avoid. -+ */ -+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_io_failures *failed, -+ struct extent_ptr_decoded *pick) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ struct bch_dev_io_failures *f; -+ struct bch_dev *ca; -+ int ret = 0; -+ -+ if (k.k->type == KEY_TYPE_error) -+ return -EIO; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ /* -+ * Unwritten extent: no need to actually read, treat it as a -+ * hole and return 0s: -+ */ -+ if (p.ptr.unwritten) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, p.ptr.dev); -+ -+ /* -+ * If there are any dirty pointers it's an error if we can't -+ * read: -+ */ -+ if (!ret && !p.ptr.cached) -+ ret = -EIO; -+ -+ if (p.ptr.cached && ptr_stale(ca, &p.ptr)) -+ continue; -+ -+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; -+ if (f) -+ p.idx = f->nr_failed < f->nr_retries -+ ? 
f->idx -+ : f->idx + 1; -+ -+ if (!p.idx && -+ !bch2_dev_is_readable(ca)) -+ p.idx++; -+ -+ if (bch2_force_reconstruct_read && -+ !p.idx && p.has_ec) -+ p.idx++; -+ -+ if (p.idx >= (unsigned) p.has_ec + 1) -+ continue; -+ -+ if (ret > 0 && !ptr_better(c, p, *pick)) -+ continue; -+ -+ *pick = p; -+ ret = 1; -+ } -+ -+ return ret; -+} -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, -+ btree_ptr_val_too_big, -+ "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); -+ -+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err); -+fsck_err: -+ return ret; -+} -+ -+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, -+ btree_ptr_v2_val_too_big, -+ "value too big (%zu > %zu)", -+ bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); -+ -+ ret = bch2_bkey_ptrs_invalid(c, k, flags, err); -+fsck_err: -+ return ret; -+} -+ -+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); -+ -+ prt_printf(out, "seq %llx written %u min_key %s", -+ le64_to_cpu(bp.v->seq), -+ le16_to_cpu(bp.v->sectors_written), -+ BTREE_PTR_RANGE_UPDATED(bp.v) ? "R " : ""); -+ -+ bch2_bpos_to_text(out, bp.v->min_key); -+ prt_printf(out, " "); -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version, -+ unsigned big_endian, int write, -+ struct bkey_s k) -+{ -+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k); -+ -+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key); -+ -+ if (version < bcachefs_metadata_version_inode_btree_change && -+ btree_id_is_extents(btree_id) && -+ !bkey_eq(bp.v->min_key, POS_MIN)) -+ bp.v->min_key = write -+ ? 
bpos_nosnap_predecessor(bp.v->min_key) -+ : bpos_nosnap_successor(bp.v->min_key); -+} -+ -+/* KEY_TYPE_extent: */ -+ -+bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) -+{ -+ struct bkey_ptrs l_ptrs = bch2_bkey_ptrs(l); -+ struct bkey_ptrs_c r_ptrs = bch2_bkey_ptrs_c(r); -+ union bch_extent_entry *en_l; -+ const union bch_extent_entry *en_r; -+ struct extent_ptr_decoded lp, rp; -+ bool use_right_ptr; -+ struct bch_dev *ca; -+ -+ en_l = l_ptrs.start; -+ en_r = r_ptrs.start; -+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) { -+ if (extent_entry_type(en_l) != extent_entry_type(en_r)) -+ return false; -+ -+ en_l = extent_entry_next(en_l); -+ en_r = extent_entry_next(en_r); -+ } -+ -+ if (en_l < l_ptrs.end || en_r < r_ptrs.end) -+ return false; -+ -+ en_l = l_ptrs.start; -+ en_r = r_ptrs.start; -+ lp.crc = bch2_extent_crc_unpack(l.k, NULL); -+ rp.crc = bch2_extent_crc_unpack(r.k, NULL); -+ -+ while (__bkey_ptr_next_decode(l.k, l_ptrs.end, lp, en_l) && -+ __bkey_ptr_next_decode(r.k, r_ptrs.end, rp, en_r)) { -+ if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size != -+ rp.ptr.offset + rp.crc.offset || -+ lp.ptr.dev != rp.ptr.dev || -+ lp.ptr.gen != rp.ptr.gen || -+ lp.ptr.unwritten != rp.ptr.unwritten || -+ lp.has_ec != rp.has_ec) -+ return false; -+ -+ /* Extents may not straddle buckets: */ -+ ca = bch_dev_bkey_exists(c, lp.ptr.dev); -+ if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr)) -+ return false; -+ -+ if (lp.has_ec != rp.has_ec || -+ (lp.has_ec && -+ (lp.ec.block != rp.ec.block || -+ lp.ec.redundancy != rp.ec.redundancy || -+ lp.ec.idx != rp.ec.idx))) -+ return false; -+ -+ if (lp.crc.compression_type != rp.crc.compression_type || -+ lp.crc.nonce != rp.crc.nonce) -+ return false; -+ -+ if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <= -+ lp.crc.uncompressed_size) { -+ /* can use left extent's crc entry */ -+ } else if (lp.crc.live_size <= rp.crc.offset) { -+ /* can use right extent's crc entry */ -+ } else { -+ /* check if checksums can be merged: */ -+ if (lp.crc.csum_type != rp.crc.csum_type || -+ lp.crc.nonce != rp.crc.nonce || -+ crc_is_compressed(lp.crc) || -+ !bch2_checksum_mergeable(lp.crc.csum_type)) -+ return false; -+ -+ if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size || -+ rp.crc.offset) -+ return false; -+ -+ if (lp.crc.csum_type && -+ lp.crc.uncompressed_size + -+ rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9)) -+ return false; -+ } -+ -+ en_l = extent_entry_next(en_l); -+ en_r = extent_entry_next(en_r); -+ } -+ -+ en_l = l_ptrs.start; -+ en_r = r_ptrs.start; -+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) { -+ if (extent_entry_is_crc(en_l)) { -+ struct bch_extent_crc_unpacked crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ struct bch_extent_crc_unpacked crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ if (crc_l.uncompressed_size + crc_r.uncompressed_size > -+ bch2_crc_field_size_max[extent_entry_type(en_l)]) -+ return false; -+ } -+ -+ en_l = extent_entry_next(en_l); -+ en_r = extent_entry_next(en_r); -+ } -+ -+ use_right_ptr = false; -+ en_l = l_ptrs.start; -+ en_r = r_ptrs.start; -+ while (en_l < l_ptrs.end) { -+ if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr && -+ use_right_ptr) -+ en_l->ptr = en_r->ptr; -+ -+ if (extent_entry_is_crc(en_l)) { -+ struct bch_extent_crc_unpacked crc_l = -+ bch2_extent_crc_unpack(l.k, entry_to_crc(en_l)); -+ struct bch_extent_crc_unpacked crc_r = -+ bch2_extent_crc_unpack(r.k, entry_to_crc(en_r)); -+ -+ use_right_ptr = false; -+ -+ 
if (crc_l.offset + crc_l.live_size + crc_r.live_size <= -+ crc_l.uncompressed_size) { -+ /* can use left extent's crc entry */ -+ } else if (crc_l.live_size <= crc_r.offset) { -+ /* can use right extent's crc entry */ -+ crc_r.offset -= crc_l.live_size; -+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_r, -+ extent_entry_type(en_l)); -+ use_right_ptr = true; -+ } else { -+ crc_l.csum = bch2_checksum_merge(crc_l.csum_type, -+ crc_l.csum, -+ crc_r.csum, -+ crc_r.uncompressed_size << 9); -+ -+ crc_l.uncompressed_size += crc_r.uncompressed_size; -+ crc_l.compressed_size += crc_r.compressed_size; -+ bch2_extent_crc_pack(entry_to_crc(en_l), crc_l, -+ extent_entry_type(en_l)); -+ } -+ } -+ -+ en_l = extent_entry_next(en_l); -+ en_r = extent_entry_next(en_r); -+ } -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ return true; -+} -+ -+/* KEY_TYPE_reservation: */ -+ -+int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ int ret = 0; -+ -+ bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, -+ reservation_key_nr_replicas_invalid, -+ "invalid nr_replicas (%u)", r.v->nr_replicas); -+fsck_err: -+ return ret; -+} -+ -+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); -+ -+ prt_printf(out, "generation %u replicas %u", -+ le32_to_cpu(r.v->generation), -+ r.v->nr_replicas); -+} -+ -+bool bch2_reservation_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_reservation l = bkey_s_to_reservation(_l); -+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(_r); -+ -+ if (l.v->generation != r.v->generation || -+ l.v->nr_replicas != r.v->nr_replicas) -+ return false; -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ return true; -+} -+ -+/* Extent checksum entries: */ -+ -+/* returns true if not equal */ -+static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, -+ struct bch_extent_crc_unpacked r) -+{ -+ return (l.csum_type != r.csum_type || -+ l.compression_type != r.compression_type || -+ l.compressed_size != r.compressed_size || -+ l.uncompressed_size != r.uncompressed_size || -+ l.offset != r.offset || -+ l.live_size != r.live_size || -+ l.nonce != r.nonce || -+ bch2_crc_cmp(l.csum, r.csum)); -+} -+ -+static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u, -+ struct bch_extent_crc_unpacked n) -+{ -+ return !crc_is_compressed(u) && -+ u.csum_type && -+ u.uncompressed_size > u.live_size && -+ bch2_csum_type_is_encryption(u.csum_type) == -+ bch2_csum_type_is_encryption(n.csum_type); -+} -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c k, -+ struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ if (!n.csum_type) -+ return false; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (can_narrow_crc(crc, n)) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * We're writing another replica for this extent, so while we've got the data in -+ * memory we'll be computing a new checksum for the currently live data. 
-+ * -+ * If there are other replicas we aren't moving, and they are checksummed but -+ * not compressed, we can modify them to point to only the data that is -+ * currently live (so that readers won't have to bounce) while we've got the -+ * checksum we need: -+ */ -+bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked u; -+ struct extent_ptr_decoded p; -+ union bch_extent_entry *i; -+ bool ret = false; -+ -+ /* Find a checksum entry that covers only live data: */ -+ if (!n.csum_type) { -+ bkey_for_each_crc(&k->k, ptrs, u, i) -+ if (!crc_is_compressed(u) && -+ u.csum_type && -+ u.live_size == u.uncompressed_size) { -+ n = u; -+ goto found; -+ } -+ return false; -+ } -+found: -+ BUG_ON(crc_is_compressed(n)); -+ BUG_ON(n.offset); -+ BUG_ON(n.live_size != k->k.size); -+ -+restart_narrow_pointers: -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ -+ bkey_for_each_ptr_decode(&k->k, ptrs, p, i) -+ if (can_narrow_crc(p.crc, n)) { -+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(k), &i->ptr); -+ p.ptr.offset += p.crc.offset; -+ p.crc = n; -+ bch2_extent_ptr_decoded_append(k, &p); -+ ret = true; -+ goto restart_narrow_pointers; -+ } -+ -+ return ret; -+} -+ -+static void bch2_extent_crc_pack(union bch_extent_crc *dst, -+ struct bch_extent_crc_unpacked src, -+ enum bch_extent_entry_type type) -+{ -+#define set_common_fields(_dst, _src) \ -+ _dst.type = 1 << type; \ -+ _dst.csum_type = _src.csum_type, \ -+ _dst.compression_type = _src.compression_type, \ -+ _dst._compressed_size = _src.compressed_size - 1, \ -+ _dst._uncompressed_size = _src.uncompressed_size - 1, \ -+ _dst.offset = _src.offset -+ -+ switch (type) { -+ case BCH_EXTENT_ENTRY_crc32: -+ set_common_fields(dst->crc32, src); -+ dst->crc32.csum = (u32 __force) *((__le32 *) &src.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ set_common_fields(dst->crc64, src); -+ dst->crc64.nonce = src.nonce; -+ dst->crc64.csum_lo = (u64 __force) src.csum.lo; -+ dst->crc64.csum_hi = (u64 __force) *((__le16 *) &src.csum.hi); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ set_common_fields(dst->crc128, src); -+ dst->crc128.nonce = src.nonce; -+ dst->crc128.csum = src.csum; -+ break; -+ default: -+ BUG(); -+ } -+#undef set_common_fields -+} -+ -+void bch2_extent_crc_append(struct bkey_i *k, -+ struct bch_extent_crc_unpacked new) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ union bch_extent_crc *crc = (void *) ptrs.end; -+ enum bch_extent_entry_type type; -+ -+ if (bch_crc_bytes[new.csum_type] <= 4 && -+ new.uncompressed_size <= CRC32_SIZE_MAX && -+ new.nonce <= CRC32_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc32; -+ else if (bch_crc_bytes[new.csum_type] <= 10 && -+ new.uncompressed_size <= CRC64_SIZE_MAX && -+ new.nonce <= CRC64_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc64; -+ else if (bch_crc_bytes[new.csum_type] <= 16 && -+ new.uncompressed_size <= CRC128_SIZE_MAX && -+ new.nonce <= CRC128_NONCE_MAX) -+ type = BCH_EXTENT_ENTRY_crc128; -+ else -+ BUG(); -+ -+ bch2_extent_crc_pack(crc, new, type); -+ -+ k->k.u64s += extent_entry_u64s(ptrs.end); -+ -+ EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX); -+} -+ -+/* Generic code for keys with pointers: */ -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) -+{ -+ return bch2_bkey_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c k) -+{ -+ return k.k->type == KEY_TYPE_reservation -+ ? 
bkey_s_c_to_reservation(k).v->nr_replicas -+ : bch2_bkey_dirty_devs(k).nr; -+} -+ -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c k) -+{ -+ unsigned ret = 0; -+ -+ if (k.k->type == KEY_TYPE_reservation) { -+ ret = bkey_s_c_to_reservation(k).v->nr_replicas; -+ } else { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ ret += !p.ptr.cached && !crc_is_compressed(p.crc); -+ } -+ -+ return ret; -+} -+ -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned ret = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (!p.ptr.cached && crc_is_compressed(p.crc)) -+ ret += p.crc.compressed_size; -+ -+ return ret; -+} -+ -+bool bch2_bkey_is_incompressible(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, entry) -+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) -+ return true; -+ return false; -+} -+ -+unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p = { 0 }; -+ unsigned replicas = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.ptr.cached) -+ continue; -+ -+ if (p.has_ec) -+ replicas += p.ec.redundancy; -+ -+ replicas++; -+ -+ } -+ -+ return replicas; -+} -+ -+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p) -+{ -+ struct bch_dev *ca; -+ -+ if (p->ptr.cached) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, p->ptr.dev); -+ -+ return ca->mi.durability + -+ (p->has_ec -+ ? p->ec.redundancy -+ : 0); -+} -+ -+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p) -+{ -+ struct bch_dev *ca; -+ -+ if (p->ptr.cached) -+ return 0; -+ -+ ca = bch_dev_bkey_exists(c, p->ptr.dev); -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_failed) -+ return 0; -+ -+ return ca->mi.durability + -+ (p->has_ec -+ ? 
p->ec.redundancy -+ : 0); -+} -+ -+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned durability = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ durability += bch2_extent_ptr_durability(c, &p); -+ -+ return durability; -+} -+ -+static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned durability = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev < c->sb.nr_devices && c->devs[p.ptr.dev]) -+ durability += bch2_extent_ptr_durability(c, &p); -+ -+ return durability; -+} -+ -+void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry) -+{ -+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); -+ union bch_extent_entry *next = extent_entry_next(entry); -+ -+ memmove_u64s(entry, next, (u64 *) end - (u64 *) next); -+ k->k.u64s -= extent_entry_u64s(entry); -+} -+ -+void bch2_extent_ptr_decoded_append(struct bkey_i *k, -+ struct extent_ptr_decoded *p) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); -+ struct bch_extent_crc_unpacked crc = -+ bch2_extent_crc_unpack(&k->k, NULL); -+ union bch_extent_entry *pos; -+ -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = ptrs.start; -+ goto found; -+ } -+ -+ bkey_for_each_crc(&k->k, ptrs, crc, pos) -+ if (!bch2_crc_unpacked_cmp(crc, p->crc)) { -+ pos = extent_entry_next(pos); -+ goto found; -+ } -+ -+ bch2_extent_crc_append(k, p->crc); -+ pos = bkey_val_end(bkey_i_to_s(k)); -+found: -+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ptr)); -+ -+ if (p->has_ec) { -+ p->ec.type = 1 << BCH_EXTENT_ENTRY_stripe_ptr; -+ __extent_entry_insert(k, pos, to_entry(&p->ec)); -+ } -+} -+ -+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, -+ union bch_extent_entry *entry) -+{ -+ union bch_extent_entry *i = ptrs.start; -+ -+ if (i == entry) -+ return NULL; -+ -+ while (extent_entry_next(i) != entry) -+ i = extent_entry_next(i); -+ return i; -+} -+ -+/* -+ * Returns pointer to the next entry after the one being dropped: -+ */ -+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k, -+ struct bch_extent_ptr *ptr) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry = to_entry(ptr), *next; -+ union bch_extent_entry *ret = entry; -+ bool drop_crc = true; -+ -+ EBUG_ON(ptr < &ptrs.start->ptr || -+ ptr >= &ptrs.end->ptr); -+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); -+ -+ for (next = extent_entry_next(entry); -+ next != ptrs.end; -+ next = extent_entry_next(next)) { -+ if (extent_entry_is_crc(next)) { -+ break; -+ } else if (extent_entry_is_ptr(next)) { -+ drop_crc = false; -+ break; -+ } -+ } -+ -+ extent_entry_drop(k, entry); -+ -+ while ((entry = extent_entry_prev(ptrs, entry))) { -+ if (extent_entry_is_ptr(entry)) -+ break; -+ -+ if ((extent_entry_is_crc(entry) && drop_crc) || -+ extent_entry_is_stripe_ptr(entry)) { -+ ret = (void *) ret - extent_entry_bytes(entry); -+ extent_entry_drop(k, entry); -+ } -+ } -+ -+ return ret; -+} -+ -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, -+ struct bch_extent_ptr *ptr) -+{ -+ bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr; -+ union bch_extent_entry *ret = -+ bch2_bkey_drop_ptr_noerror(k, ptr); -+ -+ /* -+ * If we deleted 
all the dirty pointers and there's still cached -+ * pointers, we could set the cached pointers to dirty if they're not -+ * stale - but to do that correctly we'd need to grab an open_bucket -+ * reference so that we don't race with bucket reuse: -+ */ -+ if (have_dirty && -+ !bch2_bkey_dirty_devs(k.s_c).nr) { -+ k.k->type = KEY_TYPE_error; -+ set_bkey_val_u64s(k.k, 0); -+ ret = NULL; -+ } else if (!bch2_bkey_nr_ptrs(k.s_c)) { -+ k.k->type = KEY_TYPE_deleted; -+ set_bkey_val_u64s(k.k, 0); -+ ret = NULL; -+ } -+ -+ return ret; -+} -+ -+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); -+} -+ -+void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev) -+{ -+ struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev); -+ -+ if (ptr) -+ bch2_bkey_drop_ptr_noerror(k, ptr); -+} -+ -+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (ptr->dev == dev) -+ return ptr; -+ -+ return NULL; -+} -+ -+bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (bch2_dev_in_target(c, ptr->dev, target) && -+ (!ptr->cached || -+ !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) -+ return true; -+ -+ return false; -+} -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, -+ struct bch_extent_ptr m, u64 offset) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ if (p.ptr.dev == m.dev && -+ p.ptr.gen == m.gen && -+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) == -+ (s64) m.offset - offset) -+ return true; -+ -+ return false; -+} -+ -+/* -+ * Returns true if two extents refer to the same data: -+ */ -+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2) -+{ -+ if (k1.k->type != k2.k->type) -+ return false; -+ -+ if (bkey_extent_is_direct_data(k1.k)) { -+ struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1); -+ struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2); -+ const union bch_extent_entry *entry1, *entry2; -+ struct extent_ptr_decoded p1, p2; -+ -+ if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2)) -+ return false; -+ -+ bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1) -+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) -+ if (p1.ptr.dev == p2.ptr.dev && -+ p1.ptr.gen == p2.ptr.gen && -+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) == -+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k)) -+ return true; -+ -+ return false; -+ } else { -+ /* KEY_TYPE_deleted, etc. 
*/ -+ return true; -+ } -+} -+ -+struct bch_extent_ptr * -+bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bkey_s k2) -+{ -+ struct bkey_ptrs ptrs2 = bch2_bkey_ptrs(k2); -+ union bch_extent_entry *entry2; -+ struct extent_ptr_decoded p2; -+ -+ bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) -+ if (p1.ptr.dev == p2.ptr.dev && -+ p1.ptr.gen == p2.ptr.gen && -+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) == -+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k)) -+ return &entry2->ptr; -+ -+ return NULL; -+} -+ -+void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ union bch_extent_entry *ec = NULL; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (&entry->ptr == ptr) { -+ ptr->cached = true; -+ if (ec) -+ extent_entry_drop(k, ec); -+ return; -+ } -+ -+ if (extent_entry_is_stripe_ptr(entry)) -+ ec = entry; -+ else if (extent_entry_is_ptr(entry)) -+ ec = NULL; -+ } -+ -+ BUG(); -+} -+ -+/* -+ * bch_extent_normalize - clean up an extent, dropping stale pointers etc. -+ * -+ * Returns true if @k should be dropped entirely -+ * -+ * For existing keys, only called when btree nodes are being rewritten, not when -+ * they're merely being compacted/resorted in memory. -+ */ -+bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) -+{ -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(k, ptr, -+ ptr->cached && -+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -+ -+ return bkey_deleted(k.k); -+} -+ -+void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ bool first = true; -+ -+ if (c) -+ prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k)); -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ if (!first) -+ prt_printf(out, " "); -+ -+ switch (__extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: { -+ const struct bch_extent_ptr *ptr = entry_to_ptr(entry); -+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] -+ ? bch_dev_bkey_exists(c, ptr->dev) -+ : NULL; -+ -+ if (!ca) { -+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev, -+ (u64) ptr->offset, ptr->gen, -+ ptr->cached ? 
" cached" : ""); -+ } else { -+ u32 offset; -+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); -+ -+ prt_printf(out, "ptr: %u:%llu:%u gen %u", -+ ptr->dev, b, offset, ptr->gen); -+ if (ptr->cached) -+ prt_str(out, " cached"); -+ if (ptr->unwritten) -+ prt_str(out, " unwritten"); -+ if (ca && ptr_stale(ca, ptr)) -+ prt_printf(out, " stale"); -+ } -+ break; -+ } -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: { -+ struct bch_extent_crc_unpacked crc = -+ bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s", -+ crc.compressed_size, -+ crc.uncompressed_size, -+ crc.offset, crc.nonce, -+ bch2_csum_types[crc.csum_type], -+ bch2_compression_types[crc.compression_type]); -+ break; -+ } -+ case BCH_EXTENT_ENTRY_stripe_ptr: { -+ const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr; -+ -+ prt_printf(out, "ec: idx %llu block %u", -+ (u64) ec->idx, ec->block); -+ break; -+ } -+ case BCH_EXTENT_ENTRY_rebalance: { -+ const struct bch_extent_rebalance *r = &entry->rebalance; -+ -+ prt_str(out, "rebalance: target "); -+ if (c) -+ bch2_target_to_text(out, c, r->target); -+ else -+ prt_printf(out, "%u", r->target); -+ prt_str(out, " compression "); -+ bch2_compression_opt_to_text(out, r->compression); -+ break; -+ } -+ default: -+ prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); -+ return; -+ } -+ -+ first = false; -+ } -+} -+ -+static int extent_ptr_invalid(struct bch_fs *c, -+ struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ const struct bch_extent_ptr *ptr, -+ unsigned size_ondisk, -+ bool metadata, -+ struct printbuf *err) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr2; -+ u64 bucket; -+ u32 bucket_offset; -+ struct bch_dev *ca; -+ int ret = 0; -+ -+ if (!bch2_dev_exists2(c, ptr->dev)) { -+ /* -+ * If we're in the write path this key might have already been -+ * overwritten, and we could be seeing a device that doesn't -+ * exist anymore due to racing with device removal: -+ */ -+ if (flags & BKEY_INVALID_WRITE) -+ return 0; -+ -+ bkey_fsck_err(c, err, ptr_to_invalid_device, -+ "pointer to invalid device (%u)", ptr->dev); -+ } -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ bkey_for_each_ptr(ptrs, ptr2) -+ bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, -+ ptr_to_duplicate_device, -+ "multiple pointers to same device (%u)", ptr->dev); -+ -+ bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset); -+ -+ bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err, -+ ptr_after_last_bucket, -+ "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets); -+ bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err, -+ ptr_before_first_bucket, -+ "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket); -+ bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err, -+ ptr_spans_multiple_buckets, -+ "pointer spans multiple buckets (%u + %u > %u)", -+ bucket_offset, size_ondisk, ca->mi.bucket_size); -+fsck_err: -+ return ret; -+} -+ -+int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct bch_extent_crc_unpacked crc; -+ unsigned size_ondisk = k.k->size; -+ unsigned nonce = UINT_MAX; -+ unsigned nr_ptrs = 0; -+ bool have_written = false, 
have_unwritten = false, have_ec = false, crc_since_last_ptr = false; -+ int ret = 0; -+ -+ if (bkey_is_btree_ptr(k.k)) -+ size_ondisk = btree_sectors(c); -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, -+ extent_ptrs_invalid_entry, -+ "invalid extent entry type (got %u, max %u)", -+ __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); -+ -+ bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && -+ !extent_entry_is_ptr(entry), c, err, -+ btree_ptr_has_non_ptr, -+ "has non ptr field"); -+ -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ ret = extent_ptr_invalid(c, k, flags, &entry->ptr, -+ size_ondisk, false, err); -+ if (ret) -+ return ret; -+ -+ bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, -+ ptr_cached_and_erasure_coded, -+ "cached, erasure coded ptr"); -+ -+ if (!entry->ptr.unwritten) -+ have_written = true; -+ else -+ have_unwritten = true; -+ -+ have_ec = false; -+ crc_since_last_ptr = false; -+ nr_ptrs++; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ case BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); -+ -+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, -+ ptr_crc_uncompressed_size_too_small, -+ "checksum offset + key size > uncompressed size"); -+ bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, -+ ptr_crc_csum_type_unknown, -+ "invalid checksum type"); -+ bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, -+ ptr_crc_compression_type_unknown, -+ "invalid compression type"); -+ -+ if (bch2_csum_type_is_encryption(crc.csum_type)) { -+ if (nonce == UINT_MAX) -+ nonce = crc.offset + crc.nonce; -+ else if (nonce != crc.offset + crc.nonce) -+ bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, -+ "incorrect nonce"); -+ } -+ -+ bkey_fsck_err_on(crc_since_last_ptr, c, err, -+ ptr_crc_redundant, -+ "redundant crc entry"); -+ crc_since_last_ptr = true; -+ -+ bkey_fsck_err_on(crc_is_encoded(crc) && -+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && -+ (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err, -+ ptr_crc_uncompressed_size_too_big, -+ "too large encoded extent"); -+ -+ size_ondisk = crc.compressed_size; -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ bkey_fsck_err_on(have_ec, c, err, -+ ptr_stripe_redundant, -+ "redundant stripe entry"); -+ have_ec = true; -+ break; -+ case BCH_EXTENT_ENTRY_rebalance: { -+ const struct bch_extent_rebalance *r = &entry->rebalance; -+ -+ if (!bch2_compression_opt_valid(r->compression)) { -+ struct bch_compression_opt opt = __bch2_compression_decode(r->compression); -+ prt_printf(err, "invalid compression opt %u:%u", -+ opt.type, opt.level); -+ return -BCH_ERR_invalid_bkey; -+ } -+ break; -+ } -+ } -+ } -+ -+ bkey_fsck_err_on(!nr_ptrs, c, err, -+ extent_ptrs_no_ptrs, -+ "no ptrs"); -+ bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, -+ extent_ptrs_too_many_ptrs, -+ "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); -+ bkey_fsck_err_on(have_written && have_unwritten, c, err, -+ extent_ptrs_written_and_unwritten, -+ "extent with unwritten and written ptrs"); -+ bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, -+ extent_ptrs_unwritten, -+ "has unwritten ptrs"); -+ bkey_fsck_err_on(crc_since_last_ptr, c, err, -+ extent_ptrs_redundant_crc, -+ "redundant crc entry"); -+ bkey_fsck_err_on(have_ec, c, err, -+ extent_ptrs_redundant_stripe, -+ "redundant stripe entry"); -+fsck_err: 
-+ return ret; -+} -+ -+void bch2_ptr_swab(struct bkey_s k) -+{ -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ u64 *d; -+ -+ for (d = (u64 *) ptrs.start; -+ d != (u64 *) ptrs.end; -+ d++) -+ *d = swab64(*d); -+ -+ for (entry = ptrs.start; -+ entry < ptrs.end; -+ entry = extent_entry_next(entry)) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.csum = swab32(entry->crc32.csum); -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); -+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.csum.hi = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.hi); -+ entry->crc128.csum.lo = (__force __le64) -+ swab64((__force u64) entry->crc128.csum.lo); -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ case BCH_EXTENT_ENTRY_rebalance: -+ break; -+ } -+ } -+} -+ -+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ -+ bkey_extent_entry_for_each(ptrs, entry) -+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) -+ return &entry->rebalance; -+ -+ return NULL; -+} -+ -+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, -+ unsigned target, unsigned compression) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ unsigned rewrite_ptrs = 0; -+ -+ if (compression) { -+ unsigned compression_type = bch2_compression_opt_to_type(compression); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned i = 0; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) { -+ rewrite_ptrs = 0; -+ goto incompressible; -+ } -+ -+ if (!p.ptr.cached && p.crc.compression_type != compression_type) -+ rewrite_ptrs |= 1U << i; -+ i++; -+ } -+ } -+incompressible: -+ if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { -+ const struct bch_extent_ptr *ptr; -+ unsigned i = 0; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) -+ rewrite_ptrs |= 1U << i; -+ i++; -+ } -+ } -+ -+ return rewrite_ptrs; -+} -+ -+bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) -+{ -+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -+ -+ /* -+ * If it's an indirect extent, we don't delete the rebalance entry when -+ * done so that we know what options were applied - check if it still -+ * needs work done: -+ */ -+ if (r && -+ k.k->type == KEY_TYPE_reflink_v && -+ !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) -+ r = NULL; -+ -+ return r != NULL; -+} -+ -+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, -+ unsigned target, unsigned compression) -+{ -+ struct bkey_s k = bkey_i_to_s(_k); -+ struct bch_extent_rebalance *r; -+ bool needs_rebalance; -+ -+ if (!bkey_extent_is_direct_data(k.k)) -+ return 0; -+ -+ /* get existing rebalance entry: */ -+ r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); -+ if (r) { -+ if (k.k->type == KEY_TYPE_reflink_v) { -+ /* -+ * indirect extents: existing options take precedence, -+ * so that we don't move extents back and forth if -+ * they're referenced by different inodes with different -+ * options: -+ */ -+ if (r->target) -+ target = r->target; -+ if (r->compression) -+ 
compression = r->compression; -+ } -+ -+ r->target = target; -+ r->compression = compression; -+ } -+ -+ needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); -+ -+ if (needs_rebalance && !r) { -+ union bch_extent_entry *new = bkey_val_end(k); -+ -+ new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; -+ new->rebalance.compression = compression; -+ new->rebalance.target = target; -+ new->rebalance.unused = 0; -+ k.k->u64s += extent_entry_u64s(new); -+ } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { -+ /* -+ * For indirect extents, don't delete the rebalance entry when -+ * we're finished so that we know we specifically moved it or -+ * compressed it to its current location/compression type -+ */ -+ extent_entry_drop(k, (union bch_extent_entry *) r); -+ } -+ -+ return 0; -+} -+ -+/* Generic extent code: */ -+ -+int bch2_cut_front_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 sub; -+ -+ if (bkey_le(where, bkey_start_pos(k.k))) -+ return 0; -+ -+ EBUG_ON(bkey_gt(where, k.k->p)); -+ -+ sub = where.offset - bkey_start_offset(k.k); -+ -+ k.k->size -= sub; -+ -+ if (!k.k->size) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: { -+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); -+ union bch_extent_entry *entry; -+ bool seen_crc = false; -+ -+ bkey_extent_entry_for_each(ptrs, entry) { -+ switch (extent_entry_type(entry)) { -+ case BCH_EXTENT_ENTRY_ptr: -+ if (!seen_crc) -+ entry->ptr.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc32: -+ entry->crc32.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc64: -+ entry->crc64.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_crc128: -+ entry->crc128.offset += sub; -+ break; -+ case BCH_EXTENT_ENTRY_stripe_ptr: -+ break; -+ case BCH_EXTENT_ENTRY_rebalance: -+ break; -+ } -+ -+ if (extent_entry_is_crc(entry)) -+ seen_crc = true; -+ } -+ -+ break; -+ } -+ case KEY_TYPE_reflink_p: { -+ struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); -+ -+ le64_add_cpu(&p.v->idx, sub); -+ break; -+ } -+ case KEY_TYPE_inline_data: -+ case KEY_TYPE_indirect_inline_data: { -+ void *p = bkey_inline_data_p(k); -+ unsigned bytes = bkey_inline_data_bytes(k.k); -+ -+ sub = min_t(u64, sub << 9, bytes); -+ -+ memmove(p, p + sub, bytes - sub); -+ -+ new_val_u64s -= sub >> 3; -+ break; -+ } -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return -val_u64s_delta; -+} -+ -+int bch2_cut_back_s(struct bpos where, struct bkey_s k) -+{ -+ unsigned new_val_u64s = bkey_val_u64s(k.k); -+ int val_u64s_delta; -+ u64 len = 0; -+ -+ if (bkey_ge(where, k.k->p)) -+ return 0; -+ -+ EBUG_ON(bkey_lt(where, bkey_start_pos(k.k))); -+ -+ len = where.offset - bkey_start_offset(k.k); -+ -+ k.k->p.offset = where.offset; -+ k.k->size = len; -+ -+ if (!len) { -+ k.k->type = KEY_TYPE_deleted; -+ new_val_u64s = 0; -+ } -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inline_data: -+ case KEY_TYPE_indirect_inline_data: -+ new_val_u64s = (bkey_inline_data_offset(k.k) + -+ min(bkey_inline_data_bytes(k.k), k.k->size << 9)) >> 3; -+ break; -+ } -+ -+ val_u64s_delta = bkey_val_u64s(k.k) - new_val_u64s; -+ BUG_ON(val_u64s_delta < 0); -+ -+ set_bkey_val_u64s(k.k, new_val_u64s); -+ memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64)); -+ return 
-val_u64s_delta; -+} -diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h -new file mode 100644 -index 000000000000..a2ce8a3be13c ---- /dev/null -+++ b/fs/bcachefs/extents.h -@@ -0,0 +1,765 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENTS_H -+#define _BCACHEFS_EXTENTS_H -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "extents_types.h" -+ -+struct bch_fs; -+struct btree_trans; -+enum bkey_invalid_flags; -+ -+/* extent entries: */ -+ -+#define extent_entry_last(_e) \ -+ ((typeof(&(_e).v->start[0])) bkey_val_end(_e)) -+ -+#define entry_to_ptr(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ -+ \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const struct bch_extent_ptr *) (_entry), \ -+ (struct bch_extent_ptr *) (_entry)); \ -+}) -+ -+/* downcast, preserves const */ -+#define to_entry(_entry) \ -+({ \ -+ BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ -+ !type_is(_entry, struct bch_extent_ptr *) && \ -+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \ -+ \ -+ __builtin_choose_expr( \ -+ (type_is_exact(_entry, const union bch_extent_crc *) || \ -+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\ -+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ -+ (const union bch_extent_entry *) (_entry), \ -+ (union bch_extent_entry *) (_entry)); \ -+}) -+ -+#define extent_entry_next(_entry) \ -+ ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -+ -+static inline unsigned -+__extent_entry_type(const union bch_extent_entry *e) -+{ -+ return e->type ? __ffs(e->type) : BCH_EXTENT_ENTRY_MAX; -+} -+ -+static inline enum bch_extent_entry_type -+extent_entry_type(const union bch_extent_entry *e) -+{ -+ int ret = __ffs(e->type); -+ -+ EBUG_ON(ret < 0 || ret >= BCH_EXTENT_ENTRY_MAX); -+ -+ return ret; -+} -+ -+static inline size_t extent_entry_bytes(const union bch_extent_entry *entry) -+{ -+ switch (extent_entry_type(entry)) { -+#define x(f, n) \ -+ case BCH_EXTENT_ENTRY_##f: \ -+ return sizeof(struct bch_extent_##f); -+ BCH_EXTENT_ENTRY_TYPES() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+static inline size_t extent_entry_u64s(const union bch_extent_entry *entry) -+{ -+ return extent_entry_bytes(entry) / sizeof(u64); -+} -+ -+static inline void __extent_entry_insert(struct bkey_i *k, -+ union bch_extent_entry *dst, -+ union bch_extent_entry *new) -+{ -+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); -+ -+ memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new), -+ dst, (u64 *) end - (u64 *) dst); -+ k->k.u64s += extent_entry_u64s(new); -+ memcpy_u64s_small(dst, new, extent_entry_u64s(new)); -+} -+ -+static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) -+{ -+ union bch_extent_entry *next = extent_entry_next(entry); -+ -+ /* stripes have ptrs, but their layout doesn't work with this code */ -+ BUG_ON(k.k->type == KEY_TYPE_stripe); -+ -+ memmove_u64s_down(entry, next, -+ (u64 *) bkey_val_end(k) - (u64 *) next); -+ k.k->u64s -= (u64 *) next - (u64 *) entry; -+} -+ -+static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) -+{ -+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; -+} -+ -+static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e) -+{ -+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr; -+} -+ -+static inline bool extent_entry_is_crc(const union bch_extent_entry *e) -+{ -+ switch (extent_entry_type(e)) { -+ case BCH_EXTENT_ENTRY_crc32: -+ case 
BCH_EXTENT_ENTRY_crc64: -+ case BCH_EXTENT_ENTRY_crc128: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+union bch_extent_crc { -+ u8 type; -+ struct bch_extent_crc32 crc32; -+ struct bch_extent_crc64 crc64; -+ struct bch_extent_crc128 crc128; -+}; -+ -+#define __entry_to_crc(_entry) \ -+ __builtin_choose_expr( \ -+ type_is_exact(_entry, const union bch_extent_entry *), \ -+ (const union bch_extent_crc *) (_entry), \ -+ (union bch_extent_crc *) (_entry)) -+ -+#define entry_to_crc(_entry) \ -+({ \ -+ EBUG_ON((_entry) && !extent_entry_is_crc(_entry)); \ -+ \ -+ __entry_to_crc(_entry); \ -+}) -+ -+static inline struct bch_extent_crc_unpacked -+bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) -+{ -+#define common_fields(_crc) \ -+ .csum_type = _crc.csum_type, \ -+ .compression_type = _crc.compression_type, \ -+ .compressed_size = _crc._compressed_size + 1, \ -+ .uncompressed_size = _crc._uncompressed_size + 1, \ -+ .offset = _crc.offset, \ -+ .live_size = k->size -+ -+ if (!crc) -+ return (struct bch_extent_crc_unpacked) { -+ .compressed_size = k->size, -+ .uncompressed_size = k->size, -+ .live_size = k->size, -+ }; -+ -+ switch (extent_entry_type(to_entry(crc))) { -+ case BCH_EXTENT_ENTRY_crc32: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc32), -+ }; -+ -+ *((__le32 *) &ret.csum.lo) = (__le32 __force) crc->crc32.csum; -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc64: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc64), -+ .nonce = crc->crc64.nonce, -+ .csum.lo = (__force __le64) crc->crc64.csum_lo, -+ }; -+ -+ *((__le16 *) &ret.csum.hi) = (__le16 __force) crc->crc64.csum_hi; -+ -+ return ret; -+ } -+ case BCH_EXTENT_ENTRY_crc128: { -+ struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) { -+ common_fields(crc->crc128), -+ .nonce = crc->crc128.nonce, -+ .csum = crc->crc128.csum, -+ }; -+ -+ return ret; -+ } -+ default: -+ BUG(); -+ } -+#undef common_fields -+} -+ -+static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) -+{ -+ return (crc.compression_type != BCH_COMPRESSION_TYPE_none && -+ crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); -+} -+ -+static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc) -+{ -+ return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc); -+} -+ -+/* bkey_ptrs: generically over any key type that has ptrs */ -+ -+struct bkey_ptrs_c { -+ const union bch_extent_entry *start; -+ const union bch_extent_entry *end; -+}; -+ -+struct bkey_ptrs { -+ union bch_extent_entry *start; -+ union bch_extent_entry *end; -+}; -+ -+static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: { -+ struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); -+ -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ case KEY_TYPE_extent: { -+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k); -+ -+ return (struct bkey_ptrs_c) { -+ e.v->start, -+ extent_entry_last(e) -+ }; -+ } -+ case KEY_TYPE_stripe: { -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ -+ return (struct bkey_ptrs_c) { -+ to_entry(&s.v->ptrs[0]), -+ to_entry(&s.v->ptrs[s.v->nr_blocks]), -+ }; -+ } -+ case KEY_TYPE_reflink_v: { -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ return (struct bkey_ptrs_c) { -+ r.v->start, -+ bkey_val_end(r), -+ }; -+ } -+ case 
KEY_TYPE_btree_ptr_v2: { -+ struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k); -+ -+ return (struct bkey_ptrs_c) { -+ to_entry(&e.v->start[0]), -+ to_entry(extent_entry_last(e)) -+ }; -+ } -+ default: -+ return (struct bkey_ptrs_c) { NULL, NULL }; -+ } -+} -+ -+static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) -+{ -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); -+ -+ return (struct bkey_ptrs) { -+ (void *) p.start, -+ (void *) p.end -+ }; -+} -+ -+#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ -+ for ((_entry) = (_start); \ -+ (_entry) < (_end); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define __bkey_ptr_next(_ptr, _end) \ -+({ \ -+ typeof(_end) _entry; \ -+ \ -+ __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ -+ if (extent_entry_is_ptr(_entry)) \ -+ break; \ -+ \ -+ _entry < (_end) ? entry_to_ptr(_entry) : NULL; \ -+}) -+ -+#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -+ -+#define bkey_extent_entry_for_each(_p, _entry) \ -+ bkey_extent_entry_for_each_from(_p, _entry, _p.start) -+ -+#define __bkey_for_each_ptr(_start, _end, _ptr) \ -+ for ((_ptr) = (_start); \ -+ ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ -+ (_ptr)++) -+ -+#define bkey_ptr_next(_p, _ptr) \ -+ __bkey_ptr_next(_ptr, (_p).end) -+ -+#define bkey_for_each_ptr(_p, _ptr) \ -+ __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -+ -+#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ -+({ \ -+ __label__ out; \ -+ \ -+ (_ptr).idx = 0; \ -+ (_ptr).has_ec = false; \ -+ \ -+ __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ -+ switch (extent_entry_type(_entry)) { \ -+ case BCH_EXTENT_ENTRY_ptr: \ -+ (_ptr).ptr = _entry->ptr; \ -+ goto out; \ -+ case BCH_EXTENT_ENTRY_crc32: \ -+ case BCH_EXTENT_ENTRY_crc64: \ -+ case BCH_EXTENT_ENTRY_crc128: \ -+ (_ptr).crc = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_entry)); \ -+ break; \ -+ case BCH_EXTENT_ENTRY_stripe_ptr: \ -+ (_ptr).ec = _entry->stripe_ptr; \ -+ (_ptr).has_ec = true; \ -+ break; \ -+ default: \ -+ /* nothing */ \ -+ break; \ -+ } \ -+out: \ -+ _entry < (_end); \ -+}) -+ -+#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ -+ for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ -+ (_entry) = _start; \ -+ __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ -+ (_entry) = extent_entry_next(_entry)) -+ -+#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ -+ _ptr, _entry) -+ -+#define bkey_crc_next(_k, _start, _end, _crc, _iter) \ -+({ \ -+ __bkey_extent_entry_for_each_from(_iter, _end, _iter) \ -+ if (extent_entry_is_crc(_iter)) { \ -+ (_crc) = bch2_extent_crc_unpack(_k, \ -+ entry_to_crc(_iter)); \ -+ break; \ -+ } \ -+ \ -+ (_iter) < (_end); \ -+}) -+ -+#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \ -+ for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \ -+ (_iter) = (_start); \ -+ bkey_crc_next(_k, _start, _end, _crc, _iter); \ -+ (_iter) = extent_entry_next(_iter)) -+ -+#define bkey_for_each_crc(_k, _p, _crc, _iter) \ -+ __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter) -+ -+/* Iterate over pointers in KEY_TYPE_extent: */ -+ -+#define extent_for_each_entry_from(_e, _entry, _start) \ -+ __bkey_extent_entry_for_each_from(_start, \ -+ extent_entry_last(_e), _entry) -+ -+#define extent_for_each_entry(_e, _entry) \ -+ extent_for_each_entry_from(_e, _entry, (_e).v->start) -+ -+#define extent_ptr_next(_e, _ptr) \ -+ 
__bkey_ptr_next(_ptr, extent_entry_last(_e)) -+ -+#define extent_for_each_ptr(_e, _ptr) \ -+ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) -+ -+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ -+ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ -+ extent_entry_last(_e), _ptr, _entry) -+ -+/* utility code common to all keys with pointers: */ -+ -+void bch2_mark_io_failure(struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, -+ struct bch_io_failures *, -+ struct extent_ptr_decoded *); -+ -+/* KEY_TYPE_btree_ptr: */ -+ -+int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+ -+int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, -+ int, struct bkey_s); -+ -+#define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_invalid, \ -+ .val_to_text = bch2_btree_ptr_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .trans_trigger = bch2_trans_mark_extent, \ -+ .atomic_trigger = bch2_mark_extent, \ -+}) -+ -+#define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ -+ .key_invalid = bch2_btree_ptr_v2_invalid, \ -+ .val_to_text = bch2_btree_ptr_v2_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .compat = bch2_btree_ptr_v2_compat, \ -+ .trans_trigger = bch2_trans_mark_extent, \ -+ .atomic_trigger = bch2_mark_extent, \ -+ .min_val_size = 40, \ -+}) -+ -+/* KEY_TYPE_extent: */ -+ -+bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -+ -+#define bch2_bkey_ops_extent ((struct bkey_ops) { \ -+ .key_invalid = bch2_bkey_ptrs_invalid, \ -+ .val_to_text = bch2_bkey_ptrs_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .key_normalize = bch2_extent_normalize, \ -+ .key_merge = bch2_extent_merge, \ -+ .trans_trigger = bch2_trans_mark_extent, \ -+ .atomic_trigger = bch2_mark_extent, \ -+}) -+ -+/* KEY_TYPE_reservation: */ -+ -+int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -+ -+#define bch2_bkey_ops_reservation ((struct bkey_ops) { \ -+ .key_invalid = bch2_reservation_invalid, \ -+ .val_to_text = bch2_reservation_to_text, \ -+ .key_merge = bch2_reservation_merge, \ -+ .trans_trigger = bch2_trans_mark_reservation, \ -+ .atomic_trigger = bch2_mark_reservation, \ -+ .min_val_size = 8, \ -+}) -+ -+/* Extent checksum entries: */ -+ -+bool bch2_can_narrow_extent_crcs(struct bkey_s_c, -+ struct bch_extent_crc_unpacked); -+bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked); -+void bch2_extent_crc_append(struct bkey_i *, -+ struct bch_extent_crc_unpacked); -+ -+/* Generic code for keys with pointers: */ -+ -+static inline bool bkey_is_btree_ptr(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool bkey_extent_is_direct_data(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ case 
KEY_TYPE_reflink_v: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool bkey_extent_is_inline_data(const struct bkey *k) -+{ -+ return k->type == KEY_TYPE_inline_data || -+ k->type == KEY_TYPE_indirect_inline_data; -+} -+ -+static inline unsigned bkey_inline_data_offset(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_inline_data: -+ return sizeof(struct bch_inline_data); -+ case KEY_TYPE_indirect_inline_data: -+ return sizeof(struct bch_indirect_inline_data); -+ default: -+ BUG(); -+ } -+} -+ -+static inline unsigned bkey_inline_data_bytes(const struct bkey *k) -+{ -+ return bkey_val_bytes(k) - bkey_inline_data_offset(k); -+} -+ -+#define bkey_inline_data_p(_k) (((void *) (_k).v) + bkey_inline_data_offset((_k).k)) -+ -+static inline bool bkey_extent_is_data(const struct bkey *k) -+{ -+ return bkey_extent_is_direct_data(k) || -+ bkey_extent_is_inline_data(k) || -+ k->type == KEY_TYPE_reflink_p; -+} -+ -+/* -+ * Should extent be counted under inode->i_sectors? -+ */ -+static inline bool bkey_extent_is_allocation(const struct bkey *k) -+{ -+ switch (k->type) { -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reservation: -+ case KEY_TYPE_reflink_p: -+ case KEY_TYPE_reflink_v: -+ case KEY_TYPE_inline_data: -+ case KEY_TYPE_indirect_inline_data: -+ case KEY_TYPE_error: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ if (ptr->unwritten) -+ return true; -+ return false; -+} -+ -+static inline bool bkey_extent_is_reservation(struct bkey_s_c k) -+{ -+ return k.k->type == KEY_TYPE_reservation || -+ bkey_extent_is_unwritten(k); -+} -+ -+static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (!ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) -+{ -+ struct bch_devs_list ret = (struct bch_devs_list) { 0 }; -+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ -+ bkey_for_each_ptr(p, ptr) -+ if (ptr->cached) -+ ret.devs[ret.nr++] = ptr->dev; -+ -+ return ret; -+} -+ -+static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ return BCH_DATA_btree; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ return BCH_DATA_user; -+ case KEY_TYPE_stripe: { -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ -+ BUG_ON(ptr < s.v->ptrs || -+ ptr >= s.v->ptrs + s.v->nr_blocks); -+ -+ return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant -+ ? 
BCH_DATA_parity -+ : BCH_DATA_user; -+ } -+ default: -+ BUG(); -+ } -+} -+ -+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); -+unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); -+bool bch2_bkey_is_incompressible(struct bkey_s_c); -+unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); -+ -+unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c); -+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *, struct extent_ptr_decoded *); -+unsigned bch2_extent_ptr_durability(struct bch_fs *, struct extent_ptr_decoded *); -+unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); -+ -+void bch2_bkey_drop_device(struct bkey_s, unsigned); -+void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned); -+ -+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c, unsigned); -+ -+static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsigned dev) -+{ -+ return (void *) bch2_bkey_has_device_c(k.s_c, dev); -+} -+ -+bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); -+ -+void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *); -+ -+static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr ptr) -+{ -+ struct bch_extent_ptr *dest; -+ -+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s(k), ptr.dev)); -+ -+ switch (k->k.type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ case KEY_TYPE_extent: -+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); -+ -+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; -+ dest = (struct bch_extent_ptr *)((void *) &k->v + bkey_val_bytes(&k->k)); -+ *dest = ptr; -+ k->k.u64s++; -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_extent_ptr_decoded_append(struct bkey_i *, -+ struct extent_ptr_decoded *); -+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s, -+ struct bch_extent_ptr *); -+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, -+ struct bch_extent_ptr *); -+ -+#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ -+do { \ -+ struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ -+ \ -+ _ptr = &_ptrs.start->ptr; \ -+ \ -+ while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ -+ if (_cond) { \ -+ _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ -+ _ptrs = bch2_bkey_ptrs(_k); \ -+ continue; \ -+ } \ -+ \ -+ (_ptr)++; \ -+ } \ -+} while (0) -+ -+bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, -+ struct bch_extent_ptr, u64); -+bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c); -+struct bch_extent_ptr * -+bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s); -+ -+void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); -+ -+bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -+void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+ -+void bch2_ptr_swab(struct bkey_s); -+ -+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); -+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, -+ unsigned, unsigned); -+bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); -+ -+int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, -+ unsigned, unsigned); -+ -+/* Generic extent code: */ -+ -+enum bch_extent_overlap { -+ BCH_EXTENT_OVERLAP_ALL = 0, -+ BCH_EXTENT_OVERLAP_BACK = 1, -+ BCH_EXTENT_OVERLAP_FRONT = 2, -+ 
BCH_EXTENT_OVERLAP_MIDDLE = 3, -+}; -+ -+/* Returns how k overlaps with m */ -+static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k, -+ const struct bkey *m) -+{ -+ int cmp1 = bkey_lt(k->p, m->p); -+ int cmp2 = bkey_gt(bkey_start_pos(k), bkey_start_pos(m)); -+ -+ return (cmp1 << 1) + cmp2; -+} -+ -+int bch2_cut_front_s(struct bpos, struct bkey_s); -+int bch2_cut_back_s(struct bpos, struct bkey_s); -+ -+static inline void bch2_cut_front(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_front_s(where, bkey_i_to_s(k)); -+} -+ -+static inline void bch2_cut_back(struct bpos where, struct bkey_i *k) -+{ -+ bch2_cut_back_s(where, bkey_i_to_s(k)); -+} -+ -+/** -+ * bch_key_resize - adjust size of @k -+ * -+ * bkey_start_offset(k) will be preserved, modifies where the extent ends -+ */ -+static inline void bch2_key_resize(struct bkey *k, unsigned new_size) -+{ -+ k->p.offset -= k->size; -+ k->p.offset += new_size; -+ k->size = new_size; -+} -+ -+#endif /* _BCACHEFS_EXTENTS_H */ -diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h -new file mode 100644 -index 000000000000..43d6c341ecca ---- /dev/null -+++ b/fs/bcachefs/extents_types.h -@@ -0,0 +1,40 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_EXTENTS_TYPES_H -+#define _BCACHEFS_EXTENTS_TYPES_H -+ -+#include "bcachefs_format.h" -+ -+struct bch_extent_crc_unpacked { -+ u32 compressed_size; -+ u32 uncompressed_size; -+ u32 live_size; -+ -+ u8 csum_type; -+ u8 compression_type; -+ -+ u16 offset; -+ -+ u16 nonce; -+ -+ struct bch_csum csum; -+}; -+ -+struct extent_ptr_decoded { -+ unsigned idx; -+ bool has_ec; -+ struct bch_extent_crc_unpacked crc; -+ struct bch_extent_ptr ptr; -+ struct bch_extent_stripe_ptr ec; -+}; -+ -+struct bch_io_failures { -+ u8 nr; -+ struct bch_dev_io_failures { -+ u8 dev; -+ u8 idx; -+ u8 nr_failed; -+ u8 nr_retries; -+ } devs[BCH_REPLICAS_MAX]; -+}; -+ -+#endif /* _BCACHEFS_EXTENTS_TYPES_H */ -diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h -new file mode 100644 -index 000000000000..05429c9631cd ---- /dev/null -+++ b/fs/bcachefs/eytzinger.h -@@ -0,0 +1,281 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _EYTZINGER_H -+#define _EYTZINGER_H -+ -+#include -+#include -+ -+#include "util.h" -+ -+/* -+ * Traversal for trees in eytzinger layout - a full binary tree layed out in an -+ * array -+ */ -+ -+/* -+ * One based indexing version: -+ * -+ * With one based indexing each level of the tree starts at a power of two - -+ * good for cacheline alignment: -+ */ -+ -+static inline unsigned eytzinger1_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + child; -+} -+ -+static inline unsigned eytzinger1_left_child(unsigned i) -+{ -+ return eytzinger1_child(i, 0); -+} -+ -+static inline unsigned eytzinger1_right_child(unsigned i) -+{ -+ return eytzinger1_child(i, 1); -+} -+ -+static inline unsigned eytzinger1_first(unsigned size) -+{ -+ return rounddown_pow_of_two(size); -+} -+ -+static inline unsigned eytzinger1_last(unsigned size) -+{ -+ return rounddown_pow_of_two(size + 1) - 1; -+} -+ -+/* -+ * eytzinger1_next() and eytzinger1_prev() have the nice properties that -+ * -+ * eytzinger1_next(0) == eytzinger1_first()) -+ * eytzinger1_prev(0) == eytzinger1_last()) -+ * -+ * eytzinger1_prev(eytzinger1_first()) == 0 -+ * eytzinger1_next(eytzinger1_last()) == 0 -+ */ -+ -+static inline unsigned eytzinger1_next(unsigned i, unsigned size) -+{ -+ EBUG_ON(i > size); -+ -+ if (eytzinger1_right_child(i) <= size) { -+ i = 
eytzinger1_right_child(i); -+ -+ i <<= __fls(size + 1) - __fls(i); -+ i >>= i > size; -+ } else { -+ i >>= ffz(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_prev(unsigned i, unsigned size) -+{ -+ EBUG_ON(i > size); -+ -+ if (eytzinger1_left_child(i) <= size) { -+ i = eytzinger1_left_child(i) + 1; -+ -+ i <<= __fls(size + 1) - __fls(i); -+ i -= 1; -+ i >>= i > size; -+ } else { -+ i >>= __ffs(i) + 1; -+ } -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_extra(unsigned size) -+{ -+ return (size + 1 - rounddown_pow_of_two(size)) << 1; -+} -+ -+static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned b = __fls(i); -+ unsigned shift = __fls(size) - b; -+ int s; -+ -+ EBUG_ON(!i || i > size); -+ -+ i ^= 1U << b; -+ i <<= 1; -+ i |= 1; -+ i <<= shift; -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i -= (i - extra) >> 1; -+ */ -+ s = extra - i; -+ i += (s >> 1) & (s >> 31); -+ -+ return i; -+} -+ -+static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ unsigned shift; -+ int s; -+ -+ EBUG_ON(!i || i > size); -+ -+ /* -+ * sign bit trick: -+ * -+ * if (i > extra) -+ * i += i - extra; -+ */ -+ s = extra - i; -+ i -= s & (s >> 31); -+ -+ shift = __ffs(i); -+ -+ i >>= shift + 1; -+ i |= 1U << (__fls(size) - shift); -+ -+ return i; -+} -+ -+static inline unsigned eytzinger1_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger1_to_inorder(i, size, eytzinger1_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger1(i, size, eytzinger1_extra(size)); -+} -+ -+#define eytzinger1_for_each(_i, _size) \ -+ for ((_i) = eytzinger1_first((_size)); \ -+ (_i) != 0; \ -+ (_i) = eytzinger1_next((_i), (_size))) -+ -+/* Zero based indexing version: */ -+ -+static inline unsigned eytzinger0_child(unsigned i, unsigned child) -+{ -+ EBUG_ON(child > 1); -+ -+ return (i << 1) + 1 + child; -+} -+ -+static inline unsigned eytzinger0_left_child(unsigned i) -+{ -+ return eytzinger0_child(i, 0); -+} -+ -+static inline unsigned eytzinger0_right_child(unsigned i) -+{ -+ return eytzinger0_child(i, 1); -+} -+ -+static inline unsigned eytzinger0_first(unsigned size) -+{ -+ return eytzinger1_first(size) - 1; -+} -+ -+static inline unsigned eytzinger0_last(unsigned size) -+{ -+ return eytzinger1_last(size) - 1; -+} -+ -+static inline unsigned eytzinger0_next(unsigned i, unsigned size) -+{ -+ return eytzinger1_next(i + 1, size) - 1; -+} -+ -+static inline unsigned eytzinger0_prev(unsigned i, unsigned size) -+{ -+ return eytzinger1_prev(i + 1, size) - 1; -+} -+ -+static inline unsigned eytzinger0_extra(unsigned size) -+{ -+ return eytzinger1_extra(size); -+} -+ -+static inline unsigned __eytzinger0_to_inorder(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __eytzinger1_to_inorder(i + 1, size, extra) - 1; -+} -+ -+static inline unsigned __inorder_to_eytzinger0(unsigned i, unsigned size, -+ unsigned extra) -+{ -+ return __inorder_to_eytzinger1(i + 1, size, extra) - 1; -+} -+ -+static inline unsigned eytzinger0_to_inorder(unsigned i, unsigned size) -+{ -+ return __eytzinger0_to_inorder(i, size, eytzinger0_extra(size)); -+} -+ -+static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) -+{ -+ return __inorder_to_eytzinger0(i, size, eytzinger0_extra(size)); -+} -+ -+#define eytzinger0_for_each(_i, _size) \ -+ for ((_i) = eytzinger0_first((_size)); \ -+ (_i) != -1; \ -+ (_i) = 
eytzinger0_next((_i), (_size))) -+ -+typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); -+ -+/* return greatest node <= @search, or -1 if not found */ -+static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, -+ eytzinger_cmp_fn cmp, const void *search) -+{ -+ unsigned i, n = 0; -+ -+ if (!nr) -+ return -1; -+ -+ do { -+ i = n; -+ n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); -+ } while (n < nr); -+ -+ if (n & 1) { -+ /* @i was greater than @search, return previous node: */ -+ -+ if (i == eytzinger0_first(nr)) -+ return -1; -+ -+ return eytzinger0_prev(i, nr); -+ } else { -+ return i; -+ } -+} -+ -+#define eytzinger0_find(base, nr, size, _cmp, search) \ -+({ \ -+ void *_base = (base); \ -+ void *_search = (search); \ -+ size_t _nr = (nr); \ -+ size_t _size = (size); \ -+ size_t _i = 0; \ -+ int _res; \ -+ \ -+ while (_i < _nr && \ -+ (_res = _cmp(_search, _base + _i * _size, _size))) \ -+ _i = eytzinger0_child(_i, _res > 0); \ -+ _i; \ -+}) -+ -+void eytzinger0_sort(void *, size_t, size_t, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+#endif /* _EYTZINGER_H */ -diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h -new file mode 100644 -index 000000000000..66b945be10c2 ---- /dev/null -+++ b/fs/bcachefs/fifo.h -@@ -0,0 +1,127 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FIFO_H -+#define _BCACHEFS_FIFO_H -+ -+#include "util.h" -+ -+#define FIFO(type) \ -+struct { \ -+ size_t front, back, size, mask; \ -+ type *data; \ -+} -+ -+#define DECLARE_FIFO(type, name) FIFO(type) name -+ -+#define fifo_buf_size(fifo) \ -+ ((fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) * sizeof((fifo)->data[0]) \ -+ : 0) -+ -+#define init_fifo(fifo, _size, _gfp) \ -+({ \ -+ (fifo)->front = (fifo)->back = 0; \ -+ (fifo)->size = (_size); \ -+ (fifo)->mask = (fifo)->size \ -+ ? roundup_pow_of_two((fifo)->size) - 1 \ -+ : 0; \ -+ (fifo)->data = kvpmalloc(fifo_buf_size(fifo), (_gfp)); \ -+}) -+ -+#define free_fifo(fifo) \ -+do { \ -+ kvpfree((fifo)->data, fifo_buf_size(fifo)); \ -+ (fifo)->data = NULL; \ -+} while (0) -+ -+#define fifo_swap(l, r) \ -+do { \ -+ swap((l)->front, (r)->front); \ -+ swap((l)->back, (r)->back); \ -+ swap((l)->size, (r)->size); \ -+ swap((l)->mask, (r)->mask); \ -+ swap((l)->data, (r)->data); \ -+} while (0) -+ -+#define fifo_move(dest, src) \ -+do { \ -+ typeof(*((dest)->data)) _t; \ -+ while (!fifo_full(dest) && \ -+ fifo_pop(src, _t)) \ -+ fifo_push(dest, _t); \ -+} while (0) -+ -+#define fifo_used(fifo) (((fifo)->back - (fifo)->front)) -+#define fifo_free(fifo) ((fifo)->size - fifo_used(fifo)) -+ -+#define fifo_empty(fifo) ((fifo)->front == (fifo)->back) -+#define fifo_full(fifo) (fifo_used(fifo) == (fifo)->size) -+ -+#define fifo_peek_front(fifo) ((fifo)->data[(fifo)->front & (fifo)->mask]) -+#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask]) -+ -+#define fifo_entry_idx_abs(fifo, p) \ -+ ((((p) >= &fifo_peek_front(fifo) \ -+ ? (fifo)->front : (fifo)->back) & ~(fifo)->mask) + \ -+ (((p) - (fifo)->data))) -+ -+#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask) -+#define fifo_idx_entry(fifo, i) ((fifo)->data[((fifo)->front + (i)) & (fifo)->mask]) -+ -+#define fifo_push_back_ref(f) \ -+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask]) -+ -+#define fifo_push_front_ref(f) \ -+ (fifo_full((f)) ? 
NULL : &(f)->data[--(f)->front & (f)->mask]) -+ -+#define fifo_push_back(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_push_front(fifo, new) \ -+({ \ -+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \ -+ if (_r) \ -+ *_r = (new); \ -+ _r != NULL; \ -+}) -+ -+#define fifo_pop_front(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_pop_back(fifo, i) \ -+({ \ -+ bool _r = !fifo_empty((fifo)); \ -+ if (_r) \ -+ (i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \ -+ _r; \ -+}) -+ -+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo) -+#define fifo_push(fifo, i) fifo_push_back(fifo, (i)) -+#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i)) -+#define fifo_peek(fifo) fifo_peek_front(fifo) -+ -+#define fifo_for_each_entry(_entry, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_entry = (_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#define fifo_for_each_entry_ptr(_ptr, _fifo, _iter) \ -+ for (typecheck(typeof((_fifo)->front), _iter), \ -+ (_iter) = (_fifo)->front; \ -+ ((_iter != (_fifo)->back) && \ -+ (_ptr = &(_fifo)->data[(_iter) & (_fifo)->mask], true)); \ -+ (_iter)++) -+ -+#endif /* _BCACHEFS_FIFO_H */ -diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c -new file mode 100644 -index 000000000000..4496cf91a4c1 ---- /dev/null -+++ b/fs/bcachefs/fs-common.c -@@ -0,0 +1,501 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "btree_update.h" -+#include "dirent.h" -+#include "fs-common.h" -+#include "inode.h" -+#include "subvolume.h" -+#include "xattr.h" -+ -+#include -+ -+static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode) -+{ -+ return S_ISDIR(inode->bi_mode) && !inode->bi_subvol; -+} -+ -+int bch2_create_trans(struct btree_trans *trans, -+ subvol_inum dir, -+ struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *new_inode, -+ const struct qstr *name, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct posix_acl *default_acl, -+ struct posix_acl *acl, -+ subvol_inum snapshot_src, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter dir_iter = { NULL }; -+ struct btree_iter inode_iter = { NULL }; -+ subvol_inum new_inum = dir; -+ u64 now = bch2_current_time(c); -+ u64 cpu = raw_smp_processor_id(); -+ u64 dir_target; -+ u32 snapshot; -+ unsigned dir_type = mode_to_type(mode); -+ int ret; -+ -+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ if (!(flags & BCH_CREATE_SNAPSHOT)) { -+ /* Normal create path - allocate a new inode: */ -+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); -+ -+ if (flags & BCH_CREATE_TMPFILE) -+ new_inode->bi_flags |= BCH_INODE_unlinked; -+ -+ ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); -+ if (ret) -+ goto err; -+ -+ snapshot_src = (subvol_inum) { 0 }; -+ } else { -+ /* -+ * Creating a snapshot - we're not allocating a new inode, but -+ * we do have to lookup the root inode of the subvolume we're -+ * snapshotting and update it (in the new snapshot): -+ */ -+ -+ if (!snapshot_src.inum) { -+ /* Inode wasn't specified, just snapshot: */ -+ 
struct bch_subvolume s; -+ -+ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, -+ BTREE_ITER_CACHED, &s); -+ if (ret) -+ goto err; -+ -+ snapshot_src.inum = le64_to_cpu(s.inode); -+ } -+ -+ ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ if (new_inode->bi_subvol != snapshot_src.subvol) { -+ /* Not a subvolume root: */ -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ /* -+ * If we're not root, we have to own the subvolume being -+ * snapshotted: -+ */ -+ if (uid && new_inode->bi_uid != uid) { -+ ret = -EPERM; -+ goto err; -+ } -+ -+ flags |= BCH_CREATE_SUBVOL; -+ } -+ -+ new_inum.inum = new_inode->bi_inum; -+ dir_target = new_inode->bi_inum; -+ -+ if (flags & BCH_CREATE_SUBVOL) { -+ u32 new_subvol, dir_snapshot; -+ -+ ret = bch2_subvolume_create(trans, new_inode->bi_inum, -+ snapshot_src.subvol, -+ &new_subvol, &snapshot, -+ (flags & BCH_CREATE_SNAPSHOT_RO) != 0); -+ if (ret) -+ goto err; -+ -+ new_inode->bi_parent_subvol = dir.subvol; -+ new_inode->bi_subvol = new_subvol; -+ new_inum.subvol = new_subvol; -+ dir_target = new_subvol; -+ dir_type = DT_SUBVOL; -+ -+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot); -+ if (ret) -+ goto err; -+ -+ bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot); -+ ret = bch2_btree_iter_traverse(&dir_iter); -+ if (ret) -+ goto err; -+ } -+ -+ if (!(flags & BCH_CREATE_SNAPSHOT)) { -+ if (default_acl) { -+ ret = bch2_set_acl_trans(trans, new_inum, new_inode, -+ default_acl, ACL_TYPE_DEFAULT); -+ if (ret) -+ goto err; -+ } -+ -+ if (acl) { -+ ret = bch2_set_acl_trans(trans, new_inum, new_inode, -+ acl, ACL_TYPE_ACCESS); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ if (!(flags & BCH_CREATE_TMPFILE)) { -+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); -+ u64 dir_offset; -+ -+ if (is_subdir_for_nlink(new_inode)) -+ dir_u->bi_nlink++; -+ dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ ret = bch2_inode_write(trans, &dir_iter, dir_u); -+ if (ret) -+ goto err; -+ -+ ret = bch2_dirent_create(trans, dir, &dir_hash, -+ dir_type, -+ name, -+ dir_target, -+ &dir_offset, -+ BCH_HASH_SET_MUST_CREATE); -+ if (ret) -+ goto err; -+ -+ if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { -+ new_inode->bi_dir = dir_u->bi_inum; -+ new_inode->bi_dir_offset = dir_offset; -+ } -+ } -+ -+ inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS; -+ bch2_btree_iter_set_snapshot(&inode_iter, snapshot); -+ -+ ret = bch2_btree_iter_traverse(&inode_iter) ?: -+ bch2_inode_write(trans, &inode_iter, new_inode); -+err: -+ bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(trans, &dir_iter); -+ return ret; -+} -+ -+int bch2_link_trans(struct btree_trans *trans, -+ subvol_inum dir, struct bch_inode_unpacked *dir_u, -+ subvol_inum inum, struct bch_inode_unpacked *inode_u, -+ const struct qstr *name) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter dir_iter = { NULL }; -+ struct btree_iter inode_iter = { NULL }; -+ struct bch_hash_info dir_hash; -+ u64 now = bch2_current_time(c); -+ u64 dir_offset = 0; -+ int ret; -+ -+ if (dir.subvol != inum.subvol) -+ return -EXDEV; -+ -+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ inode_u->bi_ctime = now; -+ ret = bch2_inode_nlink_inc(inode_u); -+ if (ret) -+ return ret; -+ -+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ if (bch2_reinherit_attrs(inode_u, dir_u)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ 
dir_u->bi_mtime = dir_u->bi_ctime = now; -+ -+ dir_hash = bch2_hash_info_init(c, dir_u); -+ -+ ret = bch2_dirent_create(trans, dir, &dir_hash, -+ mode_to_type(inode_u->bi_mode), -+ name, inum.inum, &dir_offset, -+ BCH_HASH_SET_MUST_CREATE); -+ if (ret) -+ goto err; -+ -+ if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { -+ inode_u->bi_dir = dir.inum; -+ inode_u->bi_dir_offset = dir_offset; -+ } -+ -+ ret = bch2_inode_write(trans, &dir_iter, dir_u) ?: -+ bch2_inode_write(trans, &inode_iter, inode_u); -+err: -+ bch2_trans_iter_exit(trans, &dir_iter); -+ bch2_trans_iter_exit(trans, &inode_iter); -+ return ret; -+} -+ -+int bch2_unlink_trans(struct btree_trans *trans, -+ subvol_inum dir, -+ struct bch_inode_unpacked *dir_u, -+ struct bch_inode_unpacked *inode_u, -+ const struct qstr *name, -+ bool deleting_snapshot) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter dir_iter = { NULL }; -+ struct btree_iter dirent_iter = { NULL }; -+ struct btree_iter inode_iter = { NULL }; -+ struct bch_hash_info dir_hash; -+ subvol_inum inum; -+ u64 now = bch2_current_time(c); -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ dir_hash = bch2_hash_info_init(c, dir_u); -+ -+ ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash, -+ name, &inum, BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) { -+ ret = bch2_empty_dir_trans(trans, inum); -+ if (ret) -+ goto err; -+ } -+ -+ if (deleting_snapshot && !inode_u->bi_subvol) { -+ ret = -BCH_ERR_ENOENT_not_subvol; -+ goto err; -+ } -+ -+ if (deleting_snapshot || inode_u->bi_subvol) { -+ ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol); -+ if (ret) -+ goto err; -+ -+ k = bch2_btree_iter_peek_slot(&dirent_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ /* -+ * If we're deleting a subvolume, we need to really delete the -+ * dirent, not just emit a whiteout in the current snapshot: -+ */ -+ bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot); -+ ret = bch2_btree_iter_traverse(&dirent_iter); -+ if (ret) -+ goto err; -+ } else { -+ bch2_inode_nlink_dec(trans, inode_u); -+ } -+ -+ if (inode_u->bi_dir == dirent_iter.pos.inode && -+ inode_u->bi_dir_offset == dirent_iter.pos.offset) { -+ inode_u->bi_dir = 0; -+ inode_u->bi_dir_offset = 0; -+ } -+ -+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; -+ dir_u->bi_nlink -= is_subdir_for_nlink(inode_u); -+ -+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ &dir_hash, &dirent_iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: -+ bch2_inode_write(trans, &dir_iter, dir_u) ?: -+ bch2_inode_write(trans, &inode_iter, inode_u); -+err: -+ bch2_trans_iter_exit(trans, &inode_iter); -+ bch2_trans_iter_exit(trans, &dirent_iter); -+ bch2_trans_iter_exit(trans, &dir_iter); -+ return ret; -+} -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, -+ struct bch_inode_unpacked *src_u) -+{ -+ u64 src, dst; -+ unsigned id; -+ bool ret = false; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ /* Skip attributes that were explicitly set on this inode */ -+ if (dst_u->bi_fields_set & (1 << id)) -+ continue; -+ -+ src = bch2_inode_opt_get(src_u, id); -+ dst = bch2_inode_opt_get(dst_u, id); -+ -+ if (src == dst) -+ continue; -+ -+ bch2_inode_opt_set(dst_u, id, src); -+ ret = true; -+ } -+ -+ return ret; 
-+} -+ -+int bch2_rename_trans(struct btree_trans *trans, -+ subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u, -+ subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u, -+ struct bch_inode_unpacked *src_inode_u, -+ struct bch_inode_unpacked *dst_inode_u, -+ const struct qstr *src_name, -+ const struct qstr *dst_name, -+ enum bch_rename_mode mode) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter src_dir_iter = { NULL }; -+ struct btree_iter dst_dir_iter = { NULL }; -+ struct btree_iter src_inode_iter = { NULL }; -+ struct btree_iter dst_inode_iter = { NULL }; -+ struct bch_hash_info src_hash, dst_hash; -+ subvol_inum src_inum, dst_inum; -+ u64 src_offset, dst_offset; -+ u64 now = bch2_current_time(c); -+ int ret; -+ -+ ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ src_hash = bch2_hash_info_init(c, src_dir_u); -+ -+ if (dst_dir.inum != src_dir.inum || -+ dst_dir.subvol != src_dir.subvol) { -+ ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ dst_hash = bch2_hash_info_init(c, dst_dir_u); -+ } else { -+ dst_dir_u = src_dir_u; -+ dst_hash = src_hash; -+ } -+ -+ ret = bch2_dirent_rename(trans, -+ src_dir, &src_hash, -+ dst_dir, &dst_hash, -+ src_name, &src_inum, &src_offset, -+ dst_name, &dst_inum, &dst_offset, -+ mode); -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ -+ if (dst_inum.inum) { -+ ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum, -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto err; -+ } -+ -+ if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { -+ src_inode_u->bi_dir = dst_dir_u->bi_inum; -+ src_inode_u->bi_dir_offset = dst_offset; -+ -+ if (mode == BCH_RENAME_EXCHANGE) { -+ dst_inode_u->bi_dir = src_dir_u->bi_inum; -+ dst_inode_u->bi_dir_offset = src_offset; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE && -+ dst_inode_u->bi_dir == dst_dir_u->bi_inum && -+ dst_inode_u->bi_dir_offset == src_offset) { -+ dst_inode_u->bi_dir = 0; -+ dst_inode_u->bi_dir_offset = 0; -+ } -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ if (S_ISDIR(src_inode_u->bi_mode) != -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -ENOTDIR; -+ goto err; -+ } -+ -+ if (S_ISDIR(dst_inode_u->bi_mode) && -+ bch2_empty_dir_trans(trans, dst_inum)) { -+ ret = -ENOTEMPTY; -+ goto err; -+ } -+ } -+ -+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) && -+ S_ISDIR(src_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) && -+ S_ISDIR(dst_inode_u->bi_mode)) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ if (is_subdir_for_nlink(src_inode_u)) { -+ src_dir_u->bi_nlink--; -+ dst_dir_u->bi_nlink++; -+ } -+ -+ if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { -+ dst_dir_u->bi_nlink--; -+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; -+ } -+ -+ if (mode == BCH_RENAME_OVERWRITE) -+ bch2_inode_nlink_dec(trans, dst_inode_u); -+ -+ src_dir_u->bi_mtime = now; -+ src_dir_u->bi_ctime = now; -+ -+ if (src_dir.inum != dst_dir.inum) { -+ dst_dir_u->bi_mtime = now; -+ dst_dir_u->bi_ctime = now; -+ } -+ -+ src_inode_u->bi_ctime = now; -+ -+ if (dst_inum.inum) -+ dst_inode_u->bi_ctime = now; -+ -+ ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: -+ (src_dir.inum != dst_dir.inum -+ ? 
bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) -+ : 0) ?: -+ bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: -+ (dst_inum.inum -+ ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) -+ : 0); -+err: -+ bch2_trans_iter_exit(trans, &dst_inode_iter); -+ bch2_trans_iter_exit(trans, &src_inode_iter); -+ bch2_trans_iter_exit(trans, &dst_dir_iter); -+ bch2_trans_iter_exit(trans, &src_dir_iter); -+ return ret; -+} -diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h -new file mode 100644 -index 000000000000..dde237859514 ---- /dev/null -+++ b/fs/bcachefs/fs-common.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_COMMON_H -+#define _BCACHEFS_FS_COMMON_H -+ -+struct posix_acl; -+ -+#define BCH_CREATE_TMPFILE (1U << 0) -+#define BCH_CREATE_SUBVOL (1U << 1) -+#define BCH_CREATE_SNAPSHOT (1U << 2) -+#define BCH_CREATE_SNAPSHOT_RO (1U << 3) -+ -+int bch2_create_trans(struct btree_trans *, subvol_inum, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ uid_t, gid_t, umode_t, dev_t, -+ struct posix_acl *, -+ struct posix_acl *, -+ subvol_inum, unsigned); -+ -+int bch2_link_trans(struct btree_trans *, -+ subvol_inum, struct bch_inode_unpacked *, -+ subvol_inum, struct bch_inode_unpacked *, -+ const struct qstr *); -+ -+int bch2_unlink_trans(struct btree_trans *, subvol_inum, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, bool); -+ -+int bch2_rename_trans(struct btree_trans *, -+ subvol_inum, struct bch_inode_unpacked *, -+ subvol_inum, struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *, -+ const struct qstr *, -+ const struct qstr *, -+ enum bch_rename_mode); -+ -+bool bch2_reinherit_attrs(struct bch_inode_unpacked *, -+ struct bch_inode_unpacked *); -+ -+#endif /* _BCACHEFS_FS_COMMON_H */ -diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c -new file mode 100644 -index 000000000000..52f0e7acda3d ---- /dev/null -+++ b/fs/bcachefs/fs-io-buffered.c -@@ -0,0 +1,1106 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_buf.h" -+#include "fs-io.h" -+#include "fs-io-buffered.h" -+#include "fs-io-direct.h" -+#include "fs-io-pagecache.h" -+#include "io_read.h" -+#include "io_write.h" -+ -+#include -+#include -+#include -+ -+static inline bool bio_full(struct bio *bio, unsigned len) -+{ -+ if (bio->bi_vcnt >= bio->bi_max_vecs) -+ return true; -+ if (bio->bi_iter.bi_size > UINT_MAX - len) -+ return true; -+ return false; -+} -+ -+/* readpage(s): */ -+ -+static void bch2_readpages_end_io(struct bio *bio) -+{ -+ struct folio_iter fi; -+ -+ bio_for_each_folio_all(fi, bio) { -+ if (!bio->bi_status) { -+ folio_mark_uptodate(fi.folio); -+ } else { -+ folio_clear_uptodate(fi.folio); -+ folio_set_error(fi.folio); -+ } -+ folio_unlock(fi.folio); -+ } -+ -+ bio_put(bio); -+} -+ -+struct readpages_iter { -+ struct address_space *mapping; -+ unsigned idx; -+ folios folios; -+}; -+ -+static int readpages_iter_init(struct readpages_iter *iter, -+ struct readahead_control *ractl) -+{ -+ struct folio **fi; -+ int ret; -+ -+ memset(iter, 0, sizeof(*iter)); -+ -+ iter->mapping = ractl->mapping; -+ -+ ret = bch2_filemap_get_contig_folios_d(iter->mapping, -+ ractl->_index << PAGE_SHIFT, -+ (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, -+ 0, mapping_gfp_mask(iter->mapping), -+ &iter->folios); -+ if (ret) -+ return ret; -+ -+ 
darray_for_each(iter->folios, fi) { -+ ractl->_nr_pages -= 1U << folio_order(*fi); -+ __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL); -+ folio_put(*fi); -+ folio_put(*fi); -+ } -+ -+ return 0; -+} -+ -+static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) -+{ -+ if (iter->idx >= iter->folios.nr) -+ return NULL; -+ return iter->folios.data[iter->idx]; -+} -+ -+static inline void readpage_iter_advance(struct readpages_iter *iter) -+{ -+ iter->idx++; -+} -+ -+static bool extent_partial_reads_expensive(struct bkey_s_c k) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (crc.csum_type || crc.compression_type) -+ return true; -+ return false; -+} -+ -+static int readpage_bio_extend(struct btree_trans *trans, -+ struct readpages_iter *iter, -+ struct bio *bio, -+ unsigned sectors_this_extent, -+ bool get_more) -+{ -+ /* Don't hold btree locks while allocating memory: */ -+ bch2_trans_unlock(trans); -+ -+ while (bio_sectors(bio) < sectors_this_extent && -+ bio->bi_vcnt < bio->bi_max_vecs) { -+ struct folio *folio = readpage_iter_peek(iter); -+ int ret; -+ -+ if (folio) { -+ readpage_iter_advance(iter); -+ } else { -+ pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; -+ -+ if (!get_more) -+ break; -+ -+ folio = xa_load(&iter->mapping->i_pages, folio_offset); -+ if (folio && !xa_is_value(folio)) -+ break; -+ -+ folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); -+ if (!folio) -+ break; -+ -+ if (!__bch2_folio_create(folio, GFP_KERNEL)) { -+ folio_put(folio); -+ break; -+ } -+ -+ ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL); -+ if (ret) { -+ __bch2_folio_release(folio); -+ folio_put(folio); -+ break; -+ } -+ -+ folio_put(folio); -+ } -+ -+ BUG_ON(folio_sector(folio) != bio_end_sector(bio)); -+ -+ BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); -+ } -+ -+ return bch2_trans_relock(trans); -+} -+ -+static void bchfs_read(struct btree_trans *trans, -+ struct bch_read_bio *rbio, -+ subvol_inum inum, -+ struct readpages_iter *readpages_iter) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_buf sk; -+ int flags = BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE; -+ u32 snapshot; -+ int ret = 0; -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ rbio->subvol = inum.subvol; -+ -+ bch2_bkey_buf_init(&sk); -+retry: -+ bch2_trans_begin(trans); -+ iter = (struct btree_iter) { NULL }; -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -+ SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), -+ BTREE_ITER_SLOTS); -+ while (1) { -+ struct bkey_s_c k; -+ unsigned bytes, sectors, offset_into_extent; -+ enum btree_id data_btree = BTREE_ID_extents; -+ -+ /* -+ * read_extent -> io_time_reset may cause a transaction restart -+ * without returning an error, we need to check for that here: -+ */ -+ ret = bch2_trans_relock(trans); -+ if (ret) -+ break; -+ -+ bch2_btree_iter_set_pos(&iter, -+ POS(inum.inum, rbio->bio.bi_iter.bi_sector)); -+ -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ offset_into_extent = iter.pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ -+ ret = bch2_read_indirect_extent(trans, &data_btree, -+ &offset_into_extent, 
&sk); -+ if (ret) -+ break; -+ -+ k = bkey_i_to_s_c(sk.k); -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ if (readpages_iter) { -+ ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, -+ extent_partial_reads_expensive(k)); -+ if (ret) -+ break; -+ } -+ -+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ -+ if (rbio->bio.bi_iter.bi_size == bytes) -+ flags |= BCH_READ_LAST_FRAGMENT; -+ -+ bch2_bio_page_state_set(&rbio->bio, k); -+ -+ bch2_read_extent(trans, rbio, iter.pos, -+ data_btree, k, offset_into_extent, flags); -+ -+ if (flags & BCH_READ_LAST_FRAGMENT) -+ break; -+ -+ swap(rbio->bio.bi_iter.bi_size, bytes); -+ bio_advance(&rbio->bio, bytes); -+ -+ ret = btree_trans_too_many_iters(trans); -+ if (ret) -+ break; -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ if (ret) { -+ bch_err_inum_offset_ratelimited(c, -+ iter.pos.inode, -+ iter.pos.offset << 9, -+ "read error %i from btree lookup", ret); -+ rbio->bio.bi_status = BLK_STS_IOERR; -+ bio_endio(&rbio->bio); -+ } -+ -+ bch2_bkey_buf_exit(&sk, c); -+} -+ -+void bch2_readahead(struct readahead_control *ractl) -+{ -+ struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct folio *folio; -+ struct readpages_iter readpages_iter; -+ int ret; -+ -+ bch2_inode_opts_get(&opts, c, &inode->ei_inode); -+ -+ ret = readpages_iter_init(&readpages_iter, ractl); -+ BUG_ON(ret); -+ -+ bch2_pagecache_add_get(inode); -+ -+ while ((folio = readpage_iter_peek(&readpages_iter))) { -+ unsigned n = min_t(unsigned, -+ readpages_iter.folios.nr - -+ readpages_iter.idx, -+ BIO_MAX_VECS); -+ struct bch_read_bio *rbio = -+ rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, -+ GFP_KERNEL, &c->bio_read), -+ opts); -+ -+ readpage_iter_advance(&readpages_iter); -+ -+ rbio->bio.bi_iter.bi_sector = folio_sector(folio); -+ rbio->bio.bi_end_io = bch2_readpages_end_io; -+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); -+ -+ bchfs_read(trans, rbio, inode_inum(inode), -+ &readpages_iter); -+ bch2_trans_unlock(trans); -+ } -+ -+ bch2_pagecache_add_put(inode); -+ -+ bch2_trans_put(trans); -+ darray_exit(&readpages_iter.folios); -+} -+ -+static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, -+ subvol_inum inum, struct folio *folio) -+{ -+ bch2_folio_create(folio, __GFP_NOFAIL); -+ -+ rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; -+ rbio->bio.bi_iter.bi_sector = folio_sector(folio); -+ BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); -+ -+ bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0)); -+} -+ -+static void bch2_read_single_folio_end_io(struct bio *bio) -+{ -+ complete(bio->bi_private); -+} -+ -+int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_read_bio *rbio; -+ struct bch_io_opts opts; -+ int ret; -+ DECLARE_COMPLETION_ONSTACK(done); -+ -+ bch2_inode_opts_get(&opts, c, &inode->ei_inode); -+ -+ rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), -+ opts); -+ rbio->bio.bi_private = &done; -+ rbio->bio.bi_end_io = bch2_read_single_folio_end_io; -+ -+ __bchfs_readfolio(c, rbio, inode_inum(inode), folio); -+ 
wait_for_completion(&done); -+ -+ ret = blk_status_to_errno(rbio->bio.bi_status); -+ bio_put(&rbio->bio); -+ -+ if (ret < 0) -+ return ret; -+ -+ folio_mark_uptodate(folio); -+ return 0; -+} -+ -+int bch2_read_folio(struct file *file, struct folio *folio) -+{ -+ int ret; -+ -+ ret = bch2_read_single_folio(folio, folio->mapping); -+ folio_unlock(folio); -+ return bch2_err_class(ret); -+} -+ -+/* writepages: */ -+ -+struct bch_writepage_io { -+ struct bch_inode_info *inode; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+struct bch_writepage_state { -+ struct bch_writepage_io *io; -+ struct bch_io_opts opts; -+ struct bch_folio_sector *tmp; -+ unsigned tmp_sectors; -+}; -+ -+static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, -+ struct bch_inode_info *inode) -+{ -+ struct bch_writepage_state ret = { 0 }; -+ -+ bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); -+ return ret; -+} -+ -+/* -+ * Determine when a writepage io is full. We have to limit writepage bios to a -+ * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to -+ * what the bounce path in bch2_write_extent() can handle. In theory we could -+ * loosen this restriction for non-bounce I/O, but we don't have that context -+ * here. Ideally, we can up this limit and make it configurable in the future -+ * when the bounce path can be enhanced to accommodate larger source bios. -+ */ -+static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len) -+{ -+ struct bio *bio = &io->op.wbio.bio; -+ return bio_full(bio, len) || -+ (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE); -+} -+ -+static void bch2_writepage_io_done(struct bch_write_op *op) -+{ -+ struct bch_writepage_io *io = -+ container_of(op, struct bch_writepage_io, op); -+ struct bch_fs *c = io->op.c; -+ struct bio *bio = &io->op.wbio.bio; -+ struct folio_iter fi; -+ unsigned i; -+ -+ if (io->op.error) { -+ set_bit(EI_INODE_ERROR, &io->inode->ei_flags); -+ -+ bio_for_each_folio_all(fi, bio) { -+ struct bch_folio *s; -+ -+ folio_set_error(fi.folio); -+ mapping_set_error(fi.folio->mapping, -EIO); -+ -+ s = __bch2_folio(fi.folio); -+ spin_lock(&s->lock); -+ for (i = 0; i < folio_sectors(fi.folio); i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { -+ bio_for_each_folio_all(fi, bio) { -+ struct bch_folio *s; -+ -+ s = __bch2_folio(fi.folio); -+ spin_lock(&s->lock); -+ for (i = 0; i < folio_sectors(fi.folio); i++) -+ s->s[i].nr_replicas = 0; -+ spin_unlock(&s->lock); -+ } -+ } -+ -+ /* -+ * racing with fallocate can cause us to add fewer sectors than -+ * expected - but we shouldn't add more sectors than expected: -+ */ -+ WARN_ON_ONCE(io->op.i_sectors_delta > 0); -+ -+ /* -+ * (error (due to going RO) halfway through a page can screw that up -+ * slightly) -+ * XXX wtf? 
-+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); -+ */ -+ -+ /* -+ * PageWriteback is effectively our ref on the inode - fixup i_blocks -+ * before calling end_page_writeback: -+ */ -+ bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); -+ -+ bio_for_each_folio_all(fi, bio) { -+ struct bch_folio *s = __bch2_folio(fi.folio); -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ folio_end_writeback(fi.folio); -+ } -+ -+ bio_put(&io->op.wbio.bio); -+} -+ -+static void bch2_writepage_do_io(struct bch_writepage_state *w) -+{ -+ struct bch_writepage_io *io = w->io; -+ -+ w->io = NULL; -+ closure_call(&io->op.cl, bch2_write, NULL, NULL); -+} -+ -+/* -+ * Get a bch_writepage_io and add @page to it - appending to an existing one if -+ * possible, else allocating a new one: -+ */ -+static void bch2_writepage_io_alloc(struct bch_fs *c, -+ struct writeback_control *wbc, -+ struct bch_writepage_state *w, -+ struct bch_inode_info *inode, -+ u64 sector, -+ unsigned nr_replicas) -+{ -+ struct bch_write_op *op; -+ -+ w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, -+ REQ_OP_WRITE, -+ GFP_KERNEL, -+ &c->writepage_bioset), -+ struct bch_writepage_io, op.wbio.bio); -+ -+ w->io->inode = inode; -+ op = &w->io->op; -+ bch2_write_op_init(op, c, w->opts); -+ op->target = w->opts.foreground_target; -+ op->nr_replicas = nr_replicas; -+ op->res.nr_replicas = nr_replicas; -+ op->write_point = writepoint_hashed(inode->ei_last_dirtied); -+ op->subvol = inode->ei_subvol; -+ op->pos = POS(inode->v.i_ino, sector); -+ op->end_io = bch2_writepage_io_done; -+ op->devs_need_flush = &inode->ei_devs_need_flush; -+ op->wbio.bio.bi_iter.bi_sector = sector; -+ op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); -+} -+ -+static int __bch2_writepage(struct folio *folio, -+ struct writeback_control *wbc, -+ void *data) -+{ -+ struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_writepage_state *w = data; -+ struct bch_folio *s; -+ unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; -+ loff_t i_size = i_size_read(&inode->v); -+ int ret; -+ -+ EBUG_ON(!folio_test_uptodate(folio)); -+ -+ /* Is the folio fully inside i_size? */ -+ if (folio_end_pos(folio) <= i_size) -+ goto do_io; -+ -+ /* Is the folio fully outside i_size? (truncate in progress) */ -+ if (folio_pos(folio) >= i_size) { -+ folio_unlock(folio); -+ return 0; -+ } -+ -+ /* -+ * The folio straddles i_size. It must be zeroed out on each and every -+ * writepage invocation because it may be mmapped. "A file is mapped -+ * in multiples of the folio size. For a file that is not a multiple of -+ * the folio size, the remaining memory is zeroed when mapped, and -+ * writes to that region are not written out to the file." 
-+ */ -+ folio_zero_segment(folio, -+ i_size - folio_pos(folio), -+ folio_size(folio)); -+do_io: -+ f_sectors = folio_sectors(folio); -+ s = bch2_folio(folio); -+ -+ if (f_sectors > w->tmp_sectors) { -+ kfree(w->tmp); -+ w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); -+ w->tmp_sectors = f_sectors; -+ } -+ -+ /* -+ * Things get really hairy with errors during writeback: -+ */ -+ ret = bch2_get_folio_disk_reservation(c, inode, folio, false); -+ BUG_ON(ret); -+ -+ /* Before unlocking the page, get copy of reservations: */ -+ spin_lock(&s->lock); -+ memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); -+ -+ for (i = 0; i < f_sectors; i++) { -+ if (s->s[i].state < SECTOR_dirty) -+ continue; -+ -+ nr_replicas_this_write = -+ min_t(unsigned, nr_replicas_this_write, -+ s->s[i].nr_replicas + -+ s->s[i].replicas_reserved); -+ } -+ -+ for (i = 0; i < f_sectors; i++) { -+ if (s->s[i].state < SECTOR_dirty) -+ continue; -+ -+ s->s[i].nr_replicas = w->opts.compression -+ ? 0 : nr_replicas_this_write; -+ -+ s->s[i].replicas_reserved = 0; -+ bch2_folio_sector_set(folio, s, i, SECTOR_allocated); -+ } -+ spin_unlock(&s->lock); -+ -+ BUG_ON(atomic_read(&s->write_count)); -+ atomic_set(&s->write_count, 1); -+ -+ BUG_ON(folio_test_writeback(folio)); -+ folio_start_writeback(folio); -+ -+ folio_unlock(folio); -+ -+ offset = 0; -+ while (1) { -+ unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; -+ u64 sector; -+ -+ while (offset < f_sectors && -+ w->tmp[offset].state < SECTOR_dirty) -+ offset++; -+ -+ if (offset == f_sectors) -+ break; -+ -+ while (offset + sectors < f_sectors && -+ w->tmp[offset + sectors].state >= SECTOR_dirty) { -+ reserved_sectors += w->tmp[offset + sectors].replicas_reserved; -+ dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty; -+ sectors++; -+ } -+ BUG_ON(!sectors); -+ -+ sector = folio_sector(folio) + offset; -+ -+ if (w->io && -+ (w->io->op.res.nr_replicas != nr_replicas_this_write || -+ bch_io_full(w->io, sectors << 9) || -+ bio_end_sector(&w->io->op.wbio.bio) != sector)) -+ bch2_writepage_do_io(w); -+ -+ if (!w->io) -+ bch2_writepage_io_alloc(c, wbc, w, inode, sector, -+ nr_replicas_this_write); -+ -+ atomic_inc(&s->write_count); -+ -+ BUG_ON(inode != w->io->inode); -+ BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, -+ sectors << 9, offset << 9)); -+ -+ /* Check for writing past i_size: */ -+ WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > -+ round_up(i_size, block_bytes(c)) && -+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), -+ "writing past i_size: %llu > %llu (unrounded %llu)\n", -+ bio_end_sector(&w->io->op.wbio.bio) << 9, -+ round_up(i_size, block_bytes(c)), -+ i_size); -+ -+ w->io->op.res.sectors += reserved_sectors; -+ w->io->op.i_sectors_delta -= dirty_sectors; -+ w->io->op.new_i_size = i_size; -+ -+ offset += sectors; -+ } -+ -+ if (atomic_dec_and_test(&s->write_count)) -+ folio_end_writeback(folio); -+ -+ return 0; -+} -+ -+int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) -+{ -+ struct bch_fs *c = mapping->host->i_sb->s_fs_info; -+ struct bch_writepage_state w = -+ bch_writepage_state_init(c, to_bch_ei(mapping->host)); -+ struct blk_plug plug; -+ int ret; -+ -+ blk_start_plug(&plug); -+ ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); -+ if (w.io) -+ bch2_writepage_do_io(&w); -+ blk_finish_plug(&plug); -+ kfree(w.tmp); -+ return bch2_err_class(ret); -+} -+ -+/* buffered writes: */ -+ -+int bch2_write_begin(struct file *file, struct address_space *mapping, 
-+ loff_t pos, unsigned len, -+ struct page **pagep, void **fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_folio_reservation *res; -+ struct folio *folio; -+ unsigned offset; -+ int ret = -ENOMEM; -+ -+ res = kmalloc(sizeof(*res), GFP_KERNEL); -+ if (!res) -+ return -ENOMEM; -+ -+ bch2_folio_reservation_init(c, inode, res); -+ *fsdata = res; -+ -+ bch2_pagecache_add_get(inode); -+ -+ folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, -+ FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, -+ mapping_gfp_mask(mapping)); -+ if (IS_ERR_OR_NULL(folio)) -+ goto err_unlock; -+ -+ offset = pos - folio_pos(folio); -+ len = min_t(size_t, len, folio_end_pos(folio) - pos); -+ -+ if (folio_test_uptodate(folio)) -+ goto out; -+ -+ /* If we're writing entire folio, don't need to read it in first: */ -+ if (!offset && len == folio_size(folio)) -+ goto out; -+ -+ if (!offset && pos + len >= inode->v.i_size) { -+ folio_zero_segment(folio, len, folio_size(folio)); -+ flush_dcache_folio(folio); -+ goto out; -+ } -+ -+ if (folio_pos(folio) >= inode->v.i_size) { -+ folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); -+ flush_dcache_folio(folio); -+ goto out; -+ } -+readpage: -+ ret = bch2_read_single_folio(folio, mapping); -+ if (ret) -+ goto err; -+out: -+ ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); -+ if (ret) -+ goto err; -+ -+ ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); -+ if (ret) { -+ if (!folio_test_uptodate(folio)) { -+ /* -+ * If the folio hasn't been read in, we won't know if we -+ * actually need a reservation - we don't actually need -+ * to read here, we just need to check if the folio is -+ * fully backed by uncompressed data: -+ */ -+ goto readpage; -+ } -+ -+ goto err; -+ } -+ -+ *pagep = &folio->page; -+ return 0; -+err: -+ folio_unlock(folio); -+ folio_put(folio); -+ *pagep = NULL; -+err_unlock: -+ bch2_pagecache_add_put(inode); -+ kfree(res); -+ *fsdata = NULL; -+ return bch2_err_class(ret); -+} -+ -+int bch2_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ -+ struct bch_inode_info *inode = to_bch_ei(mapping->host); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_folio_reservation *res = fsdata; -+ struct folio *folio = page_folio(page); -+ unsigned offset = pos - folio_pos(folio); -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ BUG_ON(offset + copied > folio_size(folio)); -+ -+ if (unlikely(copied < len && !folio_test_uptodate(folio))) { -+ /* -+ * The folio needs to be read in, but that would destroy -+ * our partial write - simplest thing is to just force -+ * userspace to redo the write: -+ */ -+ folio_zero_range(folio, 0, folio_size(folio)); -+ flush_dcache_folio(folio); -+ copied = 0; -+ } -+ -+ spin_lock(&inode->v.i_lock); -+ if (pos + copied > inode->v.i_size) -+ i_size_write(&inode->v, pos + copied); -+ spin_unlock(&inode->v.i_lock); -+ -+ if (copied) { -+ if (!folio_test_uptodate(folio)) -+ folio_mark_uptodate(folio); -+ -+ bch2_set_folio_dirty(c, inode, folio, res, offset, copied); -+ -+ inode->ei_last_dirtied = (unsigned long) current; -+ } -+ -+ folio_unlock(folio); -+ folio_put(folio); -+ bch2_pagecache_add_put(inode); -+ -+ bch2_folio_reservation_put(c, inode, res); -+ kfree(res); -+ -+ return copied; -+} -+ -+static noinline void folios_trunc(folios *fs, struct folio **fi) -+{ -+ while (fs->data + fs->nr > fi) { -+ struct folio *f 
= darray_pop(fs); -+ -+ folio_unlock(f); -+ folio_put(f); -+ } -+} -+ -+static int __bch2_buffered_write(struct bch_inode_info *inode, -+ struct address_space *mapping, -+ struct iov_iter *iter, -+ loff_t pos, unsigned len) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch2_folio_reservation res; -+ folios fs; -+ struct folio **fi, *f; -+ unsigned copied = 0, f_offset, f_copied; -+ u64 end = pos + len, f_pos, f_len; -+ loff_t last_folio_pos = inode->v.i_size; -+ int ret = 0; -+ -+ BUG_ON(!len); -+ -+ bch2_folio_reservation_init(c, inode, &res); -+ darray_init(&fs); -+ -+ ret = bch2_filemap_get_contig_folios_d(mapping, pos, end, -+ FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, -+ mapping_gfp_mask(mapping), -+ &fs); -+ if (ret) -+ goto out; -+ -+ BUG_ON(!fs.nr); -+ -+ f = darray_first(fs); -+ if (pos != folio_pos(f) && !folio_test_uptodate(f)) { -+ ret = bch2_read_single_folio(f, mapping); -+ if (ret) -+ goto out; -+ } -+ -+ f = darray_last(fs); -+ end = min(end, folio_end_pos(f)); -+ last_folio_pos = folio_pos(f); -+ if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { -+ if (end >= inode->v.i_size) { -+ folio_zero_range(f, 0, folio_size(f)); -+ } else { -+ ret = bch2_read_single_folio(f, mapping); -+ if (ret) -+ goto out; -+ } -+ } -+ -+ ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr); -+ if (ret) -+ goto out; -+ -+ f_pos = pos; -+ f_offset = pos - folio_pos(darray_first(fs)); -+ darray_for_each(fs, fi) { -+ f = *fi; -+ f_len = min(end, folio_end_pos(f)) - f_pos; -+ -+ /* -+ * XXX: per POSIX and fstests generic/275, on -ENOSPC we're -+ * supposed to write as much as we have disk space for. -+ * -+ * On failure here we should still write out a partial page if -+ * we aren't completely out of disk space - we don't do that -+ * yet: -+ */ -+ ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); -+ if (unlikely(ret)) { -+ folios_trunc(&fs, fi); -+ if (!fs.nr) -+ goto out; -+ -+ end = min(end, folio_end_pos(darray_last(fs))); -+ break; -+ } -+ -+ f_pos = folio_end_pos(f); -+ f_offset = 0; -+ } -+ -+ if (mapping_writably_mapped(mapping)) -+ darray_for_each(fs, fi) -+ flush_dcache_folio(*fi); -+ -+ f_pos = pos; -+ f_offset = pos - folio_pos(darray_first(fs)); -+ darray_for_each(fs, fi) { -+ f = *fi; -+ f_len = min(end, folio_end_pos(f)) - f_pos; -+ f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); -+ if (!f_copied) { -+ folios_trunc(&fs, fi); -+ break; -+ } -+ -+ if (!folio_test_uptodate(f) && -+ f_copied != folio_size(f) && -+ pos + copied + f_copied < inode->v.i_size) { -+ iov_iter_revert(iter, f_copied); -+ folio_zero_range(f, 0, folio_size(f)); -+ folios_trunc(&fs, fi); -+ break; -+ } -+ -+ flush_dcache_folio(f); -+ copied += f_copied; -+ -+ if (f_copied != f_len) { -+ folios_trunc(&fs, fi + 1); -+ break; -+ } -+ -+ f_pos = folio_end_pos(f); -+ f_offset = 0; -+ } -+ -+ if (!copied) -+ goto out; -+ -+ end = pos + copied; -+ -+ spin_lock(&inode->v.i_lock); -+ if (end > inode->v.i_size) -+ i_size_write(&inode->v, end); -+ spin_unlock(&inode->v.i_lock); -+ -+ f_pos = pos; -+ f_offset = pos - folio_pos(darray_first(fs)); -+ darray_for_each(fs, fi) { -+ f = *fi; -+ f_len = min(end, folio_end_pos(f)) - f_pos; -+ -+ if (!folio_test_uptodate(f)) -+ folio_mark_uptodate(f); -+ -+ bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); -+ -+ f_pos = folio_end_pos(f); -+ f_offset = 0; -+ } -+ -+ inode->ei_last_dirtied = (unsigned long) current; -+out: -+ darray_for_each(fs, fi) { -+ folio_unlock(*fi); -+ folio_put(*fi); -+ } -+ -+ /* -+ * 
If the last folio added to the mapping starts beyond current EOF, we -+ * performed a short write but left around at least one post-EOF folio. -+ * Clean up the mapping before we return. -+ */ -+ if (last_folio_pos >= inode->v.i_size) -+ truncate_pagecache(&inode->v, inode->v.i_size); -+ -+ darray_exit(&fs); -+ bch2_folio_reservation_put(c, inode, &res); -+ -+ return copied ?: ret; -+} -+ -+static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct address_space *mapping = file->f_mapping; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ loff_t pos = iocb->ki_pos; -+ ssize_t written = 0; -+ int ret = 0; -+ -+ bch2_pagecache_add_get(inode); -+ -+ do { -+ unsigned offset = pos & (PAGE_SIZE - 1); -+ unsigned bytes = iov_iter_count(iter); -+again: -+ /* -+ * Bring in the user page that we will copy from _first_. -+ * Otherwise there's a nasty deadlock on copying from the -+ * same page as we're writing to, without it being marked -+ * up-to-date. -+ * -+ * Not only is this an optimisation, but it is also required -+ * to check that the address is actually valid, when atomic -+ * usercopies are used, below. -+ */ -+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { -+ bytes = min_t(unsigned long, iov_iter_count(iter), -+ PAGE_SIZE - offset); -+ -+ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { -+ ret = -EFAULT; -+ break; -+ } -+ } -+ -+ if (unlikely(fatal_signal_pending(current))) { -+ ret = -EINTR; -+ break; -+ } -+ -+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); -+ if (unlikely(ret < 0)) -+ break; -+ -+ cond_resched(); -+ -+ if (unlikely(ret == 0)) { -+ /* -+ * If we were unable to copy any data at all, we must -+ * fall back to a single segment length write. -+ * -+ * If we didn't fallback here, we could livelock -+ * because not all segments in the iov can be copied at -+ * once without a pagefault. -+ */ -+ bytes = min_t(unsigned long, PAGE_SIZE - offset, -+ iov_iter_single_seg_count(iter)); -+ goto again; -+ } -+ pos += ret; -+ written += ret; -+ ret = 0; -+ -+ balance_dirty_pages_ratelimited(mapping); -+ } while (iov_iter_count(iter)); -+ -+ bch2_pagecache_add_put(inode); -+ -+ return written ? 
written : ret; -+} -+ -+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ ssize_t ret; -+ -+ if (iocb->ki_flags & IOCB_DIRECT) { -+ ret = bch2_direct_write(iocb, from); -+ goto out; -+ } -+ -+ inode_lock(&inode->v); -+ -+ ret = generic_write_checks(iocb, from); -+ if (ret <= 0) -+ goto unlock; -+ -+ ret = file_remove_privs(file); -+ if (ret) -+ goto unlock; -+ -+ ret = file_update_time(file); -+ if (ret) -+ goto unlock; -+ -+ ret = bch2_buffered_write(iocb, from); -+ if (likely(ret > 0)) -+ iocb->ki_pos += ret; -+unlock: -+ inode_unlock(&inode->v); -+ -+ if (ret > 0) -+ ret = generic_write_sync(iocb, ret); -+out: -+ return bch2_err_class(ret); -+} -+ -+void bch2_fs_fs_io_buffered_exit(struct bch_fs *c) -+{ -+ bioset_exit(&c->writepage_bioset); -+} -+ -+int bch2_fs_fs_io_buffered_init(struct bch_fs *c) -+{ -+ if (bioset_init(&c->writepage_bioset, -+ 4, offsetof(struct bch_writepage_io, op.wbio.bio), -+ BIOSET_NEED_BVECS)) -+ return -BCH_ERR_ENOMEM_writepage_bioset_init; -+ -+ return 0; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-io-buffered.h b/fs/bcachefs/fs-io-buffered.h -new file mode 100644 -index 000000000000..a6126ff790e6 ---- /dev/null -+++ b/fs/bcachefs/fs-io-buffered.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IO_BUFFERED_H -+#define _BCACHEFS_FS_IO_BUFFERED_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+int bch2_read_single_folio(struct folio *, struct address_space *); -+int bch2_read_folio(struct file *, struct folio *); -+ -+int bch2_writepages(struct address_space *, struct writeback_control *); -+void bch2_readahead(struct readahead_control *); -+ -+int bch2_write_begin(struct file *, struct address_space *, loff_t, -+ unsigned, struct page **, void **); -+int bch2_write_end(struct file *, struct address_space *, loff_t, -+ unsigned, unsigned, struct page *, void *); -+ -+ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); -+ -+void bch2_fs_fs_io_buffered_exit(struct bch_fs *); -+int bch2_fs_fs_io_buffered_init(struct bch_fs *); -+#else -+static inline void bch2_fs_fs_io_buffered_exit(struct bch_fs *c) {} -+static inline int bch2_fs_fs_io_buffered_init(struct bch_fs *c) { return 0; } -+#endif -+ -+#endif /* _BCACHEFS_FS_IO_BUFFERED_H */ -diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c -new file mode 100644 -index 000000000000..5b42a76c4796 ---- /dev/null -+++ b/fs/bcachefs/fs-io-direct.c -@@ -0,0 +1,680 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fs-io-direct.h" -+#include "fs-io-pagecache.h" -+#include "io_read.h" -+#include "io_write.h" -+ -+#include -+#include -+#include -+#include -+ -+/* O_DIRECT reads */ -+ -+struct dio_read { -+ struct closure cl; -+ struct kiocb *req; -+ long ret; -+ bool should_dirty; -+ struct bch_read_bio rbio; -+}; -+ -+static void bio_check_or_release(struct bio *bio, bool check_dirty) -+{ -+ if (check_dirty) { -+ bio_check_pages_dirty(bio); -+ } else { -+ bio_release_pages(bio, false); -+ bio_put(bio); -+ } -+} -+ -+static void bch2_dio_read_complete(struct closure *cl) -+{ -+ struct dio_read *dio = container_of(cl, struct dio_read, cl); -+ -+ dio->req->ki_complete(dio->req, dio->ret); -+ bio_check_or_release(&dio->rbio.bio, dio->should_dirty); -+} -+ -+static void bch2_direct_IO_read_endio(struct bio *bio) -+{ -+ struct 
dio_read *dio = bio->bi_private; -+ -+ if (bio->bi_status) -+ dio->ret = blk_status_to_errno(bio->bi_status); -+ -+ closure_put(&dio->cl); -+} -+ -+static void bch2_direct_IO_read_split_endio(struct bio *bio) -+{ -+ struct dio_read *dio = bio->bi_private; -+ bool should_dirty = dio->should_dirty; -+ -+ bch2_direct_IO_read_endio(bio); -+ bio_check_or_release(bio, should_dirty); -+} -+ -+static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) -+{ -+ struct file *file = req->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_io_opts opts; -+ struct dio_read *dio; -+ struct bio *bio; -+ loff_t offset = req->ki_pos; -+ bool sync = is_sync_kiocb(req); -+ size_t shorten; -+ ssize_t ret; -+ -+ bch2_inode_opts_get(&opts, c, &inode->ei_inode); -+ -+ if ((offset|iter->count) & (block_bytes(c) - 1)) -+ return -EINVAL; -+ -+ ret = min_t(loff_t, iter->count, -+ max_t(loff_t, 0, i_size_read(&inode->v) - offset)); -+ -+ if (!ret) -+ return ret; -+ -+ shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); -+ iter->count -= shorten; -+ -+ bio = bio_alloc_bioset(NULL, -+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), -+ REQ_OP_READ, -+ GFP_KERNEL, -+ &c->dio_read_bioset); -+ -+ bio->bi_end_io = bch2_direct_IO_read_endio; -+ -+ dio = container_of(bio, struct dio_read, rbio.bio); -+ closure_init(&dio->cl, NULL); -+ -+ /* -+ * this is a _really_ horrible hack just to avoid an atomic sub at the -+ * end: -+ */ -+ if (!sync) { -+ set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER - -+ CLOSURE_RUNNING + -+ CLOSURE_DESTRUCTOR); -+ } else { -+ atomic_set(&dio->cl.remaining, -+ CLOSURE_REMAINING_INITIALIZER + 1); -+ dio->cl.closure_get_happened = true; -+ } -+ -+ dio->req = req; -+ dio->ret = ret; -+ /* -+ * This is one of the sketchier things I've encountered: we have to skip -+ * the dirtying of requests that are internal from the kernel (i.e. from -+ * loopback), because we'll deadlock on page_lock. 
-+ */ -+ dio->should_dirty = iter_is_iovec(iter); -+ -+ goto start; -+ while (iter->count) { -+ bio = bio_alloc_bioset(NULL, -+ bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), -+ REQ_OP_READ, -+ GFP_KERNEL, -+ &c->bio_read); -+ bio->bi_end_io = bch2_direct_IO_read_split_endio; -+start: -+ bio->bi_opf = REQ_OP_READ|REQ_SYNC; -+ bio->bi_iter.bi_sector = offset >> 9; -+ bio->bi_private = dio; -+ -+ ret = bio_iov_iter_get_pages(bio, iter); -+ if (ret < 0) { -+ /* XXX: fault inject this path */ -+ bio->bi_status = BLK_STS_RESOURCE; -+ bio_endio(bio); -+ break; -+ } -+ -+ offset += bio->bi_iter.bi_size; -+ -+ if (dio->should_dirty) -+ bio_set_pages_dirty(bio); -+ -+ if (iter->count) -+ closure_get(&dio->cl); -+ -+ bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); -+ } -+ -+ iter->count += shorten; -+ -+ if (sync) { -+ closure_sync(&dio->cl); -+ closure_debug_destroy(&dio->cl); -+ ret = dio->ret; -+ bio_check_or_release(&dio->rbio.bio, dio->should_dirty); -+ return ret; -+ } else { -+ return -EIOCBQUEUED; -+ } -+} -+ -+ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct address_space *mapping = file->f_mapping; -+ size_t count = iov_iter_count(iter); -+ ssize_t ret; -+ -+ if (!count) -+ return 0; /* skip atime */ -+ -+ if (iocb->ki_flags & IOCB_DIRECT) { -+ struct blk_plug plug; -+ -+ if (unlikely(mapping->nrpages)) { -+ ret = filemap_write_and_wait_range(mapping, -+ iocb->ki_pos, -+ iocb->ki_pos + count - 1); -+ if (ret < 0) -+ goto out; -+ } -+ -+ file_accessed(file); -+ -+ blk_start_plug(&plug); -+ ret = bch2_direct_IO_read(iocb, iter); -+ blk_finish_plug(&plug); -+ -+ if (ret >= 0) -+ iocb->ki_pos += ret; -+ } else { -+ bch2_pagecache_add_get(inode); -+ ret = generic_file_read_iter(iocb, iter); -+ bch2_pagecache_add_put(inode); -+ } -+out: -+ return bch2_err_class(ret); -+} -+ -+/* O_DIRECT writes */ -+ -+struct dio_write { -+ struct kiocb *req; -+ struct address_space *mapping; -+ struct bch_inode_info *inode; -+ struct mm_struct *mm; -+ unsigned loop:1, -+ extending:1, -+ sync:1, -+ flush:1, -+ free_iov:1; -+ struct quota_res quota_res; -+ u64 written; -+ -+ struct iov_iter iter; -+ struct iovec inline_vecs[2]; -+ -+ /* must be last: */ -+ struct bch_write_op op; -+}; -+ -+static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, -+ u64 offset, u64 size, -+ unsigned nr_replicas, bool compressed) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u64 end = offset + size; -+ u32 snapshot; -+ bool ret = true; -+ int err; -+retry: -+ bch2_trans_begin(trans); -+ -+ err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (err) -+ goto err; -+ -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, -+ SPOS(inum.inum, offset, snapshot), -+ BTREE_ITER_SLOTS, k, err) { -+ if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) -+ break; -+ -+ if (k.k->p.snapshot != snapshot || -+ nr_replicas > bch2_bkey_replicas(c, k) || -+ (!compressed && bch2_bkey_sectors_compressed(k))) { -+ ret = false; -+ break; -+ } -+ } -+ -+ offset = iter.pos.offset; -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ if (bch2_err_matches(err, BCH_ERR_transaction_restart)) -+ goto retry; -+ bch2_trans_put(trans); -+ -+ return err ? 
false : ret; -+} -+ -+static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) -+{ -+ struct bch_fs *c = dio->op.c; -+ struct bch_inode_info *inode = dio->inode; -+ struct bio *bio = &dio->op.wbio.bio; -+ -+ return bch2_check_range_allocated(c, inode_inum(inode), -+ dio->op.pos.offset, bio_sectors(bio), -+ dio->op.opts.data_replicas, -+ dio->op.opts.compression != 0); -+} -+ -+static void bch2_dio_write_loop_async(struct bch_write_op *); -+static __always_inline long bch2_dio_write_done(struct dio_write *dio); -+ -+/* -+ * We're going to return -EIOCBQUEUED, but we haven't finished consuming the -+ * iov_iter yet, so we need to stash a copy of the iovec: it might be on the -+ * caller's stack, we're not guaranteed that it will live for the duration of -+ * the IO: -+ */ -+static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) -+{ -+ struct iovec *iov = dio->inline_vecs; -+ -+ /* -+ * iov_iter has a single embedded iovec - nothing to do: -+ */ -+ if (iter_is_ubuf(&dio->iter)) -+ return 0; -+ -+ /* -+ * We don't currently handle non-iovec iov_iters here - return an error, -+ * and we'll fall back to doing the IO synchronously: -+ */ -+ if (!iter_is_iovec(&dio->iter)) -+ return -1; -+ -+ if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { -+ iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), -+ GFP_KERNEL); -+ if (unlikely(!iov)) -+ return -ENOMEM; -+ -+ dio->free_iov = true; -+ } -+ -+ memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); -+ dio->iter.__iov = iov; -+ return 0; -+} -+ -+static void bch2_dio_write_flush_done(struct closure *cl) -+{ -+ struct dio_write *dio = container_of(cl, struct dio_write, op.cl); -+ struct bch_fs *c = dio->op.c; -+ -+ closure_debug_destroy(cl); -+ -+ dio->op.error = bch2_journal_error(&c->journal); -+ -+ bch2_dio_write_done(dio); -+} -+ -+static noinline void bch2_dio_write_flush(struct dio_write *dio) -+{ -+ struct bch_fs *c = dio->op.c; -+ struct bch_inode_unpacked inode; -+ int ret; -+ -+ dio->flush = 0; -+ -+ closure_init(&dio->op.cl, NULL); -+ -+ if (!dio->op.error) { -+ ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); -+ if (ret) { -+ dio->op.error = ret; -+ } else { -+ bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, -+ &dio->op.cl); -+ bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); -+ } -+ } -+ -+ if (dio->sync) { -+ closure_sync(&dio->op.cl); -+ closure_debug_destroy(&dio->op.cl); -+ } else { -+ continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); -+ } -+} -+ -+static __always_inline long bch2_dio_write_done(struct dio_write *dio) -+{ -+ struct kiocb *req = dio->req; -+ struct bch_inode_info *inode = dio->inode; -+ bool sync = dio->sync; -+ long ret; -+ -+ if (unlikely(dio->flush)) { -+ bch2_dio_write_flush(dio); -+ if (!sync) -+ return -EIOCBQUEUED; -+ } -+ -+ bch2_pagecache_block_put(inode); -+ -+ if (dio->free_iov) -+ kfree(dio->iter.__iov); -+ -+ ret = dio->op.error ?: ((long) dio->written << 9); -+ bio_put(&dio->op.wbio.bio); -+ -+ /* inode->i_dio_count is our ref on inode and thus bch_fs */ -+ inode_dio_end(&inode->v); -+ -+ if (ret < 0) -+ ret = bch2_err_class(ret); -+ -+ if (!sync) { -+ req->ki_complete(req, ret); -+ ret = -EIOCBQUEUED; -+ } -+ return ret; -+} -+ -+static __always_inline void bch2_dio_write_end(struct dio_write *dio) -+{ -+ struct bch_fs *c = dio->op.c; -+ struct kiocb *req = dio->req; -+ struct bch_inode_info *inode = dio->inode; -+ struct bio *bio = &dio->op.wbio.bio; -+ -+ req->ki_pos += (u64) dio->op.written << 9; -+ 
dio->written += dio->op.written;
-+
-+	if (dio->extending) {
-+		spin_lock(&inode->v.i_lock);
-+		if (req->ki_pos > inode->v.i_size)
-+			i_size_write(&inode->v, req->ki_pos);
-+		spin_unlock(&inode->v.i_lock);
-+	}
-+
-+	if (dio->op.i_sectors_delta || dio->quota_res.sectors) {
-+		mutex_lock(&inode->ei_quota_lock);
-+		__bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta);
-+		__bch2_quota_reservation_put(c, inode, &dio->quota_res);
-+		mutex_unlock(&inode->ei_quota_lock);
-+	}
-+
-+	bio_release_pages(bio, false);
-+
-+	if (unlikely(dio->op.error))
-+		set_bit(EI_INODE_ERROR, &inode->ei_flags);
-+}
-+
-+static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
-+{
-+	struct bch_fs *c = dio->op.c;
-+	struct kiocb *req = dio->req;
-+	struct address_space *mapping = dio->mapping;
-+	struct bch_inode_info *inode = dio->inode;
-+	struct bch_io_opts opts;
-+	struct bio *bio = &dio->op.wbio.bio;
-+	unsigned unaligned, iter_count;
-+	bool sync = dio->sync, dropped_locks;
-+	long ret;
-+
-+	bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+	while (1) {
-+		iter_count = dio->iter.count;
-+
-+		EBUG_ON(current->faults_disabled_mapping);
-+		current->faults_disabled_mapping = mapping;
-+
-+		ret = bio_iov_iter_get_pages(bio, &dio->iter);
-+
-+		dropped_locks = fdm_dropped_locks();
-+
-+		current->faults_disabled_mapping = NULL;
-+
-+		/*
-+		 * If the fault handler returned an error but also signalled
-+		 * that it dropped & retook ei_pagecache_lock, we just need to
-+		 * re-shoot down the page cache and retry:
-+		 */
-+		if (dropped_locks && ret)
-+			ret = 0;
-+
-+		if (unlikely(ret < 0))
-+			goto err;
-+
-+		if (unlikely(dropped_locks)) {
-+			ret = bch2_write_invalidate_inode_pages_range(mapping,
-+					req->ki_pos,
-+					req->ki_pos + iter_count - 1);
-+			if (unlikely(ret))
-+				goto err;
-+
-+			if (!bio->bi_iter.bi_size)
-+				continue;
-+		}
-+
-+		unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
-+		bio->bi_iter.bi_size -= unaligned;
-+		iov_iter_revert(&dio->iter, unaligned);
-+
-+		if (!bio->bi_iter.bi_size) {
-+			/*
-+			 * bio_iov_iter_get_pages was only able to get <
-+			 * blocksize worth of pages:
-+			 */
-+			ret = -EFAULT;
-+			goto err;
-+		}
-+
-+		bch2_write_op_init(&dio->op, c, opts);
-+		dio->op.end_io		= sync
-+			? NULL
-+			: bch2_dio_write_loop_async;
-+		dio->op.target		= dio->op.opts.foreground_target;
-+		dio->op.write_point	= writepoint_hashed((unsigned long) current);
-+		dio->op.nr_replicas	= dio->op.opts.data_replicas;
-+		dio->op.subvol		= inode->ei_subvol;
-+		dio->op.pos		= POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
-+		dio->op.devs_need_flush	= &inode->ei_devs_need_flush;
-+
-+		if (sync)
-+			dio->op.flags |= BCH_WRITE_SYNC;
-+		dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
-+
-+		ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
-+						 bio_sectors(bio), true);
-+		if (unlikely(ret))
-+			goto err;
-+
-+		ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
-+						dio->op.opts.data_replicas, 0);
-+		if (unlikely(ret) &&
-+		    !bch2_dio_write_check_allocated(dio))
-+			goto err;
-+
-+		task_io_account_write(bio->bi_iter.bi_size);
-+
-+		if (unlikely(dio->iter.count) &&
-+		    !dio->sync &&
-+		    !dio->loop &&
-+		    bch2_dio_write_copy_iov(dio))
-+			dio->sync = sync = true;
-+
-+		dio->loop = true;
-+		closure_call(&dio->op.cl, bch2_write, NULL, NULL);
-+
-+		if (!sync)
-+			return -EIOCBQUEUED;
-+
-+		bch2_dio_write_end(dio);
-+
-+		if (likely(!dio->iter.count) || dio->op.error)
-+			break;
-+
-+		bio_reset(bio, NULL, REQ_OP_WRITE);
-+	}
-+out:
-+	return bch2_dio_write_done(dio);
-+err:
-+	dio->op.error = ret;
-+
-+	bio_release_pages(bio, false);
-+
-+	bch2_quota_reservation_put(c, inode, &dio->quota_res);
-+	goto out;
-+}
-+
-+static noinline __cold void bch2_dio_write_continue(struct dio_write *dio)
-+{
-+	struct mm_struct *mm = dio->mm;
-+
-+	bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
-+
-+	if (mm)
-+		kthread_use_mm(mm);
-+	bch2_dio_write_loop(dio);
-+	if (mm)
-+		kthread_unuse_mm(mm);
-+}
-+
-+static void bch2_dio_write_loop_async(struct bch_write_op *op)
-+{
-+	struct dio_write *dio = container_of(op, struct dio_write, op);
-+
-+	bch2_dio_write_end(dio);
-+
-+	if (likely(!dio->iter.count) || dio->op.error)
-+		bch2_dio_write_done(dio);
-+	else
-+		bch2_dio_write_continue(dio);
-+}
-+
-+ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
-+{
-+	struct file *file = req->ki_filp;
-+	struct address_space *mapping = file->f_mapping;
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct dio_write *dio;
-+	struct bio *bio;
-+	bool locked = true, extending;
-+	ssize_t ret;
-+
-+	prefetch(&c->opts);
-+	prefetch((void *) &c->opts + 64);
-+	prefetch(&inode->ei_inode);
-+	prefetch((void *) &inode->ei_inode + 64);
-+
-+	inode_lock(&inode->v);
-+
-+	ret = generic_write_checks(req, iter);
-+	if (unlikely(ret <= 0))
-+		goto err;
-+
-+	ret = file_remove_privs(file);
-+	if (unlikely(ret))
-+		goto err;
-+
-+	ret = file_update_time(file);
-+	if (unlikely(ret))
-+		goto err;
-+
-+	if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
-+		goto err;
-+
-+	inode_dio_begin(&inode->v);
-+	bch2_pagecache_block_get(inode);
-+
-+	extending = req->ki_pos + iter->count > inode->v.i_size;
-+	if (!extending) {
-+		inode_unlock(&inode->v);
-+		locked = false;
-+	}
-+
-+	bio = bio_alloc_bioset(NULL,
-+			       bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
-+			       REQ_OP_WRITE,
-+			       GFP_KERNEL,
-+			       &c->dio_write_bioset);
-+	dio = container_of(bio, struct dio_write, op.wbio.bio);
-+	dio->req		= req;
-+	dio->mapping		= mapping;
-+	dio->inode		= inode;
-+	dio->mm			= current->mm;
-+	dio->loop		= false;
-+	dio->extending		= extending;
-+	dio->sync		= is_sync_kiocb(req) || extending;
-+	dio->flush		= iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
-+	dio->free_iov		= false;
-+	dio->quota_res.sectors	= 0;
-+	dio->written		= 0;
-+	dio->iter		= *iter;
-+	dio->op.c		= c;
-+
-+	if (unlikely(mapping->nrpages)) {
-+		ret = bch2_write_invalidate_inode_pages_range(mapping,
-+					req->ki_pos,
-+					req->ki_pos + iter->count - 1);
-+		if (unlikely(ret))
-+			goto err_put_bio;
-+	}
-+
-+	ret = bch2_dio_write_loop(dio);
-+err:
-+	if (locked)
-+		inode_unlock(&inode->v);
-+	return ret;
-+err_put_bio:
-+	bch2_pagecache_block_put(inode);
-+	bio_put(bio);
-+	inode_dio_end(&inode->v);
-+	goto err;
-+}
-+
-+void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
-+{
-+	bioset_exit(&c->dio_write_bioset);
-+	bioset_exit(&c->dio_read_bioset);
-+}
-+
-+int bch2_fs_fs_io_direct_init(struct bch_fs *c)
-+{
-+	if (bioset_init(&c->dio_read_bioset,
-+			4, offsetof(struct dio_read, rbio.bio),
-+			BIOSET_NEED_BVECS))
-+		return -BCH_ERR_ENOMEM_dio_read_bioset_init;
-+
-+	if (bioset_init(&c->dio_write_bioset,
-+			4, offsetof(struct dio_write, op.wbio.bio),
-+			BIOSET_NEED_BVECS))
-+		return -BCH_ERR_ENOMEM_dio_write_bioset_init;
-+
-+	return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-io-direct.h b/fs/bcachefs/fs-io-direct.h
-new file mode 100644
-index 000000000000..814621ec7f81
---- /dev/null
-+++ b/fs/bcachefs/fs-io-direct.h
-@@ -0,0 +1,16 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IO_DIRECT_H
-+#define _BCACHEFS_FS_IO_DIRECT_H
-+
-+#ifndef NO_BCACHEFS_FS
-+ssize_t bch2_direct_write(struct kiocb *, struct iov_iter *);
-+ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *);
-+
-+void bch2_fs_fs_io_direct_exit(struct bch_fs *);
-+int bch2_fs_fs_io_direct_init(struct bch_fs *);
-+#else
-+static inline void bch2_fs_fs_io_direct_exit(struct bch_fs *c) {}
-+static inline int bch2_fs_fs_io_direct_init(struct bch_fs *c) { return 0; }
-+#endif
-+
-+#endif /* _BCACHEFS_FS_IO_DIRECT_H */
-diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
-new file mode 100644
-index 000000000000..8bd9bcdd27f7
---- /dev/null
-+++ b/fs/bcachefs/fs-io-pagecache.c
-@@ -0,0 +1,791 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "btree_iter.h"
-+#include "extents.h"
-+#include "fs-io.h"
-+#include "fs-io-pagecache.h"
-+#include "subvolume.h"
-+
-+#include <linux/pagevec.h>
-+#include <linux/writeback.h>
-+
-+int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
-+				     loff_t start, u64 end,
-+				     int fgp_flags, gfp_t gfp,
-+				     folios *fs)
-+{
-+	struct folio *f;
-+	u64 pos = start;
-+	int ret = 0;
-+
-+	while (pos < end) {
-+		if ((u64) pos >= (u64) start + (1ULL << 20))
-+			fgp_flags &= ~FGP_CREAT;
-+
-+		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
-+		if (ret)
-+			break;
-+
-+		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
-+		if (IS_ERR_OR_NULL(f))
-+			break;
-+
-+		BUG_ON(fs->nr && folio_pos(f) != pos);
-+
-+		pos = folio_end_pos(f);
-+		darray_push(fs, f);
-+	}
-+
-+	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
-+		ret = -ENOMEM;
-+
-+	return fs->nr ? 0 : ret;
-+}
-+
-+/* pagecache_block must be held */
-+int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
-+					    loff_t start, loff_t end)
-+{
-+	int ret;
-+
-+	/*
-+	 * XXX: the way this is currently implemented, we can spin if a process
-+	 * is continually redirtying a specific page
-+	 */
-+	do {
-+		if (!mapping->nrpages)
-+			return 0;
-+
-+		ret = filemap_write_and_wait_range(mapping, start, end);
-+		if (ret)
-+			break;
-+
-+		if (!mapping->nrpages)
-+			return 0;
-+
-+		ret = invalidate_inode_pages2_range(mapping,
-+				start >> PAGE_SHIFT,
-+				end >> PAGE_SHIFT);
-+	} while (ret == -EBUSY);
-+
-+	return ret;
-+}
-+
-+#if 0
-+/* Useful for debug tracing: */
-+static const char * const bch2_folio_sector_states[] = {
-+#define x(n)	#n,
-+	BCH_FOLIO_SECTOR_STATE()
-+#undef x
-+	NULL
-+};
-+#endif
-+
-+static inline enum bch_folio_sector_state
-+folio_sector_dirty(enum bch_folio_sector_state state)
-+{
-+	switch (state) {
-+	case SECTOR_unallocated:
-+		return SECTOR_dirty;
-+	case SECTOR_reserved:
-+		return SECTOR_dirty_reserved;
-+	default:
-+		return state;
-+	}
-+}
-+
-+static inline enum bch_folio_sector_state
-+folio_sector_undirty(enum bch_folio_sector_state state)
-+{
-+	switch (state) {
-+	case SECTOR_dirty:
-+		return SECTOR_unallocated;
-+	case SECTOR_dirty_reserved:
-+		return SECTOR_reserved;
-+	default:
-+		return state;
-+	}
-+}
-+
-+static inline enum bch_folio_sector_state
-+folio_sector_reserve(enum bch_folio_sector_state state)
-+{
-+	switch (state) {
-+	case SECTOR_unallocated:
-+		return SECTOR_reserved;
-+	case SECTOR_dirty:
-+		return SECTOR_dirty_reserved;
-+	default:
-+		return state;
-+	}
-+}
-+
-+/* for newly allocated folios: */
-+struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
-+{
-+	struct bch_folio *s;
-+
-+	s = kzalloc(sizeof(*s) +
-+		    sizeof(struct bch_folio_sector) *
-+		    folio_sectors(folio), gfp);
-+	if (!s)
-+		return NULL;
-+
-+	spin_lock_init(&s->lock);
-+	folio_attach_private(folio, s);
-+	return s;
-+}
-+
-+struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
-+{
-+	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
-+}
-+
-+static unsigned bkey_to_sector_state(struct bkey_s_c k)
-+{
-+	if (bkey_extent_is_reservation(k))
-+		return SECTOR_reserved;
-+	if (bkey_extent_is_allocation(k.k))
-+		return SECTOR_allocated;
-+	return SECTOR_unallocated;
-+}
-+
-+static void __bch2_folio_set(struct folio *folio,
-+			     unsigned pg_offset, unsigned pg_len,
-+			     unsigned nr_ptrs, unsigned state)
-+{
-+	struct bch_folio *s = bch2_folio(folio);
-+	unsigned i, sectors = folio_sectors(folio);
-+
-+	BUG_ON(pg_offset >= sectors);
-+	BUG_ON(pg_offset + pg_len > sectors);
-+
-+	spin_lock(&s->lock);
-+
-+	for (i = pg_offset; i < pg_offset + pg_len; i++) {
-+		s->s[i].nr_replicas = nr_ptrs;
-+		bch2_folio_sector_set(folio, s, i, state);
-+	}
-+
-+	if (i == sectors)
-+		s->uptodate = true;
-+
-+	spin_unlock(&s->lock);
-+}
-+
-+/*
-+ * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
-+ * extents btree:
-+ */
-+int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
-+		   struct folio **fs, unsigned nr_folios)
-+{
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	struct bch_folio *s;
-+	u64 offset = folio_sector(fs[0]);
-+	unsigned folio_idx;
-+	u32 snapshot;
-+	bool need_set = false;
-+	int ret;
-+
-+	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
-+		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
-+		if (!s)
-+			return -ENOMEM;
-+
-+		need_set |= !s->uptodate;
-+	}
-+
-+	if (!need_set)
-+		return 0;
-+
-+	folio_idx = 0;
-+	trans = bch2_trans_get(c);
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
-+			   SPOS(inum.inum, offset, snapshot),
-+			   BTREE_ITER_SLOTS, k, ret) {
-+		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
-+		unsigned state = bkey_to_sector_state(k);
-+
-+		while (folio_idx < nr_folios) {
-+			struct folio *folio = fs[folio_idx];
-+			u64 folio_start = folio_sector(folio);
-+			u64 folio_end = folio_end_sector(folio);
-+			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
-+				folio_start;
-+			unsigned folio_len = min(k.k->p.offset, folio_end) -
-+				folio_offset - folio_start;
-+
-+			BUG_ON(k.k->p.offset < folio_start);
-+			BUG_ON(bkey_start_offset(k.k) > folio_end);
-+
-+			if (!bch2_folio(folio)->uptodate)
-+				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
-+
-+			if (k.k->p.offset < folio_end)
-+				break;
-+			folio_idx++;
-+		}
-+
-+		if (folio_idx == nr_folios)
-+			break;
-+	}
-+
-+	offset = iter.pos.offset;
-+	bch2_trans_iter_exit(trans, &iter);
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+	bch2_trans_put(trans);
-+
-+	return ret;
-+}
-+
-+void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
-+{
-+	struct bvec_iter iter;
-+	struct folio_vec fv;
-+	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
-+		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
-+	unsigned state = bkey_to_sector_state(k);
-+
-+	bio_for_each_folio(fv, bio, iter)
-+		__bch2_folio_set(fv.fv_folio,
-+				 fv.fv_offset >> 9,
-+				 fv.fv_len >> 9,
-+				 nr_ptrs, state);
-+}
-+
-+void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
-+				     u64 start, u64 end)
-+{
-+	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
-+	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
-+	struct folio_batch fbatch;
-+	unsigned i, j;
-+
-+	if (end <= start)
-+		return;
-+
-+	folio_batch_init(&fbatch);
-+
-+	while (filemap_get_folios(inode->v.i_mapping,
-+				  &index, end_index, &fbatch)) {
-+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
-+			struct folio *folio = fbatch.folios[i];
-+			u64 folio_start = folio_sector(folio);
-+			u64 folio_end = folio_end_sector(folio);
-+			unsigned folio_offset = max(start, folio_start) - folio_start;
-+			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
-+			struct bch_folio *s;
-+
-+			BUG_ON(end <= folio_start);
-+
-+			folio_lock(folio);
-+			s = bch2_folio(folio);
-+
-+			if (s) {
-+				spin_lock(&s->lock);
-+				for (j = folio_offset; j < folio_offset + folio_len; j++)
-+					s->s[j].nr_replicas = 0;
-+				spin_unlock(&s->lock);
-+			}
-+
-+			folio_unlock(folio);
-+		}
-+		folio_batch_release(&fbatch);
-+		cond_resched();
-+	}
-+}
-+
-+void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
-+				  u64 start, u64 end)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
-+	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
-+	struct folio_batch fbatch;
-+	s64 i_sectors_delta = 0;
-+	unsigned i, j;
-+
-+	if (end <= start)
-+		return;
-+
-+	folio_batch_init(&fbatch);
-+
-+	while (filemap_get_folios(inode->v.i_mapping,
-+				  &index, end_index, &fbatch)) {
-+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
-+			struct folio *folio = fbatch.folios[i];
-+			u64 folio_start = folio_sector(folio);
-+			u64 folio_end = folio_end_sector(folio);
-+			unsigned folio_offset = max(start, folio_start) - folio_start;
-+			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
-+			struct bch_folio *s;
-+
-+			BUG_ON(end <= folio_start);
-+
-+			folio_lock(folio);
-+			s = bch2_folio(folio);
-+
-+			if (s) {
-+				spin_lock(&s->lock);
-+				for (j = folio_offset; j < folio_offset + folio_len; j++) {
-+					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
-+					bch2_folio_sector_set(folio, s, j,
-+						folio_sector_reserve(s->s[j].state));
-+				}
-+				spin_unlock(&s->lock);
-+			}
-+
-+			folio_unlock(folio);
-+		}
-+		folio_batch_release(&fbatch);
-+		cond_resched();
-+	}
-+
-+	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+}
-+
-+static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
-+					  unsigned nr_replicas)
-+{
-+	return max(0, (int) nr_replicas -
-+		   s->nr_replicas -
-+		   s->replicas_reserved);
-+}
-+
-+int bch2_get_folio_disk_reservation(struct bch_fs *c,
-+				    struct bch_inode_info *inode,
-+				    struct folio *folio, bool check_enospc)
-+{
-+	struct bch_folio *s = bch2_folio_create(folio, 0);
-+	unsigned nr_replicas = inode_nr_replicas(c, inode);
-+	struct disk_reservation disk_res = { 0 };
-+	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
-+	int ret;
-+
-+	if (!s)
-+		return -ENOMEM;
-+
-+	for (i = 0; i < sectors; i++)
-+		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
-+
-+	if (!disk_res_sectors)
-+		return 0;
-+
-+	ret = bch2_disk_reservation_get(c, &disk_res,
-+					disk_res_sectors, 1,
-+					!check_enospc
-+					? BCH_DISK_RESERVATION_NOFAIL
-+					: 0);
-+	if (unlikely(ret))
-+		return ret;
-+
-+	for (i = 0; i < sectors; i++)
-+		s->s[i].replicas_reserved +=
-+			sectors_to_reserve(&s->s[i], nr_replicas);
-+
-+	return 0;
-+}
-+
-+void bch2_folio_reservation_put(struct bch_fs *c,
-+			struct bch_inode_info *inode,
-+			struct bch2_folio_reservation *res)
-+{
-+	bch2_disk_reservation_put(c, &res->disk);
-+	bch2_quota_reservation_put(c, inode, &res->quota);
-+}
-+
-+int bch2_folio_reservation_get(struct bch_fs *c,
-+			struct bch_inode_info *inode,
-+			struct folio *folio,
-+			struct bch2_folio_reservation *res,
-+			unsigned offset, unsigned len)
-+{
-+	struct bch_folio *s = bch2_folio_create(folio, 0);
-+	unsigned i, disk_sectors = 0, quota_sectors = 0;
-+	int ret;
-+
-+	if (!s)
-+		return -ENOMEM;
-+
-+	BUG_ON(!s->uptodate);
-+
-+	for (i = round_down(offset, block_bytes(c)) >> 9;
-+	     i < round_up(offset + len, block_bytes(c)) >> 9;
-+	     i++) {
-+		disk_sectors += sectors_to_reserve(&s->s[i],
-+						res->disk.nr_replicas);
-+		quota_sectors += s->s[i].state == SECTOR_unallocated;
-+	}
-+
-+	if (disk_sectors) {
-+		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
-+		if (unlikely(ret))
-+			return ret;
-+	}
-+
-+	if (quota_sectors) {
-+		ret = bch2_quota_reservation_add(c, inode, &res->quota,
-+						 quota_sectors, true);
-+		if (unlikely(ret)) {
-+			struct disk_reservation tmp = {
-+				.sectors = disk_sectors
-+			};
-+
-+			bch2_disk_reservation_put(c, &tmp);
-+			res->disk.sectors -= disk_sectors;
-+			return ret;
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+static void bch2_clear_folio_bits(struct folio *folio)
-+{
-+	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct bch_folio *s = bch2_folio(folio);
-+	struct disk_reservation disk_res = { 0 };
-+	int i, sectors = folio_sectors(folio), dirty_sectors = 0;
-+
-+	if (!s)
-+		return;
-+
-+	EBUG_ON(!folio_test_locked(folio));
-+	EBUG_ON(folio_test_writeback(folio));
-+
-+	for (i = 0; i < sectors; i++) {
-+		disk_res.sectors += s->s[i].replicas_reserved;
-+		s->s[i].replicas_reserved = 0;
-+
-+		dirty_sectors -= s->s[i].state == SECTOR_dirty;
-+		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
-+	}
-+
-+	bch2_disk_reservation_put(c, &disk_res);
-+
-+	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
-+
-+	bch2_folio_release(folio);
-+}
-+
-+void bch2_set_folio_dirty(struct bch_fs *c,
-+			  struct bch_inode_info *inode,
-+			  struct folio *folio,
-+			  struct bch2_folio_reservation *res,
-+			  unsigned offset, unsigned len)
-+{
-+	struct bch_folio *s = bch2_folio(folio);
-+	unsigned i, dirty_sectors = 0;
-+
-+	WARN_ON((u64) folio_pos(folio) + offset + len >
-+		round_up((u64) i_size_read(&inode->v), block_bytes(c)));
-+
-+	BUG_ON(!s->uptodate);
-+
-+	spin_lock(&s->lock);
-+
-+	for (i = round_down(offset, block_bytes(c)) >> 9;
-+	     i < round_up(offset + len, block_bytes(c)) >> 9;
-+	     i++) {
-+		unsigned sectors = sectors_to_reserve(&s->s[i],
-+						res->disk.nr_replicas);
-+
-+		/*
-+		 * This can happen if we race with the error path in
-+		 * bch2_writepage_io_done():
-+		 */
-+		sectors = min_t(unsigned, sectors, res->disk.sectors);
-+
-+		s->s[i].replicas_reserved += sectors;
-+		res->disk.sectors -= sectors;
-+
-+		dirty_sectors += s->s[i].state == SECTOR_unallocated;
-+
-+		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
-+	}
-+
-+	spin_unlock(&s->lock);
-+
-+	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
-+
-+	if (!folio_test_dirty(folio))
-+		filemap_dirty_folio(inode->v.i_mapping, folio);
-+}
-+
-+vm_fault_t bch2_page_fault(struct vm_fault *vmf)
-+{
-+	struct file *file = vmf->vma->vm_file;
-+	struct address_space *mapping = file->f_mapping;
-+	struct address_space *fdm = faults_disabled_mapping();
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	vm_fault_t ret;
-+
-+	if (fdm == mapping)
-+		return VM_FAULT_SIGBUS;
-+
-+	/* Lock ordering: */
-+	if (fdm > mapping) {
-+		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
-+
-+		if (bch2_pagecache_add_tryget(inode))
-+			goto got_lock;
-+
-+		bch2_pagecache_block_put(fdm_host);
-+
-+		bch2_pagecache_add_get(inode);
-+		bch2_pagecache_add_put(inode);
-+
-+		bch2_pagecache_block_get(fdm_host);
-+
-+		/* Signal that lock has been dropped: */
-+		set_fdm_dropped_locks();
-+		return VM_FAULT_SIGBUS;
-+	}
-+
-+	bch2_pagecache_add_get(inode);
-+got_lock:
-+	ret = filemap_fault(vmf);
-+	bch2_pagecache_add_put(inode);
-+
-+	return ret;
-+}
-+
-+vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
-+{
-+	struct folio *folio = page_folio(vmf->page);
-+	struct file *file = vmf->vma->vm_file;
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct address_space *mapping = file->f_mapping;
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct bch2_folio_reservation res;
-+	unsigned len;
-+	loff_t isize;
-+	vm_fault_t ret;
-+
-+	bch2_folio_reservation_init(c, inode, &res);
-+
-+	sb_start_pagefault(inode->v.i_sb);
-+	file_update_time(file);
-+
-+	/*
-+	 * Not strictly necessary, but helps avoid dio writes livelocking in
-+	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
-+	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
-+	 * page lock before invalidating page
-+	 */
-+	bch2_pagecache_add_get(inode);
-+
-+	folio_lock(folio);
-+	isize = i_size_read(&inode->v);
-+
-+	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
-+		folio_unlock(folio);
-+		ret = VM_FAULT_NOPAGE;
-+		goto out;
-+	}
-+
-+	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
-+
-+	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
-+	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
-+		folio_unlock(folio);
-+		ret = VM_FAULT_SIGBUS;
-+		goto out;
-+	}
-+
-+	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
-+	bch2_folio_reservation_put(c, inode, &res);
-+
-+	folio_wait_stable(folio);
-+	ret = VM_FAULT_LOCKED;
-+out:
-+	bch2_pagecache_add_put(inode);
-+	sb_end_pagefault(inode->v.i_sb);
-+
-+	return ret;
-+}
-+
-+void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
-+{
-+	if (offset || length < folio_size(folio))
-+		return;
-+
-+	bch2_clear_folio_bits(folio);
-+}
-+
-+bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
-+{
-+	if (folio_test_dirty(folio) || folio_test_writeback(folio))
-+		return false;
-+
-+	bch2_clear_folio_bits(folio);
-+	return true;
-+}
-+
-+/* fseek: */
-+
-+static int folio_data_offset(struct folio *folio, loff_t pos,
-+			     unsigned min_replicas)
-+{
-+	struct bch_folio *s = bch2_folio(folio);
-+	unsigned i, sectors = folio_sectors(folio);
-+
-+	if (s)
-+		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
-+			if (s->s[i].state >= SECTOR_dirty &&
-+			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
-+				return i << SECTOR_SHIFT;
-+
-+	return -1;
-+}
-+
-+loff_t bch2_seek_pagecache_data(struct inode *vinode,
-+				loff_t start_offset,
-+				loff_t end_offset,
-+				unsigned min_replicas,
-+				bool nonblock)
-+{
-+	struct folio_batch fbatch;
-+	pgoff_t start_index	= start_offset >> PAGE_SHIFT;
-+	pgoff_t end_index	= end_offset >> PAGE_SHIFT;
-+	pgoff_t index		= start_index;
-+	unsigned i;
-+	loff_t ret;
-+	int offset;
-+
-+	folio_batch_init(&fbatch);
-+
-+	while (filemap_get_folios(vinode->i_mapping,
-+				  &index, end_index, &fbatch)) {
-+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
-+			struct folio *folio = fbatch.folios[i];
-+
-+			if (!nonblock) {
-+				folio_lock(folio);
-+			} else if (!folio_trylock(folio)) {
-+				folio_batch_release(&fbatch);
-+				return -EAGAIN;
-+			}
-+
-+			offset = folio_data_offset(folio,
-+					max(folio_pos(folio), start_offset),
-+					min_replicas);
-+			if (offset >= 0) {
-+				ret = clamp(folio_pos(folio) + offset,
-+					    start_offset, end_offset);
-+				folio_unlock(folio);
-+				folio_batch_release(&fbatch);
-+				return ret;
-+			}
-+			folio_unlock(folio);
-+		}
-+		folio_batch_release(&fbatch);
-+		cond_resched();
-+	}
-+
-+	return end_offset;
-+}
-+
-+/*
-+ * Search for a hole in a folio.
-+ *
-+ * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
-+ * code to indicate a pagecache hole exists at the returned offset. Otherwise
-+ * return 0 if the folio is filled with data, or an error code. This function
-+ * can return -EAGAIN if nonblock is specified.
-+ */
-+static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
-+			      unsigned min_replicas, bool nonblock)
-+{
-+	struct folio *folio;
-+	struct bch_folio *s;
-+	unsigned i, sectors;
-+	int ret = -ENOENT;
-+
-+	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
-+				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
-+	if (IS_ERR(folio))
-+		return PTR_ERR(folio);
-+
-+	s = bch2_folio(folio);
-+	if (!s)
-+		goto unlock;
-+
-+	sectors = folio_sectors(folio);
-+	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
-+		if (s->s[i].state < SECTOR_dirty ||
-+		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
-+			*offset = max(*offset,
-+				      folio_pos(folio) + (i << SECTOR_SHIFT));
-+			goto unlock;
-+		}
-+
-+	*offset = folio_end_pos(folio);
-+	ret = 0;
-+unlock:
-+	folio_unlock(folio);
-+	folio_put(folio);
-+	return ret;
-+}
-+
-+loff_t bch2_seek_pagecache_hole(struct inode *vinode,
-+				loff_t start_offset,
-+				loff_t end_offset,
-+				unsigned min_replicas,
-+				bool nonblock)
-+{
-+	struct address_space *mapping = vinode->i_mapping;
-+	loff_t offset = start_offset;
-+	loff_t ret = 0;
-+
-+	while (!ret && offset < end_offset)
-+		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
-+
-+	if (ret && ret != -ENOENT)
-+		return ret;
-+	return min(offset, end_offset);
-+}
-+
-+int bch2_clamp_data_hole(struct inode *inode,
-+			 u64 *hole_start,
-+			 u64 *hole_end,
-+			 unsigned min_replicas,
-+			 bool nonblock)
-+{
-+	loff_t ret;
-+
-+	ret = bch2_seek_pagecache_hole(inode,
-+		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
-+	if (ret < 0)
-+		return ret;
-+
-+	*hole_start = ret;
-+
-+	if (*hole_start == *hole_end)
-+		return 0;
-+
-+	ret = bch2_seek_pagecache_data(inode,
-+		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
-+	if (ret < 0)
-+		return ret;
-+
-+	*hole_end = ret;
-+	return 0;
-+}
-+
-+#endif /* NO_BCACHEFS_FS */
-diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
-new file mode 100644
-index 000000000000..a2222ad586e9
---- /dev/null
-+++ b/fs/bcachefs/fs-io-pagecache.h
-@@ -0,0 +1,176 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_FS_IO_PAGECACHE_H
-+#define _BCACHEFS_FS_IO_PAGECACHE_H
-+
-+#include <linux/pagemap.h>
-+
-+typedef DARRAY(struct folio *) folios;
-+
-+int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
-+				     u64, int, gfp_t, folios *);
-+int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
-+
-+/*
-+ * Use u64 for the end pos and sector helpers because if the folio covers the
-+ * max supported range of the mapping, the start offset of the next folio
-+ * overflows loff_t. This breaks much of the range based processing in the
-+ * buffered write path.
-+ */
-+static inline u64 folio_end_pos(struct folio *folio)
-+{
-+	return folio_pos(folio) + folio_size(folio);
-+}
-+
-+static inline size_t folio_sectors(struct folio *folio)
-+{
-+	return PAGE_SECTORS << folio_order(folio);
-+}
-+
-+static inline loff_t folio_sector(struct folio *folio)
-+{
-+	return folio_pos(folio) >> 9;
-+}
-+
-+static inline u64 folio_end_sector(struct folio *folio)
-+{
-+	return folio_end_pos(folio) >> 9;
-+}
-+
-+#define BCH_FOLIO_SECTOR_STATE()	\
-+	x(unallocated)			\
-+	x(reserved)			\
-+	x(dirty)			\
-+	x(dirty_reserved)		\
-+	x(allocated)
-+
-+enum bch_folio_sector_state {
-+#define x(n)	SECTOR_##n,
-+	BCH_FOLIO_SECTOR_STATE()
-+#undef x
-+};
-+
-+struct bch_folio_sector {
-+	/* Uncompressed, fully allocated replicas (or on disk reservation): */
-+	unsigned		nr_replicas:4;
-+
-+	/* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */
-+	unsigned		replicas_reserved:4;
-+
-+	/* i_sectors: */
-+	enum bch_folio_sector_state state:8;
-+};
-+
-+struct bch_folio {
-+	spinlock_t		lock;
-+	atomic_t		write_count;
-+	/*
-+	 * Is the sector state up to date with the btree?
-+	 * (Not the data itself)
-+	 */
-+	bool			uptodate;
-+	struct bch_folio_sector	s[];
-+};
-+
-+/* Helper for when we need to add debug instrumentation: */
-+static inline void bch2_folio_sector_set(struct folio *folio,
-+			     struct bch_folio *s,
-+			     unsigned i, unsigned n)
-+{
-+	s->s[i].state = n;
-+}
-+
-+/* file offset (to folio offset) to bch_folio_sector index */
-+static inline int folio_pos_to_s(struct folio *folio, loff_t pos)
-+{
-+	u64 f_offset = pos - folio_pos(folio);
-+
-+	BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio));
-+	return f_offset >> SECTOR_SHIFT;
-+}
-+
-+/* for newly allocated folios: */
-+static inline void __bch2_folio_release(struct folio *folio)
-+{
-+	kfree(folio_detach_private(folio));
-+}
-+
-+static inline void bch2_folio_release(struct folio *folio)
-+{
-+	EBUG_ON(!folio_test_locked(folio));
-+	__bch2_folio_release(folio);
-+}
-+
-+static inline struct bch_folio *__bch2_folio(struct folio *folio)
-+{
-+	return folio_has_private(folio)
-+		? (struct bch_folio *) folio_get_private(folio)
-+		: NULL;
-+}
-+
-+static inline struct bch_folio *bch2_folio(struct folio *folio)
-+{
-+	EBUG_ON(!folio_test_locked(folio));
-+
-+	return __bch2_folio(folio);
-+}
-+
-+struct bch_folio *__bch2_folio_create(struct folio *, gfp_t);
-+struct bch_folio *bch2_folio_create(struct folio *, gfp_t);
-+
-+struct bch2_folio_reservation {
-+	struct disk_reservation	disk;
-+	struct quota_res	quota;
-+};
-+
-+static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
-+{
-+	/* XXX: this should not be open coded */
-+	return inode->ei_inode.bi_data_replicas
-+		? inode->ei_inode.bi_data_replicas - 1
-+		: c->opts.data_replicas;
-+}
-+
-+static inline void bch2_folio_reservation_init(struct bch_fs *c,
-+			struct bch_inode_info *inode,
-+			struct bch2_folio_reservation *res)
-+{
-+	memset(res, 0, sizeof(*res));
-+
-+	res->disk.nr_replicas = inode_nr_replicas(c, inode);
-+}
-+
-+int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned);
-+void bch2_bio_page_state_set(struct bio *, struct bkey_s_c);
-+
-+void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64);
-+void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64);
-+
-+int bch2_get_folio_disk_reservation(struct bch_fs *,
-+				struct bch_inode_info *,
-+				struct folio *, bool);
-+
-+void bch2_folio_reservation_put(struct bch_fs *,
-+				struct bch_inode_info *,
-+				struct bch2_folio_reservation *);
-+int bch2_folio_reservation_get(struct bch_fs *,
-+			       struct bch_inode_info *,
-+			       struct folio *,
-+			       struct bch2_folio_reservation *,
-+			       unsigned, unsigned);
-+
-+void bch2_set_folio_dirty(struct bch_fs *,
-+			  struct bch_inode_info *,
-+			  struct folio *,
-+			  struct bch2_folio_reservation *,
-+			  unsigned, unsigned);
-+
-+vm_fault_t bch2_page_fault(struct vm_fault *);
-+vm_fault_t bch2_page_mkwrite(struct vm_fault *);
-+void bch2_invalidate_folio(struct folio *, size_t, size_t);
-+bool bch2_release_folio(struct folio *, gfp_t);
-+
-+loff_t bch2_seek_pagecache_data(struct inode *, loff_t, loff_t, unsigned, bool);
-+loff_t bch2_seek_pagecache_hole(struct inode *, loff_t, loff_t, unsigned, bool);
-+int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool);
-+
-+#endif /* _BCACHEFS_FS_IO_PAGECACHE_H */
-diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
-new file mode 100644
-index 000000000000..b0e8144ec550
---- /dev/null
-+++ b/fs/bcachefs/fs-io.c
-@@ -0,0 +1,1072 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifndef NO_BCACHEFS_FS
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "extent_update.h"
-+#include "fs.h"
-+#include "fs-io.h"
-+#include "fs-io-buffered.h"
-+#include "fs-io-pagecache.h"
-+#include "fsck.h"
-+#include "inode.h"
-+#include "journal.h"
-+#include "io_misc.h"
-+#include "keylist.h"
-+#include "quota.h"
-+#include "reflink.h"
-+#include "trace.h"
-+
-+#include <linux/aio.h>
-+#include <linux/backing-dev.h>
-+#include <linux/falloc.h>
-+#include <linux/migrate.h>
-+#include <linux/mmu_context.h>
-+#include <linux/pagevec.h>
-+#include <linux/rmap.h>
-+#include <linux/sched/signal.h>
-+#include <linux/task_io_accounting_ops.h>
-+#include <linux/uio.h>
-+
-+#include <trace/events/writeback.h>
-+
-+struct nocow_flush {
-+	struct closure	*cl;
-+	struct bch_dev	*ca;
-+	struct bio	bio;
-+};
-+
-+static void nocow_flush_endio(struct bio *_bio)
-+{
-+
-+	struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio);
-+
-+	closure_put(bio->cl);
-+	percpu_ref_put(&bio->ca->io_ref);
-+	bio_put(&bio->bio);
-+}
-+
-+void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
-+					 struct bch_inode_info *inode,
-+					 struct closure *cl)
-+{
-+	struct nocow_flush *bio;
-+	struct bch_dev *ca;
-+	struct bch_devs_mask devs;
-+	unsigned dev;
-+
-+	dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX);
-+	if (dev == BCH_SB_MEMBERS_MAX)
-+		return;
-+
-+	devs = inode->ei_devs_need_flush;
-+	memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
-+
-+	for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
-+		rcu_read_lock();
-+		ca = rcu_dereference(c->devs[dev]);
-+		if (ca && !percpu_ref_tryget(&ca->io_ref))
-+			ca = NULL;
-+		rcu_read_unlock();
-+
-+		if (!ca)
-+			continue;
-+
-+		bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0,
-+						    REQ_OP_FLUSH,
-+						    GFP_KERNEL,
-+						    &c->nocow_flush_bioset),
-+				   struct nocow_flush, bio);
-+		bio->cl			= cl;
-+		bio->ca			= ca;
-+		bio->bio.bi_end_io	= nocow_flush_endio;
-+		closure_bio_submit(&bio->bio, cl);
-+	}
-+}
-+
-+static int bch2_inode_flush_nocow_writes(struct bch_fs *c,
-+					 struct bch_inode_info *inode)
-+{
-+	struct closure cl;
-+
-+	closure_init_stack(&cl);
-+	bch2_inode_flush_nocow_writes_async(c, inode, &cl);
-+	closure_sync(&cl);
-+
-+	return 0;
-+}
-+
-+/* i_size updates: */
-+
-+struct inode_new_size {
-+	loff_t		new_size;
-+	u64		now;
-+	unsigned	fields;
-+};
-+
-+static int inode_set_size(struct btree_trans *trans,
-+			  struct bch_inode_info *inode,
-+			  struct bch_inode_unpacked *bi,
-+			  void *p)
-+{
-+	struct inode_new_size *s = p;
-+
-+	bi->bi_size = s->new_size;
-+	if (s->fields & ATTR_ATIME)
-+		bi->bi_atime = s->now;
-+	if (s->fields & ATTR_MTIME)
-+		bi->bi_mtime = s->now;
-+	if (s->fields & ATTR_CTIME)
-+		bi->bi_ctime = s->now;
-+
-+	return 0;
-+}
-+
-+int __must_check bch2_write_inode_size(struct bch_fs *c,
-+				       struct bch_inode_info *inode,
-+				       loff_t new_size, unsigned fields)
-+{
-+	struct inode_new_size s = {
-+		.new_size	= new_size,
-+		.now		= bch2_current_time(c),
-+		.fields		= fields,
-+	};
-+
-+	return bch2_write_inode(c, inode, inode_set_size, &s, fields);
-+}
-+
-+void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
-+			   struct quota_res *quota_res, s64 sectors)
-+{
-+	bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c,
-+				"inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
-+				inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
-+				inode->ei_inode.bi_sectors);
-+	inode->v.i_blocks += sectors;
-+
-+#ifdef CONFIG_BCACHEFS_QUOTA
-+	if (quota_res &&
-+	    !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) &&
-+	    sectors > 0) {
-+		BUG_ON(sectors > quota_res->sectors);
-+		BUG_ON(sectors > inode->ei_quota_reserved);
-+
-+		quota_res->sectors -= sectors;
-+		inode->ei_quota_reserved -= sectors;
-+	} else {
-+		bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
-+	}
-+#endif
-+}
-+
-+/* fsync: */
-+
-+/*
-+ * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an
-+ * insert trigger: look up the btree inode instead
-+ */
-+static int bch2_flush_inode(struct bch_fs *c,
-+			    struct bch_inode_info *inode)
-+{
-+	struct bch_inode_unpacked u;
-+	int ret;
-+
-+	if (c->opts.journal_flush_disabled)
-+		return 0;
-+
-+	ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
-+	if (ret)
-+		return ret;
-+
-+	return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
-+		bch2_inode_flush_nocow_writes(c, inode);
-+}
-+
-+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	int ret, ret2, ret3;
-+
-+	ret = file_write_and_wait_range(file, start, end);
-+	ret2 = sync_inode_metadata(&inode->v, 1);
-+	ret3 = bch2_flush_inode(c, inode);
-+
-+	return bch2_err_class(ret ?: ret2 ?: ret3);
-+}
-+
-+/* truncate: */
-+
-+static inline int range_has_data(struct bch_fs *c, u32 subvol,
-+				 struct bpos start,
-+				 struct bpos end)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	int ret = 0;
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot);
-+	if (ret)
-+		goto err;
-+
-+	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
-+		if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) {
-+			ret = 1;
-+			break;
-+		}
-+	start = iter.pos;
-+	bch2_trans_iter_exit(trans, &iter);
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+
-+	bch2_trans_put(trans);
-+	return ret;
-+}
-+
-+static int __bch2_truncate_folio(struct bch_inode_info *inode,
-+				 pgoff_t index, loff_t start, loff_t end)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct address_space *mapping = inode->v.i_mapping;
-+	struct bch_folio *s;
-+	unsigned start_offset;
-+	unsigned end_offset;
-+	unsigned i;
-+	struct folio *folio;
-+	s64 i_sectors_delta = 0;
-+	int ret = 0;
-+	u64 end_pos;
-+
-+	folio = filemap_lock_folio(mapping, index);
-+	if (IS_ERR_OR_NULL(folio)) {
-+		/*
-+		 * XXX: we're doing two index lookups when we end up reading the
-+		 * folio
-+		 */
-+		ret = range_has_data(c, inode->ei_subvol,
-+				POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
-+				POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
-+		if (ret <= 0)
-+			return ret;
-+
-+		folio = __filemap_get_folio(mapping, index,
-+					    FGP_LOCK|FGP_CREAT, GFP_KERNEL);
-+		if (IS_ERR_OR_NULL(folio)) {
-+			ret = -ENOMEM;
-+			goto out;
-+		}
-+	}
-+
-+	BUG_ON(start	>= folio_end_pos(folio));
-+	BUG_ON(end	<= folio_pos(folio));
-+
-+	start_offset	= max(start, folio_pos(folio)) - folio_pos(folio);
-+	end_offset	= min_t(u64, end, folio_end_pos(folio)) - folio_pos(folio);
-+
-+	/* Folio boundary? Nothing to do */
-+	if (start_offset == 0 &&
-+	    end_offset == folio_size(folio)) {
-+		ret = 0;
-+		goto unlock;
-+	}
-+
-+	s = bch2_folio_create(folio, 0);
-+	if (!s) {
-+		ret = -ENOMEM;
-+		goto unlock;
-+	}
-+
-+	if (!folio_test_uptodate(folio)) {
-+		ret = bch2_read_single_folio(folio, mapping);
-+		if (ret)
-+			goto unlock;
-+	}
-+
-+	ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
-+	if (ret)
-+		goto unlock;
-+
-+	for (i = round_up(start_offset, block_bytes(c)) >> 9;
-+	     i < round_down(end_offset, block_bytes(c)) >> 9;
-+	     i++) {
-+		s->s[i].nr_replicas	= 0;
-+
-+		i_sectors_delta		-= s->s[i].state == SECTOR_dirty;
-+		bch2_folio_sector_set(folio, s, i, SECTOR_unallocated);
-+	}
-+
-+	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+
-+	/*
-+	 * Caller needs to know whether this folio will be written out by
-+	 * writeback - doing an i_size update if necessary - or whether it will
-+	 * be responsible for the i_size update.
-+	 *
-+	 * Note that we shouldn't ever see a folio beyond EOF, but check and
-+	 * warn if so. This has been observed by failure to clean up folios
-+	 * after a short write and there's still a chance reclaim will fix
-+	 * things up.
-+	 */
-+	WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size);
-+	end_pos = folio_end_pos(folio);
-+	if (inode->v.i_size > folio_pos(folio))
-+		end_pos = min_t(u64, inode->v.i_size, end_pos);
-+	ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty;
-+
-+	folio_zero_segment(folio, start_offset, end_offset);
-+
-+	/*
-+	 * Bit of a hack - we don't want truncate to fail due to -ENOSPC.
-+	 *
-+	 * XXX: because we aren't currently tracking whether the folio has actual
-+	 * data in it (vs. just 0s, or only partially written) this wrong. ick.
-+	 */
-+	BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false));
-+
-+	/*
-+	 * This removes any writeable userspace mappings; we need to force
-+	 * .page_mkwrite to be called again before any mmapped writes, to
-+	 * redirty the full page:
-+	 */
-+	folio_mkclean(folio);
-+	filemap_dirty_folio(mapping, folio);
-+unlock:
-+	folio_unlock(folio);
-+	folio_put(folio);
-+out:
-+	return ret;
-+}
-+
-+static int bch2_truncate_folio(struct bch_inode_info *inode, loff_t from)
-+{
-+	return __bch2_truncate_folio(inode, from >> PAGE_SHIFT,
-+				     from, ANYSINT_MAX(loff_t));
-+}
-+
-+static int bch2_truncate_folios(struct bch_inode_info *inode,
-+				loff_t start, loff_t end)
-+{
-+	int ret = __bch2_truncate_folio(inode, start >> PAGE_SHIFT,
-+					start, end);
-+
-+	if (ret >= 0 &&
-+	    start >> PAGE_SHIFT != end >> PAGE_SHIFT)
-+		ret = __bch2_truncate_folio(inode,
-+					(end - 1) >> PAGE_SHIFT,
-+					start, end);
-+	return ret;
-+}
-+
-+static int bch2_extend(struct mnt_idmap *idmap,
-+		       struct bch_inode_info *inode,
-+		       struct bch_inode_unpacked *inode_u,
-+		       struct iattr *iattr)
-+{
-+	struct address_space *mapping = inode->v.i_mapping;
-+	int ret;
-+
-+	/*
-+	 * sync appends:
-+	 *
-+	 * this has to be done _before_ extending i_size:
-+	 */
-+	ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX);
-+	if (ret)
-+		return ret;
-+
-+	truncate_setsize(&inode->v, iattr->ia_size);
-+
-+	return bch2_setattr_nonsize(idmap, inode, iattr);
-+}
-+
-+int bchfs_truncate(struct mnt_idmap *idmap,
-+		   struct bch_inode_info *inode, struct iattr *iattr)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct address_space *mapping = inode->v.i_mapping;
-+	struct bch_inode_unpacked inode_u;
-+	s64 i_sectors_delta = 0;
-+	int ret = 0;
-+
-+	/*
-+	 * If the truncate call with change the size of the file, the
-+	 * cmtimes should be updated. If the size will not change, we
-+	 * do not need to update the cmtimes.
-+	 */
-+	if (iattr->ia_size != inode->v.i_size) {
-+		if (!(iattr->ia_valid & ATTR_MTIME))
-+			ktime_get_coarse_real_ts64(&iattr->ia_mtime);
-+		if (!(iattr->ia_valid & ATTR_CTIME))
-+			ktime_get_coarse_real_ts64(&iattr->ia_ctime);
-+		iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME;
-+	}
-+
-+	inode_dio_wait(&inode->v);
-+	bch2_pagecache_block_get(inode);
-+
-+	ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u);
-+	if (ret)
-+		goto err;
-+
-+	/*
-+	 * check this before next assertion; on filesystem error our normal
-+	 * invariants are a bit broken (truncate has to truncate the page cache
-+	 * before the inode).
-+	 */
-+	ret = bch2_journal_error(&c->journal);
-+	if (ret)
-+		goto err;
-+
-+	WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) &&
-+		  inode->v.i_size < inode_u.bi_size,
-+		  "truncate spotted in mem i_size < btree i_size: %llu < %llu\n",
-+		  (u64) inode->v.i_size, inode_u.bi_size);
-+
-+	if (iattr->ia_size > inode->v.i_size) {
-+		ret = bch2_extend(idmap, inode, &inode_u, iattr);
-+		goto err;
-+	}
-+
-+	iattr->ia_valid &= ~ATTR_SIZE;
-+
-+	ret = bch2_truncate_folio(inode, iattr->ia_size);
-+	if (unlikely(ret < 0))
-+		goto err;
-+
-+	truncate_setsize(&inode->v, iattr->ia_size);
-+
-+	/*
-+	 * When extending, we're going to write the new i_size to disk
-+	 * immediately so we need to flush anything above the current on disk
-+	 * i_size first:
-+	 *
-+	 * Also, when extending we need to flush the page that i_size currently
-+	 * straddles - if it's mapped to userspace, we need to ensure that
-+	 * userspace has to redirty it and call .mkwrite -> set_page_dirty
-+	 * again to allocate the part of the page that was extended.
-+	 */
-+	if (iattr->ia_size > inode_u.bi_size)
-+		ret = filemap_write_and_wait_range(mapping,
-+				inode_u.bi_size,
-+				iattr->ia_size - 1);
-+	else if (iattr->ia_size & (PAGE_SIZE - 1))
-+		ret = filemap_write_and_wait_range(mapping,
-+				round_down(iattr->ia_size, PAGE_SIZE),
-+				iattr->ia_size - 1);
-+	if (ret)
-+		goto err;
-+
-+	ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
-+	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+
-+	if (unlikely(ret)) {
-+		/*
-+		 * If we error here, VFS caches are now inconsistent with btree
-+		 */
-+		set_bit(EI_INODE_ERROR, &inode->ei_flags);
-+		goto err;
-+	}
-+
-+	bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
-+				!bch2_journal_error(&c->journal), c,
-+				"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
-+				inode->v.i_ino, (u64) inode->v.i_blocks,
-+				inode->ei_inode.bi_sectors);
-+
-+	ret = bch2_setattr_nonsize(idmap, inode, iattr);
-+err:
-+	bch2_pagecache_block_put(inode);
-+	return bch2_err_class(ret);
-+}
-+
-+/* fallocate: */
-+
-+static int inode_update_times_fn(struct btree_trans *trans,
-+				 struct bch_inode_info *inode,
-+				 struct bch_inode_unpacked *bi, void *p)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+
-+	bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
-+	return 0;
-+}
-+
-+static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	u64 end		= offset + len;
-+	u64 block_start	= round_up(offset, block_bytes(c));
-+	u64 block_end	= round_down(end, block_bytes(c));
-+	bool truncated_last_page;
-+	int ret = 0;
-+
-+	ret = bch2_truncate_folios(inode, offset, end);
-+	if (unlikely(ret < 0))
-+		goto err;
-+
-+	truncated_last_page = ret;
-+
-+	truncate_pagecache_range(&inode->v, offset, end - 1);
-+
-+	if (block_start < block_end) {
-+		s64 i_sectors_delta = 0;
-+
-+		ret = bch2_fpunch(c, inode_inum(inode),
-+				  block_start >> 9, block_end >> 9,
-+				  &i_sectors_delta);
-+		bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+	}
-+
-+	mutex_lock(&inode->ei_update_lock);
-+	if (end >= inode->v.i_size && !truncated_last_page) {
-+		ret = bch2_write_inode_size(c, inode, inode->v.i_size,
-+					    ATTR_MTIME|ATTR_CTIME);
-+	} else {
-+		ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
-+				       ATTR_MTIME|ATTR_CTIME);
-+	}
-+	mutex_unlock(&inode->ei_update_lock);
-+err:
-+	return ret;
-+}
-+
-+static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
-+				   loff_t offset, loff_t len,
-+				   bool insert)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct address_space *mapping = inode->v.i_mapping;
-+	s64 i_sectors_delta = 0;
-+	int ret = 0;
-+
-+	if ((offset | len) & (block_bytes(c) - 1))
-+		return -EINVAL;
-+
-+	if (insert) {
-+		if (offset >= inode->v.i_size)
-+			return -EINVAL;
-+	} else {
-+		if (offset + len >= inode->v.i_size)
-+			return -EINVAL;
-+	}
-+
-+	ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
-+	if (ret)
-+		return ret;
-+
-+	if (insert)
-+		i_size_write(&inode->v, inode->v.i_size + len);
-+
-+	ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9,
-+				     insert, &i_sectors_delta);
-+	if (!ret && !insert)
-+		i_size_write(&inode->v, inode->v.i_size - len);
-+	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
-+
-+	return ret;
-+}
-+
-+static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
-+			     u64 start_sector, u64 end_sector)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bpos end_pos = POS(inode->v.i_ino, end_sector);
-+	struct bch_io_opts opts;
-+	int ret = 0;
-+
-+	bch2_inode_opts_get(&opts, c, &inode->ei_inode);
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			POS(inode->v.i_ino, start_sector),
-+			BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+
-+	while (!ret && bkey_lt(iter.pos, end_pos)) {
-+		s64 i_sectors_delta = 0;
-+		struct quota_res quota_res = { 0 };
-+		struct bkey_s_c k;
-+		unsigned sectors;
-+		bool is_allocation;
-+		u64 hole_start, hole_end;
-+		u32 snapshot;
-+
-+		bch2_trans_begin(trans);
-+
-+		ret = bch2_subvolume_get_snapshot(trans,
-+					inode->ei_subvol, &snapshot);
-+		if (ret)
-+			goto bkey_err;
-+
-+		bch2_btree_iter_set_snapshot(&iter, snapshot);
-+
-+		k = bch2_btree_iter_peek_slot(&iter);
-+		if ((ret = bkey_err(k)))
-+			goto bkey_err;
-+
-+		hole_start	= iter.pos.offset;
-+		hole_end	= bpos_min(k.k->p, end_pos).offset;
-+		is_allocation	= bkey_extent_is_allocation(k.k);
-+
-+		/* already reserved */
-+		if (bkey_extent_is_reservation(k) &&
-+		    bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
-+			bch2_btree_iter_advance(&iter);
-+			continue;
-+		}
-+
-+		if (bkey_extent_is_data(k.k) &&
-+		    !(mode & FALLOC_FL_ZERO_RANGE)) {
-+			bch2_btree_iter_advance(&iter);
-+			continue;
-+		}
-+
-+		if (!(mode & FALLOC_FL_ZERO_RANGE)) {
-+			/*
-+			 * Lock ordering - can't be holding btree locks while
-+			 * blocking on a folio lock:
-+			 */
-+			if (bch2_clamp_data_hole(&inode->v,
-+						 &hole_start,
-+						 &hole_end,
-+						 opts.data_replicas, true))
-+				ret = drop_locks_do(trans,
-+					(bch2_clamp_data_hole(&inode->v,
-+							      &hole_start,
-+							      &hole_end,
-+							      opts.data_replicas, false), 0));
-+			bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
-+
-+			if (ret)
-+				goto bkey_err;
-+
-+			if (hole_start == hole_end)
-+				continue;
-+		}
-+
-+		sectors = hole_end - hole_start;
-+
-+		if (!is_allocation) {
-+			ret = bch2_quota_reservation_add(c, inode,
-+					&quota_res, sectors, true);
-+			if (unlikely(ret))
-+				goto bkey_err;
-+		}
-+
-+		ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter,
-+					    sectors, opts, &i_sectors_delta,
-+					    writepoint_hashed((unsigned long) current));
-+		if (ret)
-+			goto bkey_err;
-+
-+		bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
-+
-+		drop_locks_do(trans,
-+			(bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
-+bkey_err:
-+		bch2_quota_reservation_put(c, inode, &quota_res);
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			ret = 0;
-+	}
-+
-+	if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
-+		struct quota_res quota_res = { 0 };
-+		s64 i_sectors_delta = 0;
-+
-+		bch2_fpunch_at(trans, &iter, inode_inum(inode),
-+			       end_sector, &i_sectors_delta);
-+		bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
-+		bch2_quota_reservation_put(c, inode, &quota_res);
-+	}
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_trans_put(trans);
-+	return ret;
-+}
-+
-+static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
-+			    loff_t offset, loff_t len)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	u64 end		= offset + len;
-+	u64 block_start	= round_down(offset, block_bytes(c));
-+	u64 block_end	= round_up(end, block_bytes(c));
-+	bool truncated_last_page = false;
-+	int ret, ret2 = 0;
-+
-+	if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
-+		ret = inode_newsize_ok(&inode->v, end);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	if (mode & FALLOC_FL_ZERO_RANGE) {
-+		ret = bch2_truncate_folios(inode, offset, end);
-+		if (unlikely(ret < 0))
-+			return ret;
-+
-+		truncated_last_page = ret;
-+
-+		truncate_pagecache_range(&inode->v, offset, end - 1);
-+
-+		block_start	= round_up(offset, block_bytes(c));
-+		block_end	= round_down(end, block_bytes(c));
-+	}
-+
-+	ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);
-+
-+	/*
-+	 * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
-+	 * so that the VFS cache i_size is consistent with the btree i_size:
-+	 */
-+	if (ret &&
-+	    !(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)))
-+		return ret;
-+
-+	if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
-+		end = inode->v.i_size;
-+
-+	if (end >= inode->v.i_size &&
-+	    (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) ||
-+	     !(mode & FALLOC_FL_KEEP_SIZE))) {
-+		spin_lock(&inode->v.i_lock);
-+		i_size_write(&inode->v, end);
-+		spin_unlock(&inode->v.i_lock);
-+
-+		mutex_lock(&inode->ei_update_lock);
-+		ret2 = bch2_write_inode_size(c, inode, end, 0);
-+		mutex_unlock(&inode->ei_update_lock);
-+	}
-+
-+	return ret ?: ret2;
-+}
-+
-+long bch2_fallocate_dispatch(struct file *file, int mode,
-+			     loff_t offset, loff_t len)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	long ret;
-+
-+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
-+		return -EROFS;
-+
-+	inode_lock(&inode->v);
-+	inode_dio_wait(&inode->v);
-+	bch2_pagecache_block_get(inode);
-+
-+	ret = file_modified(file);
-+	if (ret)
-+		goto err;
-+
-+	if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
-+		ret = bchfs_fallocate(inode, mode, offset, len);
-+	else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
-+		ret = bchfs_fpunch(inode, offset, len);
-+	else if (mode == FALLOC_FL_INSERT_RANGE)
-+		ret = bchfs_fcollapse_finsert(inode, offset, len, true);
-+	else if (mode == FALLOC_FL_COLLAPSE_RANGE)
-+		ret = bchfs_fcollapse_finsert(inode, offset, len, false);
-+	else
-+		ret = -EOPNOTSUPP;
-+err:
-+	bch2_pagecache_block_put(inode);
-+	inode_unlock(&inode->v);
-+	bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/*
-+ * Take a quota reservation for unallocated blocks in a given file range
-+ * Does not check pagecache
-+ */
-+static int quota_reserve_range(struct bch_inode_info *inode,
-+			       struct quota_res *res,
-+			       u64 start, u64 end)
-+{
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	u64 sectors = end - start;
-+	u64 pos = start;
-+	int ret;
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			     SPOS(inode->v.i_ino, pos, snapshot), 0);
-+
-+	while (!(ret = btree_trans_too_many_iters(trans)) &&
-+	       (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k &&
-+	       !(ret = bkey_err(k))) {
-+		if (bkey_extent_is_allocation(k.k)) {
-+			u64 s = min(end, k.k->p.offset) -
-+				max(start, bkey_start_offset(k.k));
-+			BUG_ON(s > sectors);
-+			sectors -= s;
-+		}
-+		bch2_btree_iter_advance(&iter);
-+	}
-+	pos = iter.pos.offset;
-+	bch2_trans_iter_exit(trans, &iter);
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+
-+	bch2_trans_put(trans);
-+
-+	return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
-+}
-+
-+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
-+			     struct file *file_dst, loff_t pos_dst,
-+			     loff_t len, unsigned remap_flags)
-+{
-+	struct bch_inode_info *src = file_bch_inode(file_src);
-+	struct bch_inode_info *dst = file_bch_inode(file_dst);
-+	struct bch_fs *c = src->v.i_sb->s_fs_info;
-+	struct quota_res quota_res = { 0 };
-+	s64 i_sectors_delta = 0;
-+	u64 aligned_len;
-+	loff_t ret = 0;
-+
-+	if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
-+		return -EINVAL;
-+
-+	if (remap_flags & REMAP_FILE_DEDUP)
-+		return -EOPNOTSUPP;
-+
-+	if ((pos_src & (block_bytes(c) - 1)) ||
-+	    (pos_dst & (block_bytes(c) - 1)))
-+		return -EINVAL;
-+
-+	if (src == dst &&
-+	    abs(pos_src - pos_dst) < len)
-+		return -EINVAL;
-+
-+	bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
-+
-+	inode_dio_wait(&src->v);
-+	inode_dio_wait(&dst->v);
-+
-+	ret = generic_remap_file_range_prep(file_src, pos_src,
-+					    file_dst, pos_dst,
-+					    &len, remap_flags);
-+	if (ret < 0 || len == 0)
-+		goto err;
-+
-+	aligned_len = round_up((u64) len, block_bytes(c));
-+
-+	ret = bch2_write_invalidate_inode_pages_range(dst->v.i_mapping,
-+				pos_dst, pos_dst + len - 1);
-+	if (ret)
-+		goto err;
-+
-+	ret = quota_reserve_range(dst, &quota_res, pos_dst >> 9,
-+				  (pos_dst + aligned_len) >> 9);
-+	if (ret)
-+		goto err;
-+
-+	file_update_time(file_dst);
-+
-+	bch2_mark_pagecache_unallocated(src, pos_src >> 9,
-+				   (pos_src + aligned_len) >> 9);
-+
-+	ret = bch2_remap_range(c,
-+			       inode_inum(dst), pos_dst >> 9,
-+			       inode_inum(src), pos_src >> 9,
-+			       aligned_len >> 9,
-+			       pos_dst + len, &i_sectors_delta);
-+	if (ret < 0)
-+		goto err;
-+
-+	/*
-+	 * due to alignment, we might have remapped slightly more than requsted
-+	 */
-+	ret = min((u64) ret << 9, (u64) len);
-+
-+	bch2_i_sectors_acct(c, dst, &quota_res, i_sectors_delta);
-+
-+	spin_lock(&dst->v.i_lock);
-+	if (pos_dst + ret > dst->v.i_size)
-+		i_size_write(&dst->v, pos_dst + ret);
-+	spin_unlock(&dst->v.i_lock);
-+
-+	if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
-+	    IS_SYNC(file_inode(file_dst)))
-+		ret = bch2_flush_inode(c, dst);
-+err:
-+	bch2_quota_reservation_put(c, dst, &quota_res);
-+	bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
-+
-+	return bch2_err_class(ret);
-+}
-+
-+/* fseek: */
-+
-+static loff_t bch2_seek_data(struct file *file, u64 offset)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	subvol_inum inum = inode_inum(inode);
-+	u64 isize, next_data = MAX_LFS_FILESIZE;
-+	u32 snapshot;
-+	int ret;
-+
-+	isize = i_size_read(&inode->v);
-+	if (offset >= isize)
-+		return -ENXIO;
-+
-+	trans = bch2_trans_get(c);
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
-+			   SPOS(inode->v.i_ino, offset >> 9, snapshot),
-+			   POS(inode->v.i_ino, U64_MAX),
-+			   0, k, ret) {
-+		if (bkey_extent_is_data(k.k)) {
-+			next_data = max(offset, bkey_start_offset(k.k) << 9);
-+			break;
-+		} else if (k.k->p.offset >> 9 > isize)
-+			break;
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+
-+	bch2_trans_put(trans);
-+	if (ret)
-+		return ret;
-+
-+	if (next_data > offset)
-+		next_data = bch2_seek_pagecache_data(&inode->v,
-+						     offset, next_data, 0, false);
-+
-+	if (next_data >= isize)
-+		return -ENXIO;
-+
-+	return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
-+}
-+
-+static loff_t bch2_seek_hole(struct file *file, u64 offset)
-+{
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	subvol_inum inum = inode_inum(inode);
-+	u64 isize, next_hole = MAX_LFS_FILESIZE;
-+	u32 snapshot;
-+	int ret;
-+
-+	isize = i_size_read(&inode->v);
-+	if (offset >= isize)
-+		return -ENXIO;
-+
-+	trans = bch2_trans_get(c);
-+retry:
-+	bch2_trans_begin(trans);
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
-+			   SPOS(inode->v.i_ino, offset >> 9, snapshot),
-+			   BTREE_ITER_SLOTS, k, ret) {
-+		if (k.k->p.inode != inode->v.i_ino) {
-+			next_hole = bch2_seek_pagecache_hole(&inode->v,
-+					offset, MAX_LFS_FILESIZE, 0, false);
-+			break;
-+		} else if (!bkey_extent_is_data(k.k)) {
-+			next_hole = bch2_seek_pagecache_hole(&inode->v,
-+					max(offset, bkey_start_offset(k.k) << 9),
-+					k.k->p.offset << 9, 0, false);
-+
-+			if (next_hole < k.k->p.offset << 9)
-+				break;
-+		} else {
-+			offset = max(offset, bkey_start_offset(k.k) << 9);
-+		}
-+	}
-+	bch2_trans_iter_exit(trans, &iter);
-+err:
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		goto retry;
-+
-+	bch2_trans_put(trans);
-+	if (ret)
-+		return ret;
-+
-+	if (next_hole > isize)
-+		next_hole = isize;
-+
-+	return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE);
-+}
-+
-+loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
-+{
-+	loff_t ret;
-+
-+	switch (whence) {
-+	case SEEK_SET:
-+	case SEEK_CUR:
-+	case SEEK_END:
-+		ret = generic_file_llseek(file, offset, whence);
-+		break;
-+	case SEEK_DATA:
-+		ret = bch2_seek_data(file, offset);
-+		break;
-+	case SEEK_HOLE:
-+		ret = bch2_seek_hole(file, offset);
-+		break;
-+	default:
-+		ret = -EINVAL;
-EINVAL; -+ break; -+ } -+ -+ return bch2_err_class(ret); -+} -+ -+void bch2_fs_fsio_exit(struct bch_fs *c) -+{ -+ bioset_exit(&c->nocow_flush_bioset); -+} -+ -+int bch2_fs_fsio_init(struct bch_fs *c) -+{ -+ if (bioset_init(&c->nocow_flush_bioset, -+ 1, offsetof(struct nocow_flush, bio), 0)) -+ return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; -+ -+ return 0; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h -new file mode 100644 -index 000000000000..ca70346e68dc ---- /dev/null -+++ b/fs/bcachefs/fs-io.h -@@ -0,0 +1,184 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IO_H -+#define _BCACHEFS_FS_IO_H -+ -+#ifndef NO_BCACHEFS_FS -+ -+#include "buckets.h" -+#include "fs.h" -+#include "io_write_types.h" -+#include "quota.h" -+ -+#include -+ -+struct folio_vec { -+ struct folio *fv_folio; -+ size_t fv_offset; -+ size_t fv_len; -+}; -+ -+static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv) -+{ -+ -+ struct folio *folio = page_folio(bv.bv_page); -+ size_t offset = (folio_page_idx(folio, bv.bv_page) << PAGE_SHIFT) + -+ bv.bv_offset; -+ size_t len = min_t(size_t, folio_size(folio) - offset, bv.bv_len); -+ -+ return (struct folio_vec) { -+ .fv_folio = folio, -+ .fv_offset = offset, -+ .fv_len = len, -+ }; -+} -+ -+static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio, -+ struct bvec_iter iter) -+{ -+ return biovec_to_foliovec(bio_iter_iovec(bio, iter)); -+} -+ -+#define __bio_for_each_folio(bvl, bio, iter, start) \ -+ for (iter = (start); \ -+ (iter).bi_size && \ -+ ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \ -+ bio_advance_iter_single((bio), &(iter), (bvl).fv_len)) -+ -+/** -+ * bio_for_each_folio - iterate over folios within a bio -+ * -+ * Like other non-_all versions, this iterates over what bio->bi_iter currently -+ * points to. This version is for drivers, where the bio may have previously -+ * been split or cloned. -+ */ -+#define bio_for_each_folio(bvl, bio, iter) \ -+ __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter) -+ -+struct quota_res { -+ u64 sectors; -+}; -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+static inline void __bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+ BUG_ON(res->sectors > inode->ei_quota_reserved); -+ -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -+ -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); -+ inode->ei_quota_reserved -= res->sectors; -+ res->sectors = 0; -+} -+ -+static inline void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) -+{ -+ if (res->sectors) { -+ mutex_lock(&inode->ei_quota_lock); -+ __bch2_quota_reservation_put(c, inode, res); -+ mutex_unlock(&inode->ei_quota_lock); -+ } -+} -+ -+static inline int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ u64 sectors, -+ bool check_enospc) -+{ -+ int ret; -+ -+ if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) -+ return 0; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, -+ check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); -+ if (likely(!ret)) { -+ inode->ei_quota_reserved += sectors; -+ res->sectors += sectors; -+ } -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+#else -+ -+static inline void __bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) {} -+ -+static inline void bch2_quota_reservation_put(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res) {} -+ -+static inline int bch2_quota_reservation_add(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct quota_res *res, -+ unsigned sectors, -+ bool check_enospc) -+{ -+ return 0; -+} -+ -+#endif -+ -+void __bch2_i_sectors_acct(struct bch_fs *, struct bch_inode_info *, -+ struct quota_res *, s64); -+ -+static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, -+ struct quota_res *quota_res, s64 sectors) -+{ -+ if (sectors) { -+ mutex_lock(&inode->ei_quota_lock); -+ __bch2_i_sectors_acct(c, inode, quota_res, sectors); -+ mutex_unlock(&inode->ei_quota_lock); -+ } -+} -+ -+static inline struct address_space *faults_disabled_mapping(void) -+{ -+ return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); -+} -+ -+static inline void set_fdm_dropped_locks(void) -+{ -+ current->faults_disabled_mapping = -+ (void *) (((unsigned long) current->faults_disabled_mapping)|1); -+} -+ -+static inline bool fdm_dropped_locks(void) -+{ -+ return ((unsigned long) current->faults_disabled_mapping) & 1; -+} -+ -+void bch2_inode_flush_nocow_writes_async(struct bch_fs *, -+ struct bch_inode_info *, struct closure *); -+ -+int __must_check bch2_write_inode_size(struct bch_fs *, -+ struct bch_inode_info *, -+ loff_t, unsigned); -+ -+int bch2_fsync(struct file *, loff_t, loff_t, int); -+ -+int bchfs_truncate(struct mnt_idmap *, -+ struct bch_inode_info *, struct iattr *); -+long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); -+ -+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, -+ loff_t, loff_t, unsigned); -+ -+loff_t bch2_llseek(struct file *, loff_t, int); -+ -+void bch2_fs_fsio_exit(struct bch_fs *); -+int bch2_fs_fsio_init(struct bch_fs *); -+#else -+static inline void bch2_fs_fsio_exit(struct bch_fs *c) {} -+static inline int bch2_fs_fsio_init(struct bch_fs *c) { return 0; } -+#endif -+ -+#endif /* _BCACHEFS_FS_IO_H */ -diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c -new file mode 100644 -index 000000000000..5a39bcb597a3 ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.c -@@ -0,0 +1,572 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-ioctl.h" -+#include "quota.h" -+ -+#include <linux/compat.h> -+#include <linux/fsnotify.h> -+#include <linux/mount.h> -+#include <linux/namei.h> -+#include <linux/security.h> -+#include <linux/writeback.h> -+ -+#define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) -+#define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ -+#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ -+#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ -+ -+struct flags_set { -+ unsigned mask; -+ unsigned flags; -+ -+ unsigned projid; -+ -+ bool set_projinherit; -+ bool projinherit; -+}; -+ -+static int bch2_inode_flags_set(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ /* -+ * We're relying on btree locking here for exclusion with other ioctl -+ * calls - use 
the flags in the btree (@bi), not inode->i_flags: -+ */ -+ struct flags_set *s = p; -+ unsigned newflags = s->flags; -+ unsigned oldflags = bi->bi_flags & s->mask; -+ -+ if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && -+ !capable(CAP_LINUX_IMMUTABLE)) -+ return -EPERM; -+ -+ if (!S_ISREG(bi->bi_mode) && -+ !S_ISDIR(bi->bi_mode) && -+ (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) -+ return -EINVAL; -+ -+ if (s->set_projinherit) { -+ bi->bi_fields_set &= ~(1 << Inode_opt_project); -+ bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); -+ } -+ -+ bi->bi_flags &= ~s->mask; -+ bi->bi_flags |= newflags; -+ -+ bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); -+ return 0; -+} -+ -+static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -+{ -+ unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); -+ -+ return put_user(flags, arg); -+} -+ -+static int bch2_ioc_setflags(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ void __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; -+ unsigned uflags; -+ int ret; -+ -+ if (get_user(uflags, (int __user *) arg)) -+ return -EFAULT; -+ -+ s.flags = map_flags_rev(bch_flags_to_uflags, uflags); -+ if (uflags) -+ return -EOPNOTSUPP; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { -+ ret = -EACCES; -+ goto setflags_out; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, -+ ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+setflags_out: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct fsxattr fa = { 0 }; -+ -+ fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); -+ -+ if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) -+ fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; -+ -+ fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; -+ -+ if (copy_to_user(arg, &fa, sizeof(fa))) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+static int fssetxattr_inode_update_fn(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct flags_set *s = p; -+ -+ if (s->projid != bi->bi_project) { -+ bi->bi_fields_set |= 1U << Inode_opt_project; -+ bi->bi_project = s->projid; -+ } -+ -+ return bch2_inode_flags_set(trans, inode, bi, p); -+} -+ -+static int bch2_ioc_fssetxattr(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *inode, -+ struct fsxattr __user *arg) -+{ -+ struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; -+ struct fsxattr fa; -+ int ret; -+ -+ if (copy_from_user(&fa, arg, sizeof(fa))) -+ return -EFAULT; -+ -+ s.set_projinherit = true; -+ s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; -+ fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; -+ -+ s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); -+ if (fa.fsx_xflags) -+ return -EOPNOTSUPP; -+ -+ if (fa.fsx_projid >= U32_MAX) -+ return -EINVAL; -+ -+ /* -+ * inode fields accessible via the xattr interface are stored with a +1 -+ * bias, so that 0 means unset: -+ */ -+ s.projid = fa.fsx_projid + 1; -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ return ret; -+ -+ inode_lock(&inode->v); -+ if 
(!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { -+ ret = -EACCES; -+ goto err; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_set_projid(c, inode, fa.fsx_projid); -+ if (ret) -+ goto err_unlock; -+ -+ ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, -+ ATTR_CTIME); -+err_unlock: -+ mutex_unlock(&inode->ei_update_lock); -+err: -+ inode_unlock(&inode->v); -+ mnt_drop_write_file(file); -+ return ret; -+} -+ -+static int bch2_reinherit_attrs_fn(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_inode_info *dir = p; -+ -+ return !bch2_reinherit_attrs(bi, &dir->ei_inode); -+} -+ -+static int bch2_ioc_reinherit_attrs(struct bch_fs *c, -+ struct file *file, -+ struct bch_inode_info *src, -+ const char __user *name) -+{ -+ struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode); -+ struct bch_inode_info *dst; -+ struct inode *vinode = NULL; -+ char *kname = NULL; -+ struct qstr qstr; -+ int ret = 0; -+ subvol_inum inum; -+ -+ kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); -+ if (!kname) -+ return -ENOMEM; -+ -+ ret = strncpy_from_user(kname, name, BCH_NAME_MAX); -+ if (unlikely(ret < 0)) -+ goto err1; -+ -+ qstr.len = ret; -+ qstr.name = kname; -+ -+ ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum); -+ if (ret) -+ goto err1; -+ -+ vinode = bch2_vfs_inode_get(c, inum); -+ ret = PTR_ERR_OR_ZERO(vinode); -+ if (ret) -+ goto err1; -+ -+ dst = to_bch_ei(vinode); -+ -+ ret = mnt_want_write_file(file); -+ if (ret) -+ goto err2; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ if (inode_attr_changing(src, dst, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst, -+ src->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err3; -+ } -+ -+ ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); -+err3: -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); -+ -+ /* return true if we did work */ -+ if (ret >= 0) -+ ret = !ret; -+ -+ mnt_drop_write_file(file); -+err2: -+ iput(vinode); -+err1: -+ kfree(kname); -+ -+ return ret; -+} -+ -+static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) -+{ -+ u32 flags; -+ int ret = 0; -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ if (get_user(flags, arg)) -+ return -EFAULT; -+ -+ bch_notice(c, "shutdown by ioctl type %u", flags); -+ -+ down_write(&c->vfs_sb->s_umount); -+ -+ switch (flags) { -+ case FSOP_GOING_FLAGS_DEFAULT: -+ ret = freeze_bdev(c->vfs_sb->s_bdev); -+ if (ret) -+ goto err; -+ -+ bch2_journal_flush(&c->journal); -+ c->vfs_sb->s_flags |= SB_RDONLY; -+ bch2_fs_emergency_read_only(c); -+ thaw_bdev(c->vfs_sb->s_bdev); -+ break; -+ -+ case FSOP_GOING_FLAGS_LOGFLUSH: -+ bch2_journal_flush(&c->journal); -+ fallthrough; -+ -+ case FSOP_GOING_FLAGS_NOLOGFLUSH: -+ c->vfs_sb->s_flags |= SB_RDONLY; -+ bch2_fs_emergency_read_only(c); -+ break; -+ default: -+ ret = -EINVAL; -+ break; -+ } -+err: -+ up_write(&c->vfs_sb->s_umount); -+ return ret; -+} -+ -+static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, -+ struct bch_ioctl_subvolume arg) -+{ -+ struct inode *dir; -+ struct bch_inode_info *inode; -+ struct user_namespace *s_user_ns; -+ struct dentry *dst_dentry; -+ struct path src_path, dst_path; -+ int how = LOOKUP_FOLLOW; -+ int error; -+ subvol_inum snapshot_src = { 0 }; -+ unsigned lookup_flags = 0; -+ unsigned create_flags = BCH_CREATE_SUBVOL; -+ -+ if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE| -+ BCH_SUBVOL_SNAPSHOT_RO)) -+ 
return -EINVAL; -+ -+ if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && -+ (arg.src_ptr || -+ (arg.flags & BCH_SUBVOL_SNAPSHOT_RO))) -+ return -EINVAL; -+ -+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) -+ create_flags |= BCH_CREATE_SNAPSHOT; -+ -+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) -+ create_flags |= BCH_CREATE_SNAPSHOT_RO; -+ -+ /* why do we need this lock? */ -+ down_read(&c->vfs_sb->s_umount); -+ -+ if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) -+ sync_inodes_sb(c->vfs_sb); -+retry: -+ if (arg.src_ptr) { -+ error = user_path_at(arg.dirfd, -+ (const char __user *)(unsigned long)arg.src_ptr, -+ how, &src_path); -+ if (error) -+ goto err1; -+ -+ if (src_path.dentry->d_sb->s_fs_info != c) { -+ path_put(&src_path); -+ error = -EXDEV; -+ goto err1; -+ } -+ -+ snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode)); -+ } -+ -+ dst_dentry = user_path_create(arg.dirfd, -+ (const char __user *)(unsigned long)arg.dst_ptr, -+ &dst_path, lookup_flags); -+ error = PTR_ERR_OR_ZERO(dst_dentry); -+ if (error) -+ goto err2; -+ -+ if (dst_dentry->d_sb->s_fs_info != c) { -+ error = -EXDEV; -+ goto err3; -+ } -+ -+ if (dst_dentry->d_inode) { -+ error = -EEXIST; -+ goto err3; -+ } -+ -+ dir = dst_path.dentry->d_inode; -+ if (IS_DEADDIR(dir)) { -+ error = -BCH_ERR_ENOENT_directory_dead; -+ goto err3; -+ } -+ -+ s_user_ns = dir->i_sb->s_user_ns; -+ if (!kuid_has_mapping(s_user_ns, current_fsuid()) || -+ !kgid_has_mapping(s_user_ns, current_fsgid())) { -+ error = -EOVERFLOW; -+ goto err3; -+ } -+ -+ error = inode_permission(file_mnt_idmap(filp), -+ dir, MAY_WRITE | MAY_EXEC); -+ if (error) -+ goto err3; -+ -+ if (!IS_POSIXACL(dir)) -+ arg.mode &= ~current_umask(); -+ -+ error = security_path_mkdir(&dst_path, dst_dentry, arg.mode); -+ if (error) -+ goto err3; -+ -+ if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && -+ !arg.src_ptr) -+ snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol; -+ -+ inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), -+ dst_dentry, arg.mode|S_IFDIR, -+ 0, snapshot_src, create_flags); -+ error = PTR_ERR_OR_ZERO(inode); -+ if (error) -+ goto err3; -+ -+ d_instantiate(dst_dentry, &inode->v); -+ fsnotify_mkdir(dir, dst_dentry); -+err3: -+ done_path_create(&dst_path, dst_dentry); -+err2: -+ if (arg.src_ptr) -+ path_put(&src_path); -+ -+ if (retry_estale(error, lookup_flags)) { -+ lookup_flags |= LOOKUP_REVAL; -+ goto retry; -+ } -+err1: -+ up_read(&c->vfs_sb->s_umount); -+ -+ return error; -+} -+ -+static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, -+ struct bch_ioctl_subvolume arg) -+{ -+ down_write(&c->snapshot_create_lock); -+ long ret = __bch2_ioctl_subvolume_create(c, filp, arg); -+ up_write(&c->snapshot_create_lock); -+ -+ return ret; -+} -+ -+static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, -+ struct bch_ioctl_subvolume arg) -+{ -+ struct path path; -+ struct inode *dir; -+ int ret = 0; -+ -+ if (arg.flags) -+ return -EINVAL; -+ -+ ret = user_path_at(arg.dirfd, -+ (const char __user *)(unsigned long)arg.dst_ptr, -+ LOOKUP_FOLLOW, &path); -+ if (ret) -+ return ret; -+ -+ if (path.dentry->d_sb->s_fs_info != c) { -+ ret = -EXDEV; -+ goto err; -+ } -+ -+ dir = path.dentry->d_parent->d_inode; -+ -+ ret = __bch2_unlink(dir, path.dentry, true); -+ if (ret) -+ goto err; -+ -+ fsnotify_rmdir(dir, path.dentry); -+ d_delete(path.dentry); -+err: -+ path_put(&path); -+ return ret; -+} -+ -+long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); 
-+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ long ret; -+ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ ret = bch2_ioc_getflags(inode, (int __user *) arg); -+ break; -+ -+ case FS_IOC_SETFLAGS: -+ ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); -+ break; -+ -+ case FS_IOC_FSGETXATTR: -+ ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); -+ break; -+ -+ case FS_IOC_FSSETXATTR: -+ ret = bch2_ioc_fssetxattr(c, file, inode, -+ (void __user *) arg); -+ break; -+ -+ case BCHFS_IOC_REINHERIT_ATTRS: -+ ret = bch2_ioc_reinherit_attrs(c, file, inode, -+ (void __user *) arg); -+ break; -+ -+ case FS_IOC_GETVERSION: -+ ret = -ENOTTY; -+ break; -+ -+ case FS_IOC_SETVERSION: -+ ret = -ENOTTY; -+ break; -+ -+ case FS_IOC_GOINGDOWN: -+ ret = bch2_ioc_goingdown(c, (u32 __user *) arg); -+ break; -+ -+ case BCH_IOCTL_SUBVOLUME_CREATE: { -+ struct bch_ioctl_subvolume i; -+ -+ ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) -+ ? -EFAULT -+ : bch2_ioctl_subvolume_create(c, file, i); -+ break; -+ } -+ -+ case BCH_IOCTL_SUBVOLUME_DESTROY: { -+ struct bch_ioctl_subvolume i; -+ -+ ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) -+ ? -EFAULT -+ : bch2_ioctl_subvolume_destroy(c, file, i); -+ break; -+ } -+ -+ default: -+ ret = bch2_fs_ioctl(c, cmd, (void __user *) arg); -+ break; -+ } -+ -+ return bch2_err_class(ret); -+} -+ -+#ifdef CONFIG_COMPAT -+long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) -+{ -+ /* These are just misnamed, they actually get/put from/to user an int */ -+ switch (cmd) { -+ case FS_IOC32_GETFLAGS: -+ cmd = FS_IOC_GETFLAGS; -+ break; -+ case FS_IOC32_SETFLAGS: -+ cmd = FS_IOC_SETFLAGS; -+ break; -+ default: -+ return -ENOIOCTLCMD; -+ } -+ return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -+} -+#endif -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h -new file mode 100644 -index 000000000000..d30f9bb056fd ---- /dev/null -+++ b/fs/bcachefs/fs-ioctl.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_IOCTL_H -+#define _BCACHEFS_FS_IOCTL_H -+ -+/* Inode flags: */ -+ -+/* bcachefs inode flags -> vfs inode flags: */ -+static const __maybe_unused unsigned bch_flags_to_vfs[] = { -+ [__BCH_INODE_sync] = S_SYNC, -+ [__BCH_INODE_immutable] = S_IMMUTABLE, -+ [__BCH_INODE_append] = S_APPEND, -+ [__BCH_INODE_noatime] = S_NOATIME, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -+static const __maybe_unused unsigned bch_flags_to_uflags[] = { -+ [__BCH_INODE_sync] = FS_SYNC_FL, -+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, -+ [__BCH_INODE_append] = FS_APPEND_FL, -+ [__BCH_INODE_nodump] = FS_NODUMP_FL, -+ [__BCH_INODE_noatime] = FS_NOATIME_FL, -+}; -+ -+/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -+static const __maybe_unused unsigned bch_flags_to_xflags[] = { -+ [__BCH_INODE_sync] = FS_XFLAG_SYNC, -+ [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, -+ [__BCH_INODE_append] = FS_XFLAG_APPEND, -+ [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, -+ [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, -+ //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -+}; -+ -+#define set_flags(_map, _in, _out) \ -+do { \ -+ unsigned _i; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & (1 << _i)) \ -+ (_out) |= _map[_i]; \ -+ else \ -+ (_out) &= ~_map[_i]; \ -+} while (0) -+ -+#define map_flags(_map, _in) \ -+({ \ -+ unsigned _out = 0; \ -+ \ -+ set_flags(_map, _in, _out); \ -+ _out; \ -+}) -+ -+#define map_flags_rev(_map, _in) \ -+({ \ -+ unsigned _i, 
_out = 0; \ -+ \ -+ for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ -+ if ((_in) & _map[_i]) { \ -+ (_out) |= 1 << _i; \ -+ (_in) &= ~_map[_i]; \ -+ } \ -+ (_out); \ -+}) -+ -+#define map_defined(_map) \ -+({ \ -+ unsigned _in = ~0; \ -+ \ -+ map_flags_rev(_map, _in); \ -+}) -+ -+/* Set VFS inode flags from bcachefs inode: */ -+static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -+{ -+ set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -+} -+ -+long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); -+long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); -+ -+#endif /* _BCACHEFS_FS_IOCTL_H */ -diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c -new file mode 100644 -index 000000000000..82b668ea20aa ---- /dev/null -+++ b/fs/bcachefs/fs.c -@@ -0,0 +1,1977 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#ifndef NO_BCACHEFS_FS -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "bkey_buf.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "chardev.h" -+#include "dirent.h" -+#include "errcode.h" -+#include "extents.h" -+#include "fs.h" -+#include "fs-common.h" -+#include "fs-io.h" -+#include "fs-ioctl.h" -+#include "fs-io-buffered.h" -+#include "fs-io-direct.h" -+#include "fs-io-pagecache.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io_read.h" -+#include "journal.h" -+#include "keylist.h" -+#include "quota.h" -+#include "snapshot.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include <linux/aio.h> -+#include <linux/backing-dev.h> -+#include <linux/exportfs.h> -+#include <linux/fiemap.h> -+#include <linux/module.h> -+#include <linux/pagemap.h> -+#include <linux/posix_acl.h> -+#include <linux/random.h> -+#include <linux/seq_file.h> -+#include <linux/statfs.h> -+#include <linux/string.h> -+#include <linux/xattr.h> -+ -+static struct kmem_cache *bch2_inode_cache; -+ -+static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *, -+ struct bch_subvolume *); -+ -+void bch2_inode_update_after_write(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ unsigned fields) -+{ -+ struct bch_fs *c = trans->c; -+ -+ BUG_ON(bi->bi_inum != inode->v.i_ino); -+ -+ bch2_assert_pos_locked(trans, BTREE_ID_inodes, -+ POS(0, bi->bi_inum), -+ c->opts.inodes_use_key_cache); -+ -+ set_nlink(&inode->v, bch2_inode_nlink_get(bi)); -+ i_uid_write(&inode->v, bi->bi_uid); -+ i_gid_write(&inode->v, bi->bi_gid); -+ inode->v.i_mode = bi->bi_mode; -+ -+ if (fields & ATTR_ATIME) -+ inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); -+ if (fields & ATTR_MTIME) -+ inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); -+ if (fields & ATTR_CTIME) -+ inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime)); -+ -+ inode->ei_inode = *bi; -+ -+ bch2_inode_flags_to_vfs(inode); -+} -+ -+int __must_check bch2_write_inode(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ inode_set_fn set, -+ void *p, unsigned fields) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bch_inode_unpacked inode_u; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), -+ BTREE_ITER_INTENT) ?: -+ (set ? 
set(trans, inode, &inode_u, p) : 0) ?: -+ bch2_inode_write(trans, &iter, &inode_u) ?: -+ bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); -+ -+ /* -+ * the btree node lock protects inode->ei_inode, not ei_update_lock; -+ * this is important for inode updates via bchfs_write_index_update -+ */ -+ if (!ret) -+ bch2_inode_update_after_write(trans, inode, &inode_u, fields); -+ -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, -+ "inode %u:%llu not found when updating", -+ inode_inum(inode).subvol, -+ inode_inum(inode).inum); -+ -+ bch2_trans_put(trans); -+ return ret < 0 ? ret : 0; -+} -+ -+int bch2_fs_quota_transfer(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_qid new_qid, -+ unsigned qtypes, -+ enum quota_acct_mode mode) -+{ -+ unsigned i; -+ int ret; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ for (i = 0; i < QTYP_NR; i++) -+ if (new_qid.q[i] == inode->ei_qid.q[i]) -+ qtypes &= ~(1U << i); -+ -+ if (!qtypes) -+ return 0; -+ -+ mutex_lock(&inode->ei_quota_lock); -+ -+ ret = bch2_quota_transfer(c, qtypes, new_qid, -+ inode->ei_qid, -+ inode->v.i_blocks + -+ inode->ei_quota_reserved, -+ mode); -+ if (!ret) -+ for (i = 0; i < QTYP_NR; i++) -+ if (qtypes & (1 << i)) -+ inode->ei_qid.q[i] = new_qid.q[i]; -+ -+ mutex_unlock(&inode->ei_quota_lock); -+ -+ return ret; -+} -+ -+static int bch2_iget5_test(struct inode *vinode, void *p) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ subvol_inum *inum = p; -+ -+ return inode->ei_subvol == inum->subvol && -+ inode->ei_inode.bi_inum == inum->inum; -+} -+ -+static int bch2_iget5_set(struct inode *vinode, void *p) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ subvol_inum *inum = p; -+ -+ inode->v.i_ino = inum->inum; -+ inode->ei_subvol = inum->subvol; -+ inode->ei_inode.bi_inum = inum->inum; -+ return 0; -+} -+ -+static unsigned bch2_inode_hash(subvol_inum inum) -+{ -+ return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL); -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) -+{ -+ struct bch_inode_unpacked inode_u; -+ struct bch_inode_info *inode; -+ struct btree_trans *trans; -+ struct bch_subvolume subvol; -+ int ret; -+ -+ inode = to_bch_ei(iget5_locked(c->vfs_sb, -+ bch2_inode_hash(inum), -+ bch2_iget5_test, -+ bch2_iget5_set, -+ &inum)); -+ if (unlikely(!inode)) -+ return ERR_PTR(-ENOMEM); -+ if (!(inode->v.i_state & I_NEW)) -+ return &inode->v; -+ -+ trans = bch2_trans_get(c); -+ ret = lockrestart_do(trans, -+ bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: -+ bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); -+ -+ if (!ret) -+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); -+ bch2_trans_put(trans); -+ -+ if (ret) { -+ iget_failed(&inode->v); -+ return ERR_PTR(bch2_err_class(ret)); -+ } -+ -+ mutex_lock(&c->vfs_inodes_lock); -+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); -+ mutex_unlock(&c->vfs_inodes_lock); -+ -+ unlock_new_inode(&inode->v); -+ -+ return &inode->v; -+} -+ -+struct bch_inode_info * -+__bch2_create(struct mnt_idmap *idmap, -+ struct bch_inode_info *dir, struct dentry *dentry, -+ umode_t mode, dev_t rdev, subvol_inum snapshot_src, -+ unsigned flags) -+{ -+ struct bch_fs *c = dir->v.i_sb->s_fs_info; -+ struct btree_trans *trans; -+ struct bch_inode_unpacked dir_u; -+ struct bch_inode_info *inode, *old; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl 
*default_acl = NULL, *acl = NULL; -+ subvol_inum inum; -+ struct bch_subvolume subvol; -+ u64 journal_seq = 0; -+ int ret; -+ -+ /* -+ * preallocate acls + vfs inode before btree transaction, so that -+ * nothing can fail after the transaction succeeds: -+ */ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); -+ if (ret) -+ return ERR_PTR(ret); -+#endif -+ inode = to_bch_ei(new_inode(c->vfs_sb)); -+ if (unlikely(!inode)) { -+ inode = ERR_PTR(-ENOMEM); -+ goto err; -+ } -+ -+ bch2_inode_init_early(c, &inode_u); -+ -+ if (!(flags & BCH_CREATE_TMPFILE)) -+ mutex_lock(&dir->ei_update_lock); -+ -+ trans = bch2_trans_get(c); -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_create_trans(trans, -+ inode_inum(dir), &dir_u, &inode_u, -+ !(flags & BCH_CREATE_TMPFILE) -+ ? &dentry->d_name : NULL, -+ from_kuid(i_user_ns(&dir->v), current_fsuid()), -+ from_kgid(i_user_ns(&dir->v), current_fsgid()), -+ mode, rdev, -+ default_acl, acl, snapshot_src, flags) ?: -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (unlikely(ret)) -+ goto err_before_quota; -+ -+ inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; -+ inum.inum = inode_u.bi_inum; -+ -+ ret = bch2_subvolume_get(trans, inum.subvol, true, -+ BTREE_ITER_WITH_UPDATES, &subvol) ?: -+ bch2_trans_commit(trans, NULL, &journal_seq, 0); -+ if (unlikely(ret)) { -+ bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+err_before_quota: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ goto err_trans; -+ } -+ -+ if (!(flags & BCH_CREATE_TMPFILE)) { -+ bch2_inode_update_after_write(trans, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&dir->ei_update_lock); -+ } -+ -+ bch2_iget5_set(&inode->v, &inum); -+ bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); -+ -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+ set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); -+ -+ /* -+ * we must insert the new inode into the inode cache before calling -+ * bch2_trans_exit() and dropping locks, else we could race with another -+ * thread pulling the inode in and modifying it: -+ */ -+ -+ inode->v.i_state |= I_CREATING; -+ -+ old = to_bch_ei(inode_insert5(&inode->v, -+ bch2_inode_hash(inum), -+ bch2_iget5_test, -+ bch2_iget5_set, -+ &inum)); -+ BUG_ON(!old); -+ -+ if (unlikely(old != inode)) { -+ /* -+ * We raced, another process pulled the new inode into cache -+ * before us: -+ */ -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ -+ inode = old; -+ } else { -+ mutex_lock(&c->vfs_inodes_lock); -+ list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); -+ mutex_unlock(&c->vfs_inodes_lock); -+ /* -+ * we really don't want insert_inode_locked2() to be setting -+ * I_NEW... 
-+ */ -+ unlock_new_inode(&inode->v); -+ } -+ -+ bch2_trans_put(trans); -+err: -+ posix_acl_release(default_acl); -+ posix_acl_release(acl); -+ return inode; -+err_trans: -+ if (!(flags & BCH_CREATE_TMPFILE)) -+ mutex_unlock(&dir->ei_update_lock); -+ -+ bch2_trans_put(trans); -+ make_bad_inode(&inode->v); -+ iput(&inode->v); -+ inode = ERR_PTR(ret); -+ goto err; -+} -+ -+/* methods */ -+ -+static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); -+ struct inode *vinode = NULL; -+ subvol_inum inum = { .subvol = 1 }; -+ int ret; -+ -+ ret = bch2_dirent_lookup(c, inode_inum(dir), &hash, -+ &dentry->d_name, &inum); -+ -+ if (!ret) -+ vinode = bch2_vfs_inode_get(c, inum); -+ -+ return d_splice_alias(vinode, dentry); -+} -+ -+static int bch2_mknod(struct mnt_idmap *idmap, -+ struct inode *vdir, struct dentry *dentry, -+ umode_t mode, dev_t rdev) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, -+ (subvol_inum) { 0 }, 0); -+ -+ if (IS_ERR(inode)) -+ return bch2_err_class(PTR_ERR(inode)); -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+static int bch2_create(struct mnt_idmap *idmap, -+ struct inode *vdir, struct dentry *dentry, -+ umode_t mode, bool excl) -+{ -+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0); -+} -+ -+static int __bch2_link(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ struct bch_inode_info *dir, -+ struct dentry *dentry) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct bch_inode_unpacked dir_u, inode_u; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_link_trans(trans, -+ inode_inum(dir), &dir_u, -+ inode_inum(inode), &inode_u, -+ &dentry->d_name)); -+ -+ if (likely(!ret)) { -+ bch2_inode_update_after_write(trans, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); -+ } -+ -+ bch2_trans_put(trans); -+ mutex_unlock(&inode->ei_update_lock); -+ return ret; -+} -+ -+static int bch2_link(struct dentry *old_dentry, struct inode *vdir, -+ struct dentry *dentry) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ return ret; -+ -+ ihold(&inode->v); -+ d_instantiate(dentry, &inode->v); -+ return 0; -+} -+ -+int __bch2_unlink(struct inode *vdir, struct dentry *dentry, -+ bool deleting_snapshot) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct bch_inode_unpacked dir_u, inode_u; -+ struct btree_trans *trans = bch2_trans_get(c); -+ int ret; -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL, -+ bch2_unlink_trans(trans, -+ inode_inum(dir), &dir_u, -+ &inode_u, &dentry->d_name, -+ deleting_snapshot)); -+ if (unlikely(ret)) -+ goto err; -+ -+ bch2_inode_update_after_write(trans, dir, &dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ bch2_inode_update_after_write(trans, inode, &inode_u, -+ ATTR_MTIME); -+ -+ if (inode_u.bi_subvol) { -+ /* -+ * Subvolume deletion is 
asynchronous, but we still want to tell -+ * the VFS that it's been deleted here: -+ */ -+ set_nlink(&inode->v, 0); -+ } -+err: -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); -+ bch2_trans_put(trans); -+ -+ return ret; -+} -+ -+static int bch2_unlink(struct inode *vdir, struct dentry *dentry) -+{ -+ return __bch2_unlink(vdir, dentry, false); -+} -+ -+static int bch2_symlink(struct mnt_idmap *idmap, -+ struct inode *vdir, struct dentry *dentry, -+ const char *symname) -+{ -+ struct bch_fs *c = vdir->i_sb->s_fs_info; -+ struct bch_inode_info *dir = to_bch_ei(vdir), *inode; -+ int ret; -+ -+ inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, -+ (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); -+ if (IS_ERR(inode)) -+ return bch2_err_class(PTR_ERR(inode)); -+ -+ inode_lock(&inode->v); -+ ret = page_symlink(&inode->v, symname, strlen(symname) + 1); -+ inode_unlock(&inode->v); -+ -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); -+ if (unlikely(ret)) -+ goto err; -+ -+ ret = __bch2_link(c, inode, dir, dentry); -+ if (unlikely(ret)) -+ goto err; -+ -+ d_instantiate(dentry, &inode->v); -+ return 0; -+err: -+ iput(&inode->v); -+ return ret; -+} -+ -+static int bch2_mkdir(struct mnt_idmap *idmap, -+ struct inode *vdir, struct dentry *dentry, umode_t mode) -+{ -+ return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0); -+} -+ -+static int bch2_rename2(struct mnt_idmap *idmap, -+ struct inode *src_vdir, struct dentry *src_dentry, -+ struct inode *dst_vdir, struct dentry *dst_dentry, -+ unsigned flags) -+{ -+ struct bch_fs *c = src_vdir->i_sb->s_fs_info; -+ struct bch_inode_info *src_dir = to_bch_ei(src_vdir); -+ struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); -+ struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); -+ struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); -+ struct bch_inode_unpacked dst_dir_u, src_dir_u; -+ struct bch_inode_unpacked src_inode_u, dst_inode_u; -+ struct btree_trans *trans; -+ enum bch_rename_mode mode = flags & RENAME_EXCHANGE -+ ? BCH_RENAME_EXCHANGE -+ : dst_dentry->d_inode -+ ? 
BCH_RENAME_OVERWRITE : BCH_RENAME; -+ int ret; -+ -+ if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) -+ return -EINVAL; -+ -+ if (mode == BCH_RENAME_OVERWRITE) { -+ ret = filemap_write_and_wait_range(src_inode->v.i_mapping, -+ 0, LLONG_MAX); -+ if (ret) -+ return ret; -+ } -+ -+ trans = bch2_trans_get(c); -+ -+ bch2_lock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, src_inode, -+ dst_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+ if (mode == BCH_RENAME_EXCHANGE && -+ inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) { -+ ret = bch2_fs_quota_transfer(c, dst_inode, -+ src_dir->ei_qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ } -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_rename_trans(trans, -+ inode_inum(src_dir), &src_dir_u, -+ inode_inum(dst_dir), &dst_dir_u, -+ &src_inode_u, -+ &dst_inode_u, -+ &src_dentry->d_name, -+ &dst_dentry->d_name, -+ mode)); -+ if (unlikely(ret)) -+ goto err; -+ -+ BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); -+ BUG_ON(dst_inode && -+ dst_inode->v.i_ino != dst_inode_u.bi_inum); -+ -+ bch2_inode_update_after_write(trans, src_dir, &src_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ -+ if (src_dir != dst_dir) -+ bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u, -+ ATTR_MTIME|ATTR_CTIME); -+ -+ bch2_inode_update_after_write(trans, src_inode, &src_inode_u, -+ ATTR_CTIME); -+ -+ if (dst_inode) -+ bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u, -+ ATTR_CTIME); -+err: -+ bch2_trans_put(trans); -+ -+ bch2_fs_quota_transfer(c, src_inode, -+ bch_qid(&src_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ if (dst_inode) -+ bch2_fs_quota_transfer(c, dst_inode, -+ bch_qid(&dst_inode->ei_inode), -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_NOCHECK); -+ -+ bch2_unlock_inodes(INODE_UPDATE_LOCK, -+ src_dir, -+ dst_dir, -+ src_inode, -+ dst_inode); -+ -+ return ret; -+} -+ -+static void bch2_setattr_copy(struct mnt_idmap *idmap, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ unsigned int ia_valid = attr->ia_valid; -+ -+ if (ia_valid & ATTR_UID) -+ bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid); -+ if (ia_valid & ATTR_GID) -+ bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid); -+ -+ if (ia_valid & ATTR_SIZE) -+ bi->bi_size = attr->ia_size; -+ -+ if (ia_valid & ATTR_ATIME) -+ bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); -+ if (ia_valid & ATTR_MTIME) -+ bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); -+ if (ia_valid & ATTR_CTIME) -+ bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); -+ -+ if (ia_valid & ATTR_MODE) { -+ umode_t mode = attr->ia_mode; -+ kgid_t gid = ia_valid & ATTR_GID -+ ? 
attr->ia_gid -+ : inode->v.i_gid; -+ -+ if (!in_group_p(gid) && -+ !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID)) -+ mode &= ~S_ISGID; -+ bi->bi_mode = mode; -+ } -+} -+ -+int bch2_setattr_nonsize(struct mnt_idmap *idmap, -+ struct bch_inode_info *inode, -+ struct iattr *attr) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_qid qid; -+ struct btree_trans *trans; -+ struct btree_iter inode_iter = { NULL }; -+ struct bch_inode_unpacked inode_u; -+ struct posix_acl *acl = NULL; -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ -+ qid = inode->ei_qid; -+ -+ if (attr->ia_valid & ATTR_UID) -+ qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid); -+ -+ if (attr->ia_valid & ATTR_GID) -+ qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid); -+ -+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0, -+ KEY_TYPE_QUOTA_PREALLOC); -+ if (ret) -+ goto err; -+ -+ trans = bch2_trans_get(c); -+retry: -+ bch2_trans_begin(trans); -+ kfree(acl); -+ acl = NULL; -+ -+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), -+ BTREE_ITER_INTENT); -+ if (ret) -+ goto btree_err; -+ -+ bch2_setattr_copy(idmap, inode, &inode_u, attr); -+ -+ if (attr->ia_valid & ATTR_MODE) { -+ ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u, -+ inode_u.bi_mode, &acl); -+ if (ret) -+ goto btree_err; -+ } -+ -+ ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+btree_err: -+ bch2_trans_iter_exit(trans, &inode_iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ if (unlikely(ret)) -+ goto err_trans; -+ -+ bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid); -+ -+ if (acl) -+ set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -+err_trans: -+ bch2_trans_put(trans); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return bch2_err_class(ret); -+} -+ -+static int bch2_getattr(struct mnt_idmap *idmap, -+ const struct path *path, struct kstat *stat, -+ u32 request_mask, unsigned query_flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ stat->dev = inode->v.i_sb->s_dev; -+ stat->ino = inode->v.i_ino; -+ stat->mode = inode->v.i_mode; -+ stat->nlink = inode->v.i_nlink; -+ stat->uid = inode->v.i_uid; -+ stat->gid = inode->v.i_gid; -+ stat->rdev = inode->v.i_rdev; -+ stat->size = i_size_read(&inode->v); -+ stat->atime = inode->v.i_atime; -+ stat->mtime = inode->v.i_mtime; -+ stat->ctime = inode_get_ctime(&inode->v); -+ stat->blksize = block_bytes(c); -+ stat->blocks = inode->v.i_blocks; -+ -+ if (request_mask & STATX_BTIME) { -+ stat->result_mask |= STATX_BTIME; -+ stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); -+ } -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_immutable) -+ stat->attributes |= STATX_ATTR_IMMUTABLE; -+ stat->attributes_mask |= STATX_ATTR_IMMUTABLE; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_append) -+ stat->attributes |= STATX_ATTR_APPEND; -+ stat->attributes_mask |= STATX_ATTR_APPEND; -+ -+ if (inode->ei_inode.bi_flags & BCH_INODE_nodump) -+ stat->attributes |= STATX_ATTR_NODUMP; -+ stat->attributes_mask |= STATX_ATTR_NODUMP; -+ -+ return 0; -+} -+ -+static int bch2_setattr(struct mnt_idmap *idmap, -+ struct dentry *dentry, struct iattr *iattr) -+{ -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ int ret; -+ -+ lockdep_assert_held(&inode->v.i_rwsem); -+ -+ ret = setattr_prepare(idmap, dentry, iattr); -+ if (ret) 
-+ return ret; -+ -+ return iattr->ia_valid & ATTR_SIZE -+ ? bchfs_truncate(idmap, inode, iattr) -+ : bch2_setattr_nonsize(idmap, inode, iattr); -+} -+ -+static int bch2_tmpfile(struct mnt_idmap *idmap, -+ struct inode *vdir, struct file *file, umode_t mode) -+{ -+ struct bch_inode_info *inode = -+ __bch2_create(idmap, to_bch_ei(vdir), -+ file->f_path.dentry, mode, 0, -+ (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); -+ -+ if (IS_ERR(inode)) -+ return bch2_err_class(PTR_ERR(inode)); -+ -+ d_mark_tmpfile(file, &inode->v); -+ d_instantiate(file->f_path.dentry, &inode->v); -+ return finish_open_simple(file, 0); -+} -+ -+static int bch2_fill_extent(struct bch_fs *c, -+ struct fiemap_extent_info *info, -+ struct bkey_s_c k, unsigned flags) -+{ -+ if (bkey_extent_is_direct_data(k.k)) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ int ret; -+ -+ if (k.k->type == KEY_TYPE_reflink_v) -+ flags |= FIEMAP_EXTENT_SHARED; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ int flags2 = 0; -+ u64 offset = p.ptr.offset; -+ -+ if (p.ptr.unwritten) -+ flags2 |= FIEMAP_EXTENT_UNWRITTEN; -+ -+ if (p.crc.compression_type) -+ flags2 |= FIEMAP_EXTENT_ENCODED; -+ else -+ offset += p.crc.offset; -+ -+ if ((offset & (block_sectors(c) - 1)) || -+ (k.k->size & (block_sectors(c) - 1))) -+ flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; -+ -+ ret = fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ offset << 9, -+ k.k->size << 9, flags|flags2); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+ } else if (bkey_extent_is_inline_data(k.k)) { -+ return fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ 0, k.k->size << 9, -+ flags| -+ FIEMAP_EXTENT_DATA_INLINE); -+ } else if (k.k->type == KEY_TYPE_reservation) { -+ return fiemap_fill_next_extent(info, -+ bkey_start_offset(k.k) << 9, -+ 0, k.k->size << 9, -+ flags| -+ FIEMAP_EXTENT_DELALLOC| -+ FIEMAP_EXTENT_UNWRITTEN); -+ } else { -+ BUG(); -+ } -+} -+ -+static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, -+ u64 start, u64 len) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *ei = to_bch_ei(vinode); -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_buf cur, prev; -+ struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); -+ unsigned offset_into_extent, sectors; -+ bool have_extent = false; -+ u32 snapshot; -+ int ret = 0; -+ -+ ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); -+ if (ret) -+ return ret; -+ -+ if (start + len < start) -+ return -EINVAL; -+ -+ start >>= 9; -+ -+ bch2_bkey_buf_init(&cur); -+ bch2_bkey_buf_init(&prev); -+ trans = bch2_trans_get(c); -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -+ SPOS(ei->v.i_ino, start, snapshot), 0); -+ -+ while (!(ret = btree_trans_too_many_iters(trans)) && -+ (k = bch2_btree_iter_peek_upto(&iter, end)).k && -+ !(ret = bkey_err(k))) { -+ enum btree_id data_btree = BTREE_ID_extents; -+ -+ if (!bkey_extent_is_data(k.k) && -+ k.k->type != KEY_TYPE_reservation) { -+ bch2_btree_iter_advance(&iter); -+ continue; -+ } -+ -+ offset_into_extent = iter.pos.offset - -+ bkey_start_offset(k.k); -+ sectors = k.k->size - offset_into_extent; -+ -+ bch2_bkey_buf_reassemble(&cur, c, k); -+ -+ ret = bch2_read_indirect_extent(trans, &data_btree, -+ &offset_into_extent, &cur); -+ if (ret) -+ 
break; -+ -+ k = bkey_i_to_s_c(cur.k); -+ bch2_bkey_buf_realloc(&prev, c, k.k->u64s); -+ -+ sectors = min(sectors, k.k->size - offset_into_extent); -+ -+ bch2_cut_front(POS(k.k->p.inode, -+ bkey_start_offset(k.k) + -+ offset_into_extent), -+ cur.k); -+ bch2_key_resize(&cur.k->k, sectors); -+ cur.k->k.p = iter.pos; -+ cur.k->k.p.offset += cur.k->k.size; -+ -+ if (have_extent) { -+ bch2_trans_unlock(trans); -+ ret = bch2_fill_extent(c, info, -+ bkey_i_to_s_c(prev.k), 0); -+ if (ret) -+ break; -+ } -+ -+ bkey_copy(prev.k, cur.k); -+ have_extent = true; -+ -+ bch2_btree_iter_set_pos(&iter, -+ POS(iter.pos.inode, iter.pos.offset + sectors)); -+ } -+ start = iter.pos.offset; -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ if (!ret && have_extent) { -+ bch2_trans_unlock(trans); -+ ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), -+ FIEMAP_EXTENT_LAST); -+ } -+ -+ bch2_trans_put(trans); -+ bch2_bkey_buf_exit(&cur, c); -+ bch2_bkey_buf_exit(&prev, c); -+ return ret < 0 ? ret : 0; -+} -+ -+static const struct vm_operations_struct bch_vm_ops = { -+ .fault = bch2_page_fault, -+ .map_pages = filemap_map_pages, -+ .page_mkwrite = bch2_page_mkwrite, -+}; -+ -+static int bch2_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ file_accessed(file); -+ -+ vma->vm_ops = &bch_vm_ops; -+ return 0; -+} -+ -+/* Directories: */ -+ -+static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) -+{ -+ return generic_file_llseek_size(file, offset, whence, -+ S64_MAX, S64_MAX); -+} -+ -+static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) -+{ -+ struct bch_inode_info *inode = file_bch_inode(file); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ int ret; -+ -+ if (!dir_emit_dots(file, ctx)) -+ return 0; -+ -+ ret = bch2_readdir(c, inode_inum(inode), ctx); -+ if (ret) -+ bch_err_fn(c, ret); -+ -+ return bch2_err_class(ret); -+} -+ -+static const struct file_operations bch_file_operations = { -+ .llseek = bch2_llseek, -+ .read_iter = bch2_read_iter, -+ .write_iter = bch2_write_iter, -+ .mmap = bch2_mmap, -+ .open = generic_file_open, -+ .fsync = bch2_fsync, -+ .splice_read = filemap_splice_read, -+ .splice_write = iter_file_splice_write, -+ .fallocate = bch2_fallocate_dispatch, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+ .remap_file_range = bch2_remap_file_range, -+}; -+ -+static const struct inode_operations bch_file_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .fiemap = bch2_fiemap, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_dir_inode_operations = { -+ .lookup = bch2_lookup, -+ .create = bch2_create, -+ .link = bch2_link, -+ .unlink = bch2_unlink, -+ .symlink = bch2_symlink, -+ .mkdir = bch2_mkdir, -+ .rmdir = bch2_unlink, -+ .mknod = bch2_mknod, -+ .rename = bch2_rename2, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .tmpfile = bch2_tmpfile, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct file_operations bch_dir_file_operations = { -+ .llseek = bch2_dir_llseek, -+ .read = generic_read_dir, -+ .iterate_shared = bch2_vfs_readdir, -+ .fsync = bch2_fsync, -+ .unlocked_ioctl = bch2_fs_file_ioctl, -+#ifdef CONFIG_COMPAT -+ 
.compat_ioctl = bch2_compat_fs_ioctl, -+#endif -+}; -+ -+static const struct inode_operations bch_symlink_inode_operations = { -+ .get_link = page_get_link, -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct inode_operations bch_special_inode_operations = { -+ .getattr = bch2_getattr, -+ .setattr = bch2_setattr, -+ .listxattr = bch2_xattr_list, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ .get_acl = bch2_get_acl, -+ .set_acl = bch2_set_acl, -+#endif -+}; -+ -+static const struct address_space_operations bch_address_space_operations = { -+ .read_folio = bch2_read_folio, -+ .writepages = bch2_writepages, -+ .readahead = bch2_readahead, -+ .dirty_folio = filemap_dirty_folio, -+ .write_begin = bch2_write_begin, -+ .write_end = bch2_write_end, -+ .invalidate_folio = bch2_invalidate_folio, -+ .release_folio = bch2_release_folio, -+ .direct_IO = noop_direct_IO, -+#ifdef CONFIG_MIGRATION -+ .migrate_folio = filemap_migrate_folio, -+#endif -+ .error_remove_page = generic_error_remove_page, -+}; -+ -+struct bcachefs_fid { -+ u64 inum; -+ u32 subvol; -+ u32 gen; -+} __packed; -+ -+struct bcachefs_fid_with_parent { -+ struct bcachefs_fid fid; -+ struct bcachefs_fid dir; -+} __packed; -+ -+static int bcachefs_fid_valid(int fh_len, int fh_type) -+{ -+ switch (fh_type) { -+ case FILEID_BCACHEFS_WITHOUT_PARENT: -+ return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32); -+ case FILEID_BCACHEFS_WITH_PARENT: -+ return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32); -+ default: -+ return false; -+ } -+} -+ -+static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode) -+{ -+ return (struct bcachefs_fid) { -+ .inum = inode->ei_inode.bi_inum, -+ .subvol = inode->ei_subvol, -+ .gen = inode->ei_inode.bi_generation, -+ }; -+} -+ -+static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len, -+ struct inode *vdir) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_inode_info *dir = to_bch_ei(vdir); -+ -+ if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32)) -+ return FILEID_INVALID; -+ -+ if (!S_ISDIR(inode->v.i_mode) && dir) { -+ struct bcachefs_fid_with_parent *fid = (void *) fh; -+ -+ fid->fid = bch2_inode_to_fid(inode); -+ fid->dir = bch2_inode_to_fid(dir); -+ -+ *len = sizeof(*fid) / sizeof(u32); -+ return FILEID_BCACHEFS_WITH_PARENT; -+ } else { -+ struct bcachefs_fid *fid = (void *) fh; -+ -+ *fid = bch2_inode_to_fid(inode); -+ -+ *len = sizeof(*fid) / sizeof(u32); -+ return FILEID_BCACHEFS_WITHOUT_PARENT; -+ } -+} -+ -+static struct inode *bch2_nfs_get_inode(struct super_block *sb, -+ struct bcachefs_fid fid) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) { -+ .subvol = fid.subvol, -+ .inum = fid.inum, -+ }); -+ if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) { -+ iput(vinode); -+ vinode = ERR_PTR(-ESTALE); -+ } -+ return vinode; -+} -+ -+static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid, -+ int fh_len, int fh_type) -+{ -+ struct bcachefs_fid *fid = (void *) _fid; -+ -+ if (!bcachefs_fid_valid(fh_len, fh_type)) -+ return NULL; -+ -+ return d_obtain_alias(bch2_nfs_get_inode(sb, *fid)); -+} -+ -+static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid, -+ int fh_len, int fh_type) -+{ -+ struct bcachefs_fid_with_parent *fid = (void *) _fid; -+ -+ if 
(!bcachefs_fid_valid(fh_len, fh_type) || -+ fh_type != FILEID_BCACHEFS_WITH_PARENT) -+ return NULL; -+ -+ return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir)); -+} -+ -+static struct dentry *bch2_get_parent(struct dentry *child) -+{ -+ struct bch_inode_info *inode = to_bch_ei(child->d_inode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ subvol_inum parent_inum = { -+ .subvol = inode->ei_inode.bi_parent_subvol ?: -+ inode->ei_subvol, -+ .inum = inode->ei_inode.bi_dir, -+ }; -+ -+ return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum)); -+} -+ -+static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child) -+{ -+ struct bch_inode_info *inode = to_bch_ei(child->d_inode); -+ struct bch_inode_info *dir = to_bch_ei(parent->d_inode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct btree_trans *trans; -+ struct btree_iter iter1; -+ struct btree_iter iter2; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ struct bch_inode_unpacked inode_u; -+ subvol_inum target; -+ u32 snapshot; -+ struct qstr dirent_name; -+ unsigned name_len = 0; -+ int ret; -+ -+ if (!S_ISDIR(dir->v.i_mode)) -+ return -EINVAL; -+ -+ trans = bch2_trans_get(c); -+ -+ bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents, -+ POS(dir->ei_inode.bi_inum, 0), 0); -+ bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents, -+ POS(dir->ei_inode.bi_inum, 0), 0); -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ bch2_btree_iter_set_snapshot(&iter1, snapshot); -+ bch2_btree_iter_set_snapshot(&iter2, snapshot); -+ -+ ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); -+ if (ret) -+ goto err; -+ -+ if (inode_u.bi_dir == dir->ei_inode.bi_inum) { -+ bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); -+ -+ k = bch2_btree_iter_peek_slot(&iter1); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != KEY_TYPE_dirent) { -+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; -+ goto err; -+ } -+ -+ d = bkey_s_c_to_dirent(k); -+ ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); -+ if (ret > 0) -+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; -+ if (ret) -+ goto err; -+ -+ if (target.subvol == inode->ei_subvol && -+ target.inum == inode->ei_inode.bi_inum) -+ goto found; -+ } else { -+ /* -+ * File with multiple hardlinks and our backref is to the wrong -+ * directory - linear search: -+ */ -+ for_each_btree_key_continue_norestart(iter2, 0, k, ret) { -+ if (k.k->p.inode > dir->ei_inode.bi_inum) -+ break; -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ d = bkey_s_c_to_dirent(k); -+ ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); -+ if (ret < 0) -+ break; -+ if (ret) -+ continue; -+ -+ if (target.subvol == inode->ei_subvol && -+ target.inum == inode->ei_inode.bi_inum) -+ goto found; -+ } -+ } -+ -+ ret = -ENOENT; -+ goto err; -+found: -+ dirent_name = bch2_dirent_get_name(d); -+ -+ name_len = min_t(unsigned, dirent_name.len, NAME_MAX); -+ memcpy(name, dirent_name.name, name_len); -+ name[name_len] = '\0'; -+err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_iter_exit(trans, &iter1); -+ bch2_trans_iter_exit(trans, &iter2); -+ bch2_trans_put(trans); -+ -+ return ret; -+} -+ -+static const struct export_operations bch_export_ops = { -+ .encode_fh = bch2_encode_fh, -+ .fh_to_dentry = bch2_fh_to_dentry, -+ .fh_to_parent = bch2_fh_to_parent, -+ .get_parent = bch2_get_parent, 
-+ .get_name = bch2_get_name, -+}; -+ -+static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ struct bch_subvolume *subvol) -+{ -+ bch2_inode_update_after_write(trans, inode, bi, ~0); -+ -+ if (BCH_SUBVOLUME_SNAP(subvol)) -+ set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); -+ else -+ clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); -+ -+ inode->v.i_blocks = bi->bi_sectors; -+ inode->v.i_ino = bi->bi_inum; -+ inode->v.i_rdev = bi->bi_dev; -+ inode->v.i_generation = bi->bi_generation; -+ inode->v.i_size = bi->bi_size; -+ -+ inode->ei_flags = 0; -+ inode->ei_quota_reserved = 0; -+ inode->ei_qid = bch_qid(bi); -+ inode->ei_subvol = inum.subvol; -+ -+ inode->v.i_mapping->a_ops = &bch_address_space_operations; -+ -+ switch (inode->v.i_mode & S_IFMT) { -+ case S_IFREG: -+ inode->v.i_op = &bch_file_inode_operations; -+ inode->v.i_fop = &bch_file_operations; -+ break; -+ case S_IFDIR: -+ inode->v.i_op = &bch_dir_inode_operations; -+ inode->v.i_fop = &bch_dir_file_operations; -+ break; -+ case S_IFLNK: -+ inode_nohighmem(&inode->v); -+ inode->v.i_op = &bch_symlink_inode_operations; -+ break; -+ default: -+ init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); -+ inode->v.i_op = &bch_special_inode_operations; -+ break; -+ } -+ -+ mapping_set_large_folios(inode->v.i_mapping); -+} -+ -+static struct inode *bch2_alloc_inode(struct super_block *sb) -+{ -+ struct bch_inode_info *inode; -+ -+ inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); -+ if (!inode) -+ return NULL; -+ -+ inode_init_once(&inode->v); -+ mutex_init(&inode->ei_update_lock); -+ two_state_lock_init(&inode->ei_pagecache_lock); -+ INIT_LIST_HEAD(&inode->ei_vfs_inode_list); -+ mutex_init(&inode->ei_quota_lock); -+ -+ return &inode->v; -+} -+ -+static void bch2_i_callback(struct rcu_head *head) -+{ -+ struct inode *vinode = container_of(head, struct inode, i_rcu); -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ kmem_cache_free(bch2_inode_cache, inode); -+} -+ -+static void bch2_destroy_inode(struct inode *vinode) -+{ -+ call_rcu(&vinode->i_rcu, bch2_i_callback); -+} -+ -+static int inode_update_times_fn(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ -+ bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime); -+ bi->bi_mtime = timespec_to_bch2_time(c, inode->v.i_mtime); -+ bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v)); -+ -+ return 0; -+} -+ -+static int bch2_vfs_write_inode(struct inode *vinode, -+ struct writeback_control *wbc) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ int ret; -+ -+ mutex_lock(&inode->ei_update_lock); -+ ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, -+ ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); -+ mutex_unlock(&inode->ei_update_lock); -+ -+ return bch2_err_class(ret); -+} -+ -+static void bch2_evict_inode(struct inode *vinode) -+{ -+ struct bch_fs *c = vinode->i_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ -+ truncate_inode_pages_final(&inode->v.i_data); -+ -+ clear_inode(&inode->v); -+ -+ BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); -+ -+ if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { -+ bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), -+ KEY_TYPE_QUOTA_WARN); -+ bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, -+ KEY_TYPE_QUOTA_WARN); -+ 
bch2_inode_rm(c, inode_inum(inode)); -+ } -+ -+ mutex_lock(&c->vfs_inodes_lock); -+ list_del_init(&inode->ei_vfs_inode_list); -+ mutex_unlock(&c->vfs_inodes_lock); -+} -+ -+void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) -+{ -+ struct bch_inode_info *inode, **i; -+ DARRAY(struct bch_inode_info *) grabbed; -+ bool clean_pass = false, this_pass_clean; -+ -+ /* -+ * Initially, we scan for inodes without I_DONTCACHE, then mark them to -+ * be pruned with d_mark_dontcache(). -+ * -+ * Once we've had a clean pass where we didn't find any inodes without -+ * I_DONTCACHE, we wait for them to be freed: -+ */ -+ -+ darray_init(&grabbed); -+ darray_make_room(&grabbed, 1024); -+again: -+ cond_resched(); -+ this_pass_clean = true; -+ -+ mutex_lock(&c->vfs_inodes_lock); -+ list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) { -+ if (!snapshot_list_has_id(s, inode->ei_subvol)) -+ continue; -+ -+ if (!(inode->v.i_state & I_DONTCACHE) && -+ !(inode->v.i_state & I_FREEING) && -+ igrab(&inode->v)) { -+ this_pass_clean = false; -+ -+ if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) { -+ iput(&inode->v); -+ break; -+ } -+ } else if (clean_pass && this_pass_clean) { -+ wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW); -+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); -+ -+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); -+ mutex_unlock(&c->vfs_inodes_lock); -+ -+ schedule(); -+ finish_wait(wq, &wait.wq_entry); -+ goto again; -+ } -+ } -+ mutex_unlock(&c->vfs_inodes_lock); -+ -+ darray_for_each(grabbed, i) { -+ inode = *i; -+ d_mark_dontcache(&inode->v); -+ d_prune_aliases(&inode->v); -+ iput(&inode->v); -+ } -+ grabbed.nr = 0; -+ -+ if (!clean_pass || !this_pass_clean) { -+ clean_pass = this_pass_clean; -+ goto again; -+ } -+ -+ darray_exit(&grabbed); -+} -+ -+static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); -+ unsigned shift = sb->s_blocksize_bits - 9; -+ /* -+ * this assumes inodes take up 64 bytes, which is a decent average -+ * number: -+ */ -+ u64 avail_inodes = ((usage.capacity - usage.used) << 3); -+ u64 fsid; -+ -+ buf->f_type = BCACHEFS_STATFS_MAGIC; -+ buf->f_bsize = sb->s_blocksize; -+ buf->f_blocks = usage.capacity >> shift; -+ buf->f_bfree = usage.free >> shift; -+ buf->f_bavail = avail_factor(usage.free) >> shift; -+ -+ buf->f_files = usage.nr_inodes + avail_inodes; -+ buf->f_ffree = avail_inodes; -+ -+ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ -+ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); -+ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; -+ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; -+ buf->f_namelen = BCH_NAME_MAX; -+ -+ return 0; -+} -+ -+static int bch2_sync_fs(struct super_block *sb, int wait) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ int ret; -+ -+ if (c->opts.journal_flush_disabled) -+ return 0; -+ -+ if (!wait) { -+ bch2_journal_flush_async(&c->journal, NULL); -+ return 0; -+ } -+ -+ ret = bch2_journal_flush(&c->journal); -+ return bch2_err_class(ret); -+} -+ -+static struct bch_fs *bch2_path_to_fs(const char *path) -+{ -+ struct bch_fs *c; -+ dev_t dev; -+ int ret; -+ -+ ret = lookup_bdev(path, &dev); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ c = bch2_dev_to_fs(dev); -+ if (c) -+ closure_put(&c->cl); -+ return c ?: ERR_PTR(-ENOENT); -+} -+ -+static char **split_devs(const char *_dev_name, unsigned *nr) -+{ -+ char 
*dev_name = NULL, **devs = NULL, *s; -+ size_t i = 0, nr_devs = 0; -+ -+ dev_name = kstrdup(_dev_name, GFP_KERNEL); -+ if (!dev_name) -+ return NULL; -+ -+ for (s = dev_name; s; s = strchr(s + 1, ':')) -+ nr_devs++; -+ -+ devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); -+ if (!devs) { -+ kfree(dev_name); -+ return NULL; -+ } -+ -+ while ((s = strsep(&dev_name, ":"))) -+ devs[i++] = s; -+ -+ *nr = nr_devs; -+ return devs; -+} -+ -+static int bch2_remount(struct super_block *sb, int *flags, char *data) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_opts opts = bch2_opts_empty(); -+ int ret; -+ -+ opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(c, &opts, data); -+ if (ret) -+ goto err; -+ -+ if (opts.read_only != c->opts.read_only) { -+ down_write(&c->state_lock); -+ -+ if (opts.read_only) { -+ bch2_fs_read_only(c); -+ -+ sb->s_flags |= SB_RDONLY; -+ } else { -+ ret = bch2_fs_read_write(c); -+ if (ret) { -+ bch_err(c, "error going rw: %i", ret); -+ up_write(&c->state_lock); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ sb->s_flags &= ~SB_RDONLY; -+ } -+ -+ c->opts.read_only = opts.read_only; -+ -+ up_write(&c->state_lock); -+ } -+ -+ if (opt_defined(opts, errors)) -+ c->opts.errors = opts.errors; -+err: -+ return bch2_err_class(ret); -+} -+ -+static int bch2_show_devname(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ struct bch_dev *ca; -+ unsigned i; -+ bool first = true; -+ -+ for_each_online_member(ca, c, i) { -+ if (!first) -+ seq_putc(seq, ':'); -+ first = false; -+ seq_puts(seq, "/dev/"); -+ seq_puts(seq, ca->name); -+ } -+ -+ return 0; -+} -+ -+static int bch2_show_options(struct seq_file *seq, struct dentry *root) -+{ -+ struct bch_fs *c = root->d_sb->s_fs_info; -+ enum bch_opt_id i; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ const struct bch_option *opt = &bch2_opt_table[i]; -+ u64 v = bch2_opt_get_by_id(&c->opts, i); -+ -+ if (!(opt->flags & OPT_MOUNT)) -+ continue; -+ -+ if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) -+ continue; -+ -+ printbuf_reset(&buf); -+ bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v, -+ OPT_SHOW_MOUNT_STYLE); -+ seq_putc(seq, ','); -+ seq_puts(seq, buf.buf); -+ } -+ -+ if (buf.allocation_failure) -+ ret = -ENOMEM; -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static void bch2_put_super(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ __bch2_fs_stop(c); -+} -+ -+/* -+ * bcachefs doesn't currently integrate intwrite freeze protection but the -+ * internal write references serve the same purpose. Therefore reuse the -+ * read-only transition code to perform the quiesce. The caveat is that we don't -+ * currently have the ability to block tasks that want a write reference while -+ * the superblock is frozen. This is fine for now, but we should either add -+ * blocking support or find a way to integrate sb_start_intwrite() and friends. 
-+ */ -+static int bch2_freeze(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+ return 0; -+} -+ -+static int bch2_unfreeze(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ int ret; -+ -+ down_write(&c->state_lock); -+ ret = bch2_fs_read_write(c); -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+static const struct super_operations bch_super_operations = { -+ .alloc_inode = bch2_alloc_inode, -+ .destroy_inode = bch2_destroy_inode, -+ .write_inode = bch2_vfs_write_inode, -+ .evict_inode = bch2_evict_inode, -+ .sync_fs = bch2_sync_fs, -+ .statfs = bch2_statfs, -+ .show_devname = bch2_show_devname, -+ .show_options = bch2_show_options, -+ .remount_fs = bch2_remount, -+ .put_super = bch2_put_super, -+ .freeze_fs = bch2_freeze, -+ .unfreeze_fs = bch2_unfreeze, -+}; -+ -+static int bch2_set_super(struct super_block *s, void *data) -+{ -+ s->s_fs_info = data; -+ return 0; -+} -+ -+static int bch2_noset_super(struct super_block *s, void *data) -+{ -+ return -EBUSY; -+} -+ -+static int bch2_test_super(struct super_block *s, void *data) -+{ -+ struct bch_fs *c = s->s_fs_info; -+ struct bch_fs **devs = data; -+ unsigned i; -+ -+ if (!c) -+ return false; -+ -+ for (i = 0; devs[i]; i++) -+ if (c != devs[i]) -+ return false; -+ return true; -+} -+ -+static struct dentry *bch2_mount(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ struct super_block *sb; -+ struct inode *vinode; -+ struct bch_opts opts = bch2_opts_empty(); -+ char **devs; -+ struct bch_fs **devs_to_fs = NULL; -+ unsigned i, nr_devs; -+ int ret; -+ -+ opt_set(opts, read_only, (flags & SB_RDONLY) != 0); -+ -+ ret = bch2_parse_mount_opts(NULL, &opts, data); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (!dev_name || strlen(dev_name) == 0) -+ return ERR_PTR(-EINVAL); -+ -+ devs = split_devs(dev_name, &nr_devs); -+ if (!devs) -+ return ERR_PTR(-ENOMEM); -+ -+ devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); -+ if (!devs_to_fs) { -+ sb = ERR_PTR(-ENOMEM); -+ goto got_sb; -+ } -+ -+ for (i = 0; i < nr_devs; i++) -+ devs_to_fs[i] = bch2_path_to_fs(devs[i]); -+ -+ sb = sget(fs_type, bch2_test_super, bch2_noset_super, -+ flags|SB_NOSEC, devs_to_fs); -+ if (!IS_ERR(sb)) -+ goto got_sb; -+ -+ c = bch2_fs_open(devs, nr_devs, opts); -+ if (IS_ERR(c)) { -+ sb = ERR_CAST(c); -+ goto got_sb; -+ } -+ -+ /* Some options can't be parsed until after the fs is started: */ -+ ret = bch2_parse_mount_opts(c, &opts, data); -+ if (ret) { -+ bch2_fs_stop(c); -+ sb = ERR_PTR(ret); -+ goto got_sb; -+ } -+ -+ bch2_opts_apply(&c->opts, opts); -+ -+ sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); -+ if (IS_ERR(sb)) -+ bch2_fs_stop(c); -+got_sb: -+ kfree(devs_to_fs); -+ kfree(devs[0]); -+ kfree(devs); -+ -+ if (IS_ERR(sb)) { -+ ret = PTR_ERR(sb); -+ ret = bch2_err_class(ret); -+ return ERR_PTR(ret); -+ } -+ -+ c = sb->s_fs_info; -+ -+ if (sb->s_root) { -+ if ((flags ^ sb->s_flags) & SB_RDONLY) { -+ ret = -EBUSY; -+ goto err_put_super; -+ } -+ goto out; -+ } -+ -+ sb->s_blocksize = block_bytes(c); -+ sb->s_blocksize_bits = ilog2(block_bytes(c)); -+ sb->s_maxbytes = MAX_LFS_FILESIZE; -+ sb->s_op = &bch_super_operations; -+ sb->s_export_op = &bch_export_ops; -+#ifdef CONFIG_BCACHEFS_QUOTA -+ sb->s_qcop = &bch2_quotactl_operations; -+ sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; -+#endif -+ sb->s_xattr = 
bch2_xattr_handlers; -+ sb->s_magic = BCACHEFS_STATFS_MAGIC; -+ sb->s_time_gran = c->sb.nsec_per_time_unit; -+ sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; -+ sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); -+ c->vfs_sb = sb; -+ strscpy(sb->s_id, c->name, sizeof(sb->s_id)); -+ -+ ret = super_setup_bdi(sb); -+ if (ret) -+ goto err_put_super; -+ -+ sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; -+ -+ for_each_online_member(ca, c, i) { -+ struct block_device *bdev = ca->disk_sb.bdev; -+ -+ /* XXX: create an anonymous device for multi device filesystems */ -+ sb->s_bdev = bdev; -+ sb->s_dev = bdev->bd_dev; -+ percpu_ref_put(&ca->io_ref); -+ break; -+ } -+ -+ c->dev = sb->s_dev; -+ -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ if (c->opts.acl) -+ sb->s_flags |= SB_POSIXACL; -+#endif -+ -+ sb->s_shrink.seeks = 0; -+ -+ vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); -+ ret = PTR_ERR_OR_ZERO(vinode); -+ if (ret) { -+ bch_err_msg(c, ret, "mounting: error getting root inode"); -+ goto err_put_super; -+ } -+ -+ sb->s_root = d_make_root(vinode); -+ if (!sb->s_root) { -+ bch_err(c, "error mounting: error allocating root dentry"); -+ ret = -ENOMEM; -+ goto err_put_super; -+ } -+ -+ sb->s_flags |= SB_ACTIVE; -+out: -+ return dget(sb->s_root); -+ -+err_put_super: -+ sb->s_fs_info = NULL; -+ c->vfs_sb = NULL; -+ deactivate_locked_super(sb); -+ bch2_fs_stop(c); -+ return ERR_PTR(bch2_err_class(ret)); -+} -+ -+static void bch2_kill_sb(struct super_block *sb) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (c) -+ c->vfs_sb = NULL; -+ generic_shutdown_super(sb); -+ if (c) -+ bch2_fs_free(c); -+} -+ -+static struct file_system_type bcache_fs_type = { -+ .owner = THIS_MODULE, -+ .name = "bcachefs", -+ .mount = bch2_mount, -+ .kill_sb = bch2_kill_sb, -+ .fs_flags = FS_REQUIRES_DEV, -+}; -+ -+MODULE_ALIAS_FS("bcachefs"); -+ -+void bch2_vfs_exit(void) -+{ -+ unregister_filesystem(&bcache_fs_type); -+ kmem_cache_destroy(bch2_inode_cache); -+} -+ -+int __init bch2_vfs_init(void) -+{ -+ int ret = -ENOMEM; -+ -+ bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); -+ if (!bch2_inode_cache) -+ goto err; -+ -+ ret = register_filesystem(&bcache_fs_type); -+ if (ret) -+ goto err; -+ -+ return 0; -+err: -+ bch2_vfs_exit(); -+ return ret; -+} -+ -+#endif /* NO_BCACHEFS_FS */ -diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h -new file mode 100644 -index 000000000000..5edf1d4b9e6b ---- /dev/null -+++ b/fs/bcachefs/fs.h -@@ -0,0 +1,209 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FS_H -+#define _BCACHEFS_FS_H -+ -+#include "inode.h" -+#include "opts.h" -+#include "str_hash.h" -+#include "quota_types.h" -+#include "two_state_shared_lock.h" -+ -+#include <linux/seqlock.h> -+#include <linux/stat.h> -+ -+struct bch_inode_info { -+ struct inode v; -+ struct list_head ei_vfs_inode_list; -+ unsigned long ei_flags; -+ -+ struct mutex ei_update_lock; -+ u64 ei_quota_reserved; -+ unsigned long ei_last_dirtied; -+ two_state_lock_t ei_pagecache_lock; -+ -+ struct mutex ei_quota_lock; -+ struct bch_qid ei_qid; -+ -+ u32 ei_subvol; -+ -+ /* -+ * When we've been doing nocow writes we'll need to issue flushes to the -+ * underlying block devices -+ * -+ * XXX: a device may have had a flush issued by some other codepath. It -+ * would be better to keep for each device a sequence number that's -+ * incremented when we issue a cache flush, and track here the sequence -+ * number that needs flushing.
-+ */ -+ struct bch_devs_mask ei_devs_need_flush; -+ -+ /* copy of inode in btree: */ -+ struct bch_inode_unpacked ei_inode; -+}; -+ -+#define bch2_pagecache_add_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 0) -+#define bch2_pagecache_add_tryget(i) bch2_two_state_trylock(&i->ei_pagecache_lock, 0) -+#define bch2_pagecache_add_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 0) -+ -+#define bch2_pagecache_block_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 1) -+#define bch2_pagecache_block_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 1) -+ -+static inline subvol_inum inode_inum(struct bch_inode_info *inode) -+{ -+ return (subvol_inum) { -+ .subvol = inode->ei_subvol, -+ .inum = inode->ei_inode.bi_inum, -+ }; -+} -+ -+/* -+ * Set if we've gotten a btree error for this inode, and thus the vfs inode and -+ * btree inode may be inconsistent: -+ */ -+#define EI_INODE_ERROR 0 -+ -+/* -+ * Set if the inode is in a snapshot subvolume - we don't do quota accounting in -+ * those: -+ */ -+#define EI_INODE_SNAPSHOT 1 -+ -+#define to_bch_ei(_inode) \ -+ container_of_or_null(_inode, struct bch_inode_info, v) -+ -+static inline int ptrcmp(void *l, void *r) -+{ -+ return cmp_int(l, r); -+} -+ -+enum bch_inode_lock_op { -+ INODE_LOCK = (1U << 0), -+ INODE_PAGECACHE_BLOCK = (1U << 1), -+ INODE_UPDATE_LOCK = (1U << 2), -+}; -+ -+#define bch2_lock_inodes(_locks, ...) \ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ down_write_nested(&a[i]->v.i_rwsem, i); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_get(a[i]);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_lock_nested(&a[i]->ei_update_lock, i);\ -+ } \ -+} while (0) -+ -+#define bch2_unlock_inodes(_locks, ...)
\ -+do { \ -+ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ -+ unsigned i; \ -+ \ -+ bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ -+ \ -+ for (i = 1; i < ARRAY_SIZE(a); i++) \ -+ if (a[i] != a[i - 1]) { \ -+ if ((_locks) & INODE_LOCK) \ -+ up_write(&a[i]->v.i_rwsem); \ -+ if ((_locks) & INODE_PAGECACHE_BLOCK) \ -+ bch2_pagecache_block_put(a[i]);\ -+ if ((_locks) & INODE_UPDATE_LOCK) \ -+ mutex_unlock(&a[i]->ei_update_lock); \ -+ } \ -+} while (0) -+ -+static inline struct bch_inode_info *file_bch_inode(struct file *file) -+{ -+ return to_bch_ei(file_inode(file)); -+} -+ -+static inline bool inode_attr_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode, -+ enum inode_opt_id id) -+{ -+ return !(inode->ei_inode.bi_fields_set & (1 << id)) && -+ bch2_inode_opt_get(&dir->ei_inode, id) != -+ bch2_inode_opt_get(&inode->ei_inode, id); -+} -+ -+static inline bool inode_attrs_changing(struct bch_inode_info *dir, -+ struct bch_inode_info *inode) -+{ -+ unsigned id; -+ -+ for (id = 0; id < Inode_opt_nr; id++) -+ if (inode_attr_changing(dir, inode, id)) -+ return true; -+ -+ return false; -+} -+ -+struct bch_inode_unpacked; -+ -+#ifndef NO_BCACHEFS_FS -+ -+struct bch_inode_info * -+__bch2_create(struct mnt_idmap *, struct bch_inode_info *, -+ struct dentry *, umode_t, dev_t, subvol_inum, unsigned); -+ -+int bch2_fs_quota_transfer(struct bch_fs *, -+ struct bch_inode_info *, -+ struct bch_qid, -+ unsigned, -+ enum quota_acct_mode); -+ -+static inline int bch2_set_projid(struct bch_fs *c, -+ struct bch_inode_info *inode, -+ u32 projid) -+{ -+ struct bch_qid qid = inode->ei_qid; -+ -+ qid.q[QTYP_PRJ] = projid; -+ -+ return bch2_fs_quota_transfer(c, inode, qid, -+ 1 << QTYP_PRJ, -+ KEY_TYPE_QUOTA_PREALLOC); -+} -+ -+struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum); -+ -+/* returns 0 if we want to do the update, or error is passed up */ -+typedef int (*inode_set_fn)(struct btree_trans *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *, void *); -+ -+void bch2_inode_update_after_write(struct btree_trans *, -+ struct bch_inode_info *, -+ struct bch_inode_unpacked *, -+ unsigned); -+int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, -+ inode_set_fn, void *, unsigned); -+ -+int bch2_setattr_nonsize(struct mnt_idmap *, -+ struct bch_inode_info *, -+ struct iattr *); -+int __bch2_unlink(struct inode *, struct dentry *, bool); -+ -+void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *); -+ -+void bch2_vfs_exit(void); -+int bch2_vfs_init(void); -+ -+#else -+ -+#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); }) -+ -+static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, -+ snapshot_id_list *s) {} -+static inline void bch2_vfs_exit(void) {} -+static inline int bch2_vfs_init(void) { return 0; } -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+#endif /* _BCACHEFS_FS_H */ -diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c -new file mode 100644 -index 000000000000..9f3e9bd3d767 ---- /dev/null -+++ b/fs/bcachefs/fsck.c -@@ -0,0 +1,2490 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_buf.h" -+#include "btree_cache.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "darray.h" -+#include "dirent.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "inode.h" -+#include "keylist.h" -+#include "recovery.h" -+#include "snapshot.h" -+#include "super.h" -+#include "xattr.h" -+ -+#include <linux/bsearch.h> -+#include <linux/dcache.h> /* struct qstr */
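-+/* Note: QSTR() below fills in only .len and .name; the dirent hash helpers used by fsck compute the hash from the name itself, so the precomputed hash field is left zeroed. */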
-+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+/* -+ * XXX: this is handling transaction restarts without returning -+ * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: -+ */ -+static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, -+ u32 snapshot) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u64 sectors = 0; -+ int ret; -+ -+ for_each_btree_key_upto(trans, iter, BTREE_ID_extents, -+ SPOS(inum, 0, snapshot), -+ POS(inum, U64_MAX), -+ 0, k, ret) -+ if (bkey_extent_is_allocation(k.k)) -+ sectors += k.k->size; -+ -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret ?: sectors; -+} -+ -+static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, -+ u32 snapshot) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ u64 subdirs = 0; -+ int ret; -+ -+ for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, -+ SPOS(inum, 0, snapshot), -+ POS(inum, U64_MAX), -+ 0, k, ret) { -+ if (k.k->type != KEY_TYPE_dirent) -+ continue; -+ -+ d = bkey_s_c_to_dirent(k); -+ if (d.v->d_type == DT_DIR) -+ subdirs++; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret ?: subdirs; -+} -+ -+static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, -+ u32 *subvol) -+{ -+ struct bch_snapshot s; -+ int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, -+ POS(0, snapshot), 0, -+ snapshot, &s); -+ if (!ret) -+ *subvol = le32_to_cpu(s.subvol); -+ else if (bch2_err_matches(ret, ENOENT)) -+ bch_err(trans->c, "snapshot %u not found", snapshot); -+ return ret; -+ -+} -+ -+static int __subvol_lookup(struct btree_trans *trans, u32 subvol, -+ u32 *snapshot, u64 *inum) -+{ -+ struct bch_subvolume s; -+ int ret; -+ -+ ret = bch2_subvolume_get(trans, subvol, false, 0, &s); -+ -+ *snapshot = le32_to_cpu(s.snapshot); -+ *inum = le64_to_cpu(s.inode); -+ return ret; -+} -+ -+static int subvol_lookup(struct btree_trans *trans, u32 subvol, -+ u32 *snapshot, u64 *inum) -+{ -+ return lockrestart_do(trans, __subvol_lookup(trans, subvol, snapshot, inum)); -+} -+ -+static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, -+ POS(0, inode_nr), -+ BTREE_ITER_ALL_SNAPSHOTS); -+ k = bch2_btree_iter_peek(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) { -+ ret = -BCH_ERR_ENOENT_inode; -+ goto err; -+ } -+ -+ ret = bch2_inode_unpack(k, inode); -+err: -+ bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, -+ struct bch_inode_unpacked *inode, -+ u32 *snapshot) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, inode_nr, *snapshot), 0); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ ret = bkey_is_inode(k.k) -+ ? 
bch2_inode_unpack(k, inode) -+ : -BCH_ERR_ENOENT_inode; -+ if (!ret) -+ *snapshot = iter.pos.snapshot; -+err: -+ bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int lookup_inode(struct btree_trans *trans, u64 inode_nr, -+ struct bch_inode_unpacked *inode, -+ u32 *snapshot) -+{ -+ return lockrestart_do(trans, __lookup_inode(trans, inode_nr, inode, snapshot)); -+} -+ -+static int __lookup_dirent(struct btree_trans *trans, -+ struct bch_hash_info hash_info, -+ subvol_inum dir, struct qstr *name, -+ u64 *target, unsigned *type) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c_dirent d; -+ int ret; -+ -+ ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc, -+ &hash_info, dir, name, 0); -+ if (ret) -+ return ret; -+ -+ d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); -+ *target = le64_to_cpu(d.v->d_inum); -+ *type = d.v->d_type; -+ bch2_trans_iter_exit(trans, &iter); -+ return 0; -+} -+ -+static int __write_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u32 snapshot) -+{ -+ struct bkey_inode_buf *inode_p = -+ bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+ -+ bch2_inode_pack(inode_p, inode); -+ inode_p->inode.k.p.snapshot = snapshot; -+ -+ return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, -+ &inode_p->inode.k_i, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+} -+ -+static int fsck_write_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u32 snapshot) -+{ -+ int ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ __write_inode(trans, inode, snapshot)); -+ if (ret) -+ bch_err_fn(trans->c, ret); -+ return ret; -+} -+ -+static int __remove_dirent(struct btree_trans *trans, struct bpos pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bch_inode_unpacked dir_inode; -+ struct bch_hash_info dir_hash_info; -+ int ret; -+ -+ ret = lookup_first_inode(trans, pos.inode, &dir_inode); -+ if (ret) -+ goto err; -+ -+ dir_hash_info = bch2_hash_info_init(c, &dir_inode); -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); -+ -+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, -+ &dir_hash_info, &iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* Get lost+found, create if it doesn't exist: */ -+static int lookup_lostfound(struct btree_trans *trans, u32 subvol, -+ struct bch_inode_unpacked *lostfound) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked root; -+ struct bch_hash_info root_hash_info; -+ struct qstr lostfound_str = QSTR("lost+found"); -+ subvol_inum root_inum = { .subvol = subvol }; -+ u64 inum = 0; -+ unsigned d_type = 0; -+ u32 snapshot; -+ int ret; -+ -+ ret = __subvol_lookup(trans, subvol, &snapshot, &root_inum.inum); -+ if (ret) -+ return ret; -+ -+ ret = __lookup_inode(trans, root_inum.inum, &root, &snapshot); -+ if (ret) -+ return ret; -+ -+ root_hash_info = bch2_hash_info_init(c, &root); -+ -+ ret = __lookup_dirent(trans, root_hash_info, root_inum, -+ &lostfound_str, &inum, &d_type); -+ if (bch2_err_matches(ret, ENOENT)) { -+ bch_notice(c, "creating lost+found"); -+ goto create_lostfound; -+ } -+ -+ bch_err_fn(c, ret); -+ if (ret) -+ return ret; -+ -+ if (d_type != DT_DIR) { -+ bch_err(c, "error looking up lost+found: not a directory"); -+ return 
-BCH_ERR_ENOENT_not_directory; -+ } -+ -+ /* -+ * The bch2_check_dirents pass has already run, dangling dirents -+ * shouldn't exist here: -+ */ -+ return __lookup_inode(trans, inum, lostfound, &snapshot); -+ -+create_lostfound: -+ bch2_inode_init_early(c, lostfound); -+ -+ ret = bch2_create_trans(trans, root_inum, &root, -+ lostfound, &lostfound_str, -+ 0, 0, S_IFDIR|0700, 0, NULL, NULL, -+ (subvol_inum) { }, 0); -+ bch_err_msg(c, ret, "creating lost+found"); -+ return ret; -+} -+ -+static int __reattach_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u32 inode_snapshot) -+{ -+ struct bch_hash_info dir_hash; -+ struct bch_inode_unpacked lostfound; -+ char name_buf[20]; -+ struct qstr name; -+ u64 dir_offset = 0; -+ u32 subvol; -+ int ret; -+ -+ ret = __snapshot_lookup_subvol(trans, inode_snapshot, &subvol); -+ if (ret) -+ return ret; -+ -+ ret = lookup_lostfound(trans, subvol, &lostfound); -+ if (ret) -+ return ret; -+ -+ if (S_ISDIR(inode->bi_mode)) { -+ lostfound.bi_nlink++; -+ -+ ret = __write_inode(trans, &lostfound, U32_MAX); -+ if (ret) -+ return ret; -+ } -+ -+ dir_hash = bch2_hash_info_init(trans->c, &lostfound); -+ -+ snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); -+ name = (struct qstr) QSTR(name_buf); -+ -+ ret = bch2_dirent_create(trans, -+ (subvol_inum) { -+ .subvol = subvol, -+ .inum = lostfound.bi_inum, -+ }, -+ &dir_hash, -+ inode_d_type(inode), -+ &name, inode->bi_inum, &dir_offset, -+ BCH_HASH_SET_MUST_CREATE); -+ if (ret) -+ return ret; -+ -+ inode->bi_dir = lostfound.bi_inum; -+ inode->bi_dir_offset = dir_offset; -+ -+ return __write_inode(trans, inode, inode_snapshot); -+} -+ -+static int reattach_inode(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u32 inode_snapshot) -+{ -+ int ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_NOFAIL, -+ __reattach_inode(trans, inode, inode_snapshot)); -+ bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum); -+ return ret; -+} -+ -+static int remove_backpointer(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c_dirent d; -+ int ret; -+ -+ d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, -+ POS(inode->bi_dir, inode->bi_dir_offset), 0, -+ dirent); -+ ret = bkey_err(d) ?: -+ __remove_dirent(trans, d.k->p); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+struct snapshots_seen_entry { -+ u32 id; -+ u32 equiv; -+}; -+ -+struct snapshots_seen { -+ struct bpos pos; -+ DARRAY(struct snapshots_seen_entry) ids; -+}; -+ -+static inline void snapshots_seen_exit(struct snapshots_seen *s) -+{ -+ darray_exit(&s->ids); -+} -+ -+static inline void snapshots_seen_init(struct snapshots_seen *s) -+{ -+ memset(s, 0, sizeof(*s)); -+} -+ -+static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) -+{ -+ struct snapshots_seen_entry *i, n = { -+ .id = id, -+ .equiv = bch2_snapshot_equiv(c, id), -+ }; -+ int ret = 0; -+ -+ darray_for_each(s->ids, i) { -+ if (i->id == id) -+ return 0; -+ if (i->id > id) -+ break; -+ } -+ -+ ret = darray_insert_item(&s->ids, i - s->ids.data, n); -+ if (ret) -+ bch_err(c, "error reallocating snapshots_seen table (size %zu)", -+ s->ids.size); -+ return ret; -+} -+ -+static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, -+ enum btree_id btree_id, struct bpos pos) -+{ -+ struct snapshots_seen_entry *i, n = { -+ .id = pos.snapshot, -+ .equiv = bch2_snapshot_equiv(c, pos.snapshot), -+ }; -+ 
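-+	/* n.equiv is the snapshot ID's equivalence class; distinct IDs only share one while snapshot deletion has yet to finish */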
int ret = 0; -+ -+ if (!bkey_eq(s->pos, pos)) -+ s->ids.nr = 0; -+ -+ s->pos = pos; -+ s->pos.snapshot = n.equiv; -+ -+ darray_for_each(s->ids, i) { -+ if (i->id == n.id) -+ return 0; -+ -+ /* -+ * We currently don't rigorously track for snapshot cleanup -+ * needing to be run, so it shouldn't be a fsck error yet: -+ */ -+ if (i->equiv == n.equiv) { -+ bch_err(c, "snapshot deletion did not finish:\n" -+ " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", -+ bch2_btree_id_str(btree_id), -+ pos.inode, pos.offset, -+ i->id, n.id, n.equiv); -+ set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); -+ return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); -+ } -+ } -+ -+ ret = darray_push(&s->ids, n); -+ if (ret) -+ bch_err(c, "error reallocating snapshots_seen table (size %zu)", -+ s->ids.size); -+ return ret; -+} -+ -+/** -+ * key_visible_in_snapshot - returns true if @id is a descendant of @ancestor, -+ * and @ancestor hasn't been overwritten in @seen -+ * -+ * @c: filesystem handle -+ * @seen: list of snapshot ids already seen at current position -+ * @id: descendant snapshot id -+ * @ancestor: ancestor snapshot id -+ * -+ * Returns: whether key in @ancestor snapshot is visible in @id snapshot -+ */ -+static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, -+ u32 id, u32 ancestor) -+{ -+ ssize_t i; -+ -+ EBUG_ON(id > ancestor); -+ EBUG_ON(!bch2_snapshot_is_equiv(c, id)); -+ EBUG_ON(!bch2_snapshot_is_equiv(c, ancestor)); -+ -+ /* @ancestor should be the snapshot most recently added to @seen */ -+ EBUG_ON(ancestor != seen->pos.snapshot); -+ EBUG_ON(ancestor != seen->ids.data[seen->ids.nr - 1].equiv); -+ -+ if (id == ancestor) -+ return true; -+ -+ if (!bch2_snapshot_is_ancestor(c, id, ancestor)) -+ return false; -+ -+ /* -+ * We know that @id is a descendant of @ancestor, we're checking if -+ * we've seen a key that overwrote @ancestor - i.e. also a descendant of -+ * @ancestor and with @id as a descendant. -+ * -+ * But we already know that we're scanning IDs between @id and @ancestor -+ * numerically, since snapshot ID lists are kept sorted, so if we find -+ * an id that's an ancestor of @id we're done: -+ */ -+ -+ for (i = seen->ids.nr - 2; -+ i >= 0 && seen->ids.data[i].equiv >= id; -+ --i) -+ if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv)) -+ return false; -+ -+ return true; -+} -+ -+/** -+ * ref_visible - given a key with snapshot id @src that points to a key with -+ * snapshot id @dst, test whether there is some snapshot in which @dst is -+ * visible. -+ * -+ * @c: filesystem handle -+ * @s: list of snapshot IDs already seen at @src -+ * @src: snapshot ID of src key -+ * @dst: snapshot ID of dst key -+ * Returns: true if there is some snapshot in which @dst is visible -+ * -+ * Assumes we're visiting @src keys in natural key order -+ */ -+static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, -+ u32 src, u32 dst) -+{ -+ return dst <= src -+ ?
key_visible_in_snapshot(c, s, dst, src) -+ : bch2_snapshot_is_ancestor(c, src, dst); -+} -+ -+static int ref_visible2(struct bch_fs *c, -+ u32 src, struct snapshots_seen *src_seen, -+ u32 dst, struct snapshots_seen *dst_seen) -+{ -+ src = bch2_snapshot_equiv(c, src); -+ dst = bch2_snapshot_equiv(c, dst); -+ -+ if (dst > src) { -+ swap(dst, src); -+ swap(dst_seen, src_seen); -+ } -+ return key_visible_in_snapshot(c, src_seen, dst, src); -+} -+ -+#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ -+ for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ -+ (_i)->snapshot <= (_snapshot); _i++) \ -+ if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) -+ -+struct inode_walker_entry { -+ struct bch_inode_unpacked inode; -+ u32 snapshot; -+ bool seen_this_pos; -+ u64 count; -+}; -+ -+struct inode_walker { -+ bool first_this_inode; -+ bool recalculate_sums; -+ struct bpos last_pos; -+ -+ DARRAY(struct inode_walker_entry) inodes; -+}; -+ -+static void inode_walker_exit(struct inode_walker *w) -+{ -+ darray_exit(&w->inodes); -+} -+ -+static struct inode_walker inode_walker_init(void) -+{ -+ return (struct inode_walker) { 0, }; -+} -+ -+static int add_inode(struct bch_fs *c, struct inode_walker *w, -+ struct bkey_s_c inode) -+{ -+ struct bch_inode_unpacked u; -+ -+ BUG_ON(bch2_inode_unpack(inode, &u)); -+ -+ return darray_push(&w->inodes, ((struct inode_walker_entry) { -+ .inode = u, -+ .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot), -+ })); -+} -+ -+static int get_inodes_all_snapshots(struct btree_trans *trans, -+ struct inode_walker *w, u64 inum) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u32 restart_count = trans->restart_count; -+ int ret; -+ -+ w->recalculate_sums = false; -+ w->inodes.nr = 0; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), -+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ if (k.k->p.offset != inum) -+ break; -+ -+ if (bkey_is_inode(k.k)) -+ add_inode(c, w, k); -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) -+ return ret; -+ -+ w->first_this_inode = true; -+ -+ return trans_was_restarted(trans, restart_count); -+} -+ -+static struct inode_walker_entry * -+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, -+ u32 snapshot, bool is_whiteout) -+{ -+ struct inode_walker_entry *i; -+ -+ snapshot = bch2_snapshot_equiv(c, snapshot); -+ -+ darray_for_each(w->inodes, i) -+ if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot)) -+ goto found; -+ -+ return NULL; -+found: -+ BUG_ON(snapshot > i->snapshot); -+ -+ if (snapshot != i->snapshot && !is_whiteout) { -+ struct inode_walker_entry new = *i; -+ size_t pos; -+ int ret; -+ -+ new.snapshot = snapshot; -+ new.count = 0; -+ -+ bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u", -+ w->last_pos.inode, snapshot, i->snapshot); -+ -+ while (i > w->inodes.data && i[-1].snapshot > snapshot) -+ --i; -+ -+ pos = i - w->inodes.data; -+ ret = darray_insert_item(&w->inodes, pos, new); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ i = w->inodes.data + pos; -+ } -+ -+ return i; -+} -+ -+static struct inode_walker_entry *walk_inode(struct btree_trans *trans, -+ struct inode_walker *w, struct bpos pos, -+ bool is_whiteout) -+{ -+ if (w->last_pos.inode != pos.inode) { -+ int ret = get_inodes_all_snapshots(trans, w, pos.inode); -+ if (ret) -+ return ERR_PTR(ret); -+ } else if (bkey_cmp(w->last_pos, pos)) { -+ struct inode_walker_entry *i; -+ -+ darray_for_each(w->inodes, i) -+ i->seen_this_pos = 
false; -+ -+ } -+ -+ w->last_pos = pos; -+ -+ return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout); -+} -+ -+static int __get_visible_inodes(struct btree_trans *trans, -+ struct inode_walker *w, -+ struct snapshots_seen *s, -+ u64 inum) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ w->inodes.nr = 0; -+ -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), -+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); -+ -+ if (k.k->p.offset != inum) -+ break; -+ -+ if (!ref_visible(c, s, s->pos.snapshot, equiv)) -+ continue; -+ -+ if (bkey_is_inode(k.k)) -+ add_inode(c, w, k); -+ -+ if (equiv >= s->pos.snapshot) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+} -+ -+static int check_key_has_snapshot(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, -+ bkey_in_missing_snapshot, -+ "key in missing snapshot: %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ ret = bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int hash_redo_key(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c k) -+{ -+ struct bkey_i *delete; -+ struct bkey_i *tmp; -+ -+ delete = bch2_trans_kmalloc(trans, sizeof(*delete)); -+ if (IS_ERR(delete)) -+ return PTR_ERR(delete); -+ -+ tmp = bch2_bkey_make_mut_noupdate(trans, k); -+ if (IS_ERR(tmp)) -+ return PTR_ERR(tmp); -+ -+ bkey_init(&delete->k); -+ delete->k.p = k_iter->pos; -+ return bch2_btree_iter_traverse(k_iter) ?: -+ bch2_trans_update(trans, k_iter, delete, 0) ?: -+ bch2_hash_set_snapshot(trans, desc, hash_info, -+ (subvol_inum) { 0, k.k->p.inode }, -+ k.k->p.snapshot, tmp, -+ BCH_HASH_SET_MUST_CREATE, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW); -+} -+ -+static int hash_check_key(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ struct bch_hash_info *hash_info, -+ struct btree_iter *k_iter, struct bkey_s_c hash_k) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter = { NULL }; -+ struct printbuf buf = PRINTBUF; -+ struct bkey_s_c k; -+ u64 hash; -+ int ret = 0; -+ -+ if (hash_k.k->type != desc.key_type) -+ return 0; -+ -+ hash = desc.hash_bkey(hash_info, hash_k); -+ -+ if (likely(hash == hash_k.k->p.offset)) -+ return 0; -+ -+ if (hash_k.k->p.offset < hash) -+ goto bad_hash; -+ -+ for_each_btree_key_norestart(trans, iter, desc.btree_id, -+ SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), -+ BTREE_ITER_SLOTS, k, ret) { -+ if (bkey_eq(k.k->p, hash_k.k->p)) -+ break; -+ -+ if (fsck_err_on(k.k->type == desc.key_type && -+ !desc.cmp_bkey(k, hash_k), c, -+ hash_table_key_duplicate, -+ "duplicate hash table keys:\n%s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, hash_k), -+ buf.buf))) { -+ ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; -+ break; -+ } -+ -+ if (bkey_deleted(k.k)) { -+ bch2_trans_iter_exit(trans, &iter); -+ goto bad_hash; -+ } -+ } -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf); -+ return ret; -+bad_hash: -+ if (fsck_err(c, hash_table_key_wrong_offset, 
-+ "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", -+ bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { -+ ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); -+ bch_err_fn(c, ret); -+ if (ret) -+ return ret; -+ ret = -BCH_ERR_transaction_restart_nested; -+ } -+fsck_err: -+ goto out; -+} -+ -+static int check_inode(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct bch_inode_unpacked *prev, -+ struct snapshots_seen *s, -+ bool full) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked u; -+ bool do_update = false; -+ int ret; -+ -+ ret = check_key_has_snapshot(trans, iter, k); -+ if (ret < 0) -+ goto err; -+ if (ret) -+ return 0; -+ -+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); -+ if (ret) -+ goto err; -+ -+ if (!bkey_is_inode(k.k)) -+ return 0; -+ -+ BUG_ON(bch2_inode_unpack(k, &u)); -+ -+ if (!full && -+ !(u.bi_flags & (BCH_INODE_i_size_dirty| -+ BCH_INODE_i_sectors_dirty| -+ BCH_INODE_unlinked))) -+ return 0; -+ -+ if (prev->bi_inum != u.bi_inum) -+ *prev = u; -+ -+ if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || -+ inode_d_type(prev) != inode_d_type(&u), -+ c, inode_snapshot_mismatch, -+ "inodes in different snapshots don't match")) { -+ bch_err(c, "repair not implemented yet"); -+ return -EINVAL; -+ } -+ -+ if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && -+ bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { -+ struct bpos new_min_pos; -+ -+ ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); -+ if (ret) -+ goto err; -+ -+ u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; -+ -+ ret = __write_inode(trans, &u, iter->pos.snapshot); -+ bch_err_msg(c, ret, "in fsck updating inode"); -+ if (ret) -+ return ret; -+ -+ if (!bpos_eq(new_min_pos, POS_MIN)) -+ bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); -+ return 0; -+ } -+ -+ if (u.bi_flags & BCH_INODE_unlinked && -+ (!c->sb.clean || -+ fsck_err(c, inode_unlinked_but_clean, -+ "filesystem marked clean, but inode %llu unlinked", -+ u.bi_inum))) { -+ bch2_trans_unlock(trans); -+ bch2_fs_lazy_rw(c); -+ -+ ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); -+ bch_err_msg(c, ret, "in fsck deleting inode"); -+ return ret; -+ } -+ -+ if (u.bi_flags & BCH_INODE_i_size_dirty && -+ (!c->sb.clean || -+ fsck_err(c, inode_i_size_dirty_but_clean, -+ "filesystem marked clean, but inode %llu has i_size dirty", -+ u.bi_inum))) { -+ bch_verbose(c, "truncating inode %llu", u.bi_inum); -+ -+ bch2_trans_unlock(trans); -+ bch2_fs_lazy_rw(c); -+ -+ /* -+ * XXX: need to truncate partial blocks too here - or ideally -+ * just switch units to bytes and that issue goes away -+ */ -+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, -+ SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, -+ iter->pos.snapshot), -+ POS(u.bi_inum, U64_MAX), -+ 0, NULL); -+ bch_err_msg(c, ret, "in fsck truncating inode"); -+ if (ret) -+ return ret; -+ -+ /* -+ * We truncated without our normal sector accounting hook, just -+ * make sure we recalculate it: -+ */ -+ u.bi_flags |= BCH_INODE_i_sectors_dirty; -+ -+ u.bi_flags &= ~BCH_INODE_i_size_dirty; -+ do_update = true; -+ } -+ -+ if (u.bi_flags & BCH_INODE_i_sectors_dirty && -+ (!c->sb.clean || -+ fsck_err(c, inode_i_sectors_dirty_but_clean, -+ "filesystem marked clean, but inode %llu has 
i_sectors dirty", -+ u.bi_inum))) { -+ s64 sectors; -+ -+ bch_verbose(c, "recounting sectors for inode %llu", -+ u.bi_inum); -+ -+ sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); -+ if (sectors < 0) { -+ bch_err_msg(c, sectors, "in fsck recounting inode sectors"); -+ return sectors; -+ } -+ -+ u.bi_sectors = sectors; -+ u.bi_flags &= ~BCH_INODE_i_sectors_dirty; -+ do_update = true; -+ } -+ -+ if (u.bi_flags & BCH_INODE_backptr_untrusted) { -+ u.bi_dir = 0; -+ u.bi_dir_offset = 0; -+ u.bi_flags &= ~BCH_INODE_backptr_untrusted; -+ do_update = true; -+ } -+ -+ if (do_update) { -+ ret = __write_inode(trans, &u, iter->pos.snapshot); -+ bch_err_msg(c, ret, "in fsck updating inode"); -+ if (ret) -+ return ret; -+ } -+err: -+fsck_err: -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+noinline_for_stack -+int bch2_check_inodes(struct bch_fs *c) -+{ -+ bool full = c->opts.fsck; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bch_inode_unpacked prev = { 0 }; -+ struct snapshots_seen s; -+ struct bkey_s_c k; -+ int ret; -+ -+ snapshots_seen_init(&s); -+ -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, -+ POS_MIN, -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_inode(trans, &iter, k, &prev, &s, full)); -+ -+ snapshots_seen_exit(&s); -+ bch2_trans_put(trans); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos pos) -+{ -+ return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); -+} -+ -+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, -+ struct bkey_s_c_dirent d) -+{ -+ return inode->bi_dir == d.k->p.inode && -+ inode->bi_dir_offset == d.k->p.offset; -+} -+ -+static bool dirent_points_to_inode(struct bkey_s_c_dirent d, -+ struct bch_inode_unpacked *inode) -+{ -+ return d.v->d_type == DT_SUBVOL -+ ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol -+ : le64_to_cpu(d.v->d_inum) == inode->bi_inum; -+} -+ -+static int inode_backpointer_exists(struct btree_trans *trans, -+ struct bch_inode_unpacked *inode, -+ u32 snapshot) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c_dirent d; -+ int ret; -+ -+ d = dirent_get_by_pos(trans, &iter, -+ SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); -+ ret = bkey_err(d); -+ if (ret) -+ return bch2_err_matches(ret, ENOENT) ? 
0 : ret; -+ -+ ret = dirent_points_to_inode(d, inode); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) -+{ -+ struct bch_fs *c = trans->c; -+ struct inode_walker_entry *i; -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ s64 count2; -+ -+ darray_for_each(w->inodes, i) { -+ if (i->inode.bi_sectors == i->count) -+ continue; -+ -+ count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); -+ -+ if (w->recalculate_sums) -+ i->count = count2; -+ -+ if (i->count != count2) { -+ bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", -+ w->last_pos.inode, i->snapshot, i->count, count2); -+ return -BCH_ERR_internal_fsck_err; -+ } -+ -+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), -+ c, inode_i_sectors_wrong, -+ "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", -+ w->last_pos.inode, i->snapshot, -+ i->inode.bi_sectors, i->count)) { -+ i->inode.bi_sectors = i->count; -+ ret = fsck_write_inode(trans, &i->inode, i->snapshot); -+ if (ret) -+ break; -+ } -+ } -+fsck_err: -+ bch_err_fn(c, ret); -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ -+struct extent_end { -+ u32 snapshot; -+ u64 offset; -+ struct snapshots_seen seen; -+}; -+ -+struct extent_ends { -+ struct bpos last_pos; -+ DARRAY(struct extent_end) e; -+}; -+ -+static void extent_ends_reset(struct extent_ends *extent_ends) -+{ -+ struct extent_end *i; -+ -+ darray_for_each(extent_ends->e, i) -+ snapshots_seen_exit(&i->seen); -+ -+ extent_ends->e.nr = 0; -+} -+ -+static void extent_ends_exit(struct extent_ends *extent_ends) -+{ -+ extent_ends_reset(extent_ends); -+ darray_exit(&extent_ends->e); -+} -+ -+static void extent_ends_init(struct extent_ends *extent_ends) -+{ -+ memset(extent_ends, 0, sizeof(*extent_ends)); -+} -+ -+static int extent_ends_at(struct bch_fs *c, -+ struct extent_ends *extent_ends, -+ struct snapshots_seen *seen, -+ struct bkey_s_c k) -+{ -+ struct extent_end *i, n = (struct extent_end) { -+ .offset = k.k->p.offset, -+ .snapshot = k.k->p.snapshot, -+ .seen = *seen, -+ }; -+ -+ n.seen.ids.data = kmemdup(seen->ids.data, -+ sizeof(seen->ids.data[0]) * seen->ids.size, -+ GFP_KERNEL); -+ if (!n.seen.ids.data) -+ return -BCH_ERR_ENOMEM_fsck_extent_ends_at; -+ -+ darray_for_each(extent_ends->e, i) { -+ if (i->snapshot == k.k->p.snapshot) { -+ snapshots_seen_exit(&i->seen); -+ *i = n; -+ return 0; -+ } -+ -+ if (i->snapshot >= k.k->p.snapshot) -+ break; -+ } -+ -+ return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); -+} -+ -+static int overlapping_extents_found(struct btree_trans *trans, -+ enum btree_id btree, -+ struct bpos pos1, struct snapshots_seen *pos1_seen, -+ struct bkey pos2, -+ bool *fixed, -+ struct extent_end *extent_end) -+{ -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ struct btree_iter iter1, iter2 = { NULL }; -+ struct bkey_s_c k1, k2; -+ int ret; -+ -+ BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); -+ -+ bch2_trans_iter_init(trans, &iter1, btree, pos1, -+ BTREE_ITER_ALL_SNAPSHOTS| -+ BTREE_ITER_NOT_EXTENTS); -+ k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); -+ ret = bkey_err(k1); -+ if (ret) -+ goto err; -+ -+ prt_str(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k1); -+ -+ if (!bpos_eq(pos1, k1.k->p)) { -+ prt_str(&buf, "\n wanted\n "); -+ bch2_bpos_to_text(&buf, pos1); -+ prt_str(&buf, "\n "); -+ bch2_bkey_to_text(&buf, &pos2); -+ -+ bch_err(c, 
"%s: error finding first overlapping extent when repairing, got%s", -+ __func__, buf.buf); -+ ret = -BCH_ERR_internal_fsck_err; -+ goto err; -+ } -+ -+ bch2_trans_copy_iter(&iter2, &iter1); -+ -+ while (1) { -+ bch2_btree_iter_advance(&iter2); -+ -+ k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); -+ ret = bkey_err(k2); -+ if (ret) -+ goto err; -+ -+ if (bpos_ge(k2.k->p, pos2.p)) -+ break; -+ } -+ -+ prt_str(&buf, "\n "); -+ bch2_bkey_val_to_text(&buf, c, k2); -+ -+ if (bpos_gt(k2.k->p, pos2.p) || -+ pos2.size != k2.k->size) { -+ bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", -+ __func__, buf.buf); -+ ret = -BCH_ERR_internal_fsck_err; -+ goto err; -+ } -+ -+ prt_printf(&buf, "\n overwriting %s extent", -+ pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); -+ -+ if (fsck_err(c, extent_overlapping, -+ "overlapping extents%s", buf.buf)) { -+ struct btree_iter *old_iter = &iter1; -+ struct disk_reservation res = { 0 }; -+ -+ if (pos1.snapshot < pos2.p.snapshot) { -+ old_iter = &iter2; -+ swap(k1, k2); -+ } -+ -+ trans->extra_journal_res += bch2_bkey_sectors_compressed(k2); -+ -+ ret = bch2_trans_update_extent_overwrite(trans, old_iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, -+ k1, k2) ?: -+ bch2_trans_commit(trans, &res, NULL, -+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL); -+ bch2_disk_reservation_put(c, &res); -+ -+ if (ret) -+ goto err; -+ -+ *fixed = true; -+ -+ if (pos1.snapshot == pos2.p.snapshot) { -+ /* -+ * We overwrote the first extent, and did the overwrite -+ * in the same snapshot: -+ */ -+ extent_end->offset = bkey_start_offset(&pos2); -+ } else if (pos1.snapshot > pos2.p.snapshot) { -+ /* -+ * We overwrote the first extent in pos2's snapshot: -+ */ -+ ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); -+ } else { -+ /* -+ * We overwrote the second extent - restart -+ * check_extent() from the top: -+ */ -+ ret = -BCH_ERR_transaction_restart_nested; -+ } -+ } -+fsck_err: -+err: -+ bch2_trans_iter_exit(trans, &iter2); -+ bch2_trans_iter_exit(trans, &iter1); -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int check_overlapping_extents(struct btree_trans *trans, -+ struct snapshots_seen *seen, -+ struct extent_ends *extent_ends, -+ struct bkey_s_c k, -+ u32 equiv, -+ struct btree_iter *iter, -+ bool *fixed) -+{ -+ struct bch_fs *c = trans->c; -+ struct extent_end *i; -+ int ret = 0; -+ -+ /* transaction restart, running again */ -+ if (bpos_eq(extent_ends->last_pos, k.k->p)) -+ return 0; -+ -+ if (extent_ends->last_pos.inode != k.k->p.inode) -+ extent_ends_reset(extent_ends); -+ -+ darray_for_each(extent_ends->e, i) { -+ if (i->offset <= bkey_start_offset(k.k)) -+ continue; -+ -+ if (!ref_visible2(c, -+ k.k->p.snapshot, seen, -+ i->snapshot, &i->seen)) -+ continue; -+ -+ ret = overlapping_extents_found(trans, iter->btree_id, -+ SPOS(iter->pos.inode, -+ i->offset, -+ i->snapshot), -+ &i->seen, -+ *k.k, fixed, i); -+ if (ret) -+ goto err; -+ } -+ -+ ret = extent_ends_at(c, extent_ends, seen, k); -+ if (ret) -+ goto err; -+ -+ extent_ends->last_pos = k.k->p; -+err: -+ return ret; -+} -+ -+static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct bch_extent_crc_unpacked crc; -+ const union bch_extent_entry *i; -+ unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; -+ -+ bkey_for_each_crc(k.k, ptrs, crc, i) -+ if (crc_is_encoded(crc) && -+ 
crc.uncompressed_size > encoded_extent_max_sectors) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ -+ return 0; -+} -+ -+static int check_extent(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct inode_walker *inode, -+ struct snapshots_seen *s, -+ struct extent_ends *extent_ends) -+{ -+ struct bch_fs *c = trans->c; -+ struct inode_walker_entry *i; -+ struct printbuf buf = PRINTBUF; -+ struct bpos equiv = k.k->p; -+ int ret = 0; -+ -+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); -+ -+ ret = check_key_has_snapshot(trans, iter, k); -+ if (ret) { -+ ret = ret < 0 ? ret : 0; -+ goto out; -+ } -+ -+ if (inode->last_pos.inode != k.k->p.inode) { -+ ret = check_i_sectors(trans, inode); -+ if (ret) -+ goto err; -+ } -+ -+ i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout); -+ ret = PTR_ERR_OR_ZERO(i); -+ if (ret) -+ goto err; -+ -+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); -+ if (ret) -+ goto err; -+ -+ if (k.k->type != KEY_TYPE_whiteout) { -+ if (fsck_err_on(!i, c, extent_in_missing_inode, -+ "extent in missing inode:\n %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ goto delete; -+ -+ if (fsck_err_on(i && -+ !S_ISREG(i->inode.bi_mode) && -+ !S_ISLNK(i->inode.bi_mode), -+ c, extent_in_non_reg_inode, -+ "extent in non regular inode mode %o:\n %s", -+ i->inode.bi_mode, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) -+ goto delete; -+ -+ ret = check_overlapping_extents(trans, s, extent_ends, k, -+ equiv.snapshot, iter, -+ &inode->recalculate_sums); -+ if (ret) -+ goto err; -+ } -+ -+ /* -+ * Check inodes in reverse order, from oldest snapshots to newest, -+ * starting from the inode that matches this extent's snapshot. 
If we -+ * didn't have one, iterate over all inodes: -+ */ -+ if (!i) -+ i = inode->inodes.data + inode->inodes.nr - 1; -+ -+ for (; -+ inode->inodes.data && i >= inode->inodes.data; -+ --i) { -+ if (i->snapshot > equiv.snapshot || -+ !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot)) -+ continue; -+ -+ if (k.k->type != KEY_TYPE_whiteout) { -+ if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && -+ k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && -+ !bkey_extent_is_reservation(k), -+ c, extent_past_end_of_inode, -+ "extent type past end of inode %llu:%u, i_size %llu\n %s", -+ i->inode.bi_inum, i->snapshot, i->inode.bi_size, -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ struct btree_iter iter2; -+ -+ bch2_trans_copy_iter(&iter2, iter); -+ bch2_btree_iter_set_snapshot(&iter2, i->snapshot); -+ ret = bch2_btree_iter_traverse(&iter2) ?: -+ bch2_btree_delete_at(trans, &iter2, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ bch2_trans_iter_exit(trans, &iter2); -+ if (ret) -+ goto err; -+ -+ iter->k.type = KEY_TYPE_whiteout; -+ } -+ -+ if (bkey_extent_is_allocation(k.k)) -+ i->count += k.k->size; -+ } -+ -+ i->seen_this_pos = true; -+ } -+out: -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ bch_err_fn(c, ret); -+ return ret; -+delete: -+ ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ goto out; -+} -+ -+/* -+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and -+ * that i_size and i_sectors are consistent -+ */ -+int bch2_check_extents(struct bch_fs *c) -+{ -+ struct inode_walker w = inode_walker_init(); -+ struct snapshots_seen s; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct extent_ends extent_ends; -+ struct disk_reservation res = { 0 }; -+ int ret = 0; -+ -+ snapshots_seen_init(&s); -+ extent_ends_init(&extent_ends); -+ -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_extents, -+ POS(BCACHEFS_ROOT_INO, 0), -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ &res, NULL, -+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ -+ bch2_disk_reservation_put(c, &res); -+ check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: -+ check_extent_overbig(trans, &iter, k); -+ })) ?: -+ check_i_sectors(trans, &w); -+ -+ bch2_disk_reservation_put(c, &res); -+ extent_ends_exit(&extent_ends); -+ inode_walker_exit(&w); -+ snapshots_seen_exit(&s); -+ bch2_trans_put(trans); -+ -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+int bch2_check_indirect_extents(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct disk_reservation res = { 0 }; -+ int ret = 0; -+ -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, -+ POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ &res, NULL, -+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ -+ bch2_disk_reservation_put(c, &res); -+ check_extent_overbig(trans, &iter, k); -+ })); -+ -+ bch2_disk_reservation_put(c, &res); -+ bch2_trans_put(trans); -+ -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) -+{ -+ struct bch_fs *c = trans->c; -+ struct inode_walker_entry *i; -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ s64 count2; -+ -+ darray_for_each(w->inodes, i) { -+ if (i->inode.bi_nlink == i->count) -+ continue; -+ -+ count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); -+ if (count2 < 0) -+ return count2; -+ -+ if (i->count != count2) { -+
bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", -+ i->count, count2); -+ i->count = count2; -+ if (i->inode.bi_nlink == i->count) -+ continue; -+ } -+ -+ if (fsck_err_on(i->inode.bi_nlink != i->count, -+ c, inode_dir_wrong_nlink, -+ "directory %llu:%u with wrong i_nlink: got %u, should be %llu", -+ w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { -+ i->inode.bi_nlink = i->count; -+ ret = fsck_write_inode(trans, &i->inode, i->snapshot); -+ if (ret) -+ break; -+ } -+ } -+fsck_err: -+ bch_err_fn(c, ret); -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ -+static int check_dirent_target(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c_dirent d, -+ struct bch_inode_unpacked *target, -+ u32 target_snapshot) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i_dirent *n; -+ bool backpointer_exists = true; -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ if (!target->bi_dir && -+ !target->bi_dir_offset) { -+ target->bi_dir = d.k->p.inode; -+ target->bi_dir_offset = d.k->p.offset; -+ -+ ret = __write_inode(trans, target, target_snapshot); -+ if (ret) -+ goto err; -+ } -+ -+ if (!inode_points_to_dirent(target, d)) { -+ ret = inode_backpointer_exists(trans, target, d.k->p.snapshot); -+ if (ret < 0) -+ goto err; -+ -+ backpointer_exists = ret; -+ ret = 0; -+ -+ if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists, -+ c, inode_dir_multiple_links, -+ "directory %llu with multiple links", -+ target->bi_inum)) { -+ ret = __remove_dirent(trans, d.k->p); -+ goto out; -+ } -+ -+ if (fsck_err_on(backpointer_exists && !target->bi_nlink, -+ c, inode_multiple_links_but_nlink_0, -+ "inode %llu type %s has multiple links but i_nlink 0", -+ target->bi_inum, bch2_d_types[d.v->d_type])) { -+ target->bi_nlink++; -+ target->bi_flags &= ~BCH_INODE_unlinked; -+ -+ ret = __write_inode(trans, target, target_snapshot); -+ if (ret) -+ goto err; -+ } -+ -+ if (fsck_err_on(!backpointer_exists, -+ c, inode_wrong_backpointer, -+ "inode %llu:%u has wrong backpointer:\n" -+ "got %llu:%llu\n" -+ "should be %llu:%llu", -+ target->bi_inum, target_snapshot, -+ target->bi_dir, -+ target->bi_dir_offset, -+ d.k->p.inode, -+ d.k->p.offset)) { -+ target->bi_dir = d.k->p.inode; -+ target->bi_dir_offset = d.k->p.offset; -+ -+ ret = __write_inode(trans, target, target_snapshot); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ if (fsck_err_on(d.v->d_type != inode_d_type(target), -+ c, dirent_d_type_wrong, -+ "incorrect d_type: got %s, should be %s:\n%s", -+ bch2_d_type_str(d.v->d_type), -+ bch2_d_type_str(inode_d_type(target)), -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { -+ n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ goto err; -+ -+ bkey_reassemble(&n->k_i, d.s_c); -+ n->v.d_type = inode_d_type(target); -+ -+ ret = bch2_trans_update(trans, iter, &n->k_i, 0); -+ if (ret) -+ goto err; -+ -+ d = dirent_i_to_s_c(n); -+ } -+ -+ if (d.v->d_type == DT_SUBVOL && -+ target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) && -+ (c->sb.version < bcachefs_metadata_version_subvol_dirent || -+ fsck_err(c, dirent_d_parent_subvol_wrong, -+ "dirent has wrong d_parent_subvol field: got %u, should be %u", -+ le32_to_cpu(d.v->d_parent_subvol), -+ target->bi_parent_subvol))) { -+ n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ goto err; -+ -+ bkey_reassemble(&n->k_i, d.s_c); -+ n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 
-+ -+ ret = bch2_trans_update(trans, iter, &n->k_i, 0); -+ if (ret) -+ goto err; -+ -+ d = dirent_i_to_s_c(n); -+ } -+out: -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct bch_hash_info *hash_info, -+ struct inode_walker *dir, -+ struct inode_walker *target, -+ struct snapshots_seen *s) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_dirent d; -+ struct inode_walker_entry *i; -+ struct printbuf buf = PRINTBUF; -+ struct bpos equiv; -+ int ret = 0; -+ -+ ret = check_key_has_snapshot(trans, iter, k); -+ if (ret) { -+ ret = ret < 0 ? ret : 0; -+ goto out; -+ } -+ -+ equiv = k.k->p; -+ equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); -+ -+ ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); -+ if (ret) -+ goto err; -+ -+ if (k.k->type == KEY_TYPE_whiteout) -+ goto out; -+ -+ if (dir->last_pos.inode != k.k->p.inode) { -+ ret = check_subdir_count(trans, dir); -+ if (ret) -+ goto err; -+ } -+ -+ BUG_ON(!iter->path->should_be_locked); -+ -+ i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout); -+ ret = PTR_ERR_OR_ZERO(i); -+ if (ret < 0) -+ goto err; -+ -+ if (dir->first_this_inode && dir->inodes.nr) -+ *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); -+ dir->first_this_inode = false; -+ -+ if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, -+ "dirent in nonexisting directory:\n%s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ goto out; -+ } -+ -+ if (!i) -+ goto out; -+ -+ if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), -+ c, dirent_in_non_dir_inode, -+ "dirent in non directory inode type %s:\n%s", -+ bch2_d_type_str(inode_d_type(&i->inode)), -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, 0); -+ goto out; -+ } -+ -+ ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); -+ if (ret < 0) -+ goto err; -+ if (ret) { -+ /* dirent has been deleted */ -+ ret = 0; -+ goto out; -+ } -+ -+ if (k.k->type != KEY_TYPE_dirent) -+ goto out; -+ -+ d = bkey_s_c_to_dirent(k); -+ -+ if (d.v->d_type == DT_SUBVOL) { -+ struct bch_inode_unpacked subvol_root; -+ u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); -+ u32 target_snapshot; -+ u64 target_inum; -+ -+ ret = __subvol_lookup(trans, target_subvol, -+ &target_snapshot, &target_inum); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ goto err; -+ -+ if (fsck_err_on(ret, c, dirent_to_missing_subvol, -+ "dirent points to missing subvolume %u", -+ le32_to_cpu(d.v->d_child_subvol))) { -+ ret = __remove_dirent(trans, d.k->p); -+ goto err; -+ } -+ -+ ret = __lookup_inode(trans, target_inum, -+ &subvol_root, &target_snapshot); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ goto err; -+ -+ if (fsck_err_on(ret, c, subvol_to_missing_root, -+ "subvolume %u points to missing subvolume root %llu", -+ target_subvol, -+ target_inum)) { -+ bch_err(c, "repair not implemented yet"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (fsck_err_on(subvol_root.bi_subvol != target_subvol, -+ c, subvol_root_wrong_bi_subvol, -+ "subvol root %llu has wrong bi_subvol field: got %u, should be %u", -+ target_inum, -+ subvol_root.bi_subvol, target_subvol)) { -+ subvol_root.bi_subvol = target_subvol; -+ ret = __write_inode(trans, &subvol_root, target_snapshot); -+ if (ret) -+ goto err; -+ } -+ -+ 
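-+		/*
-+		 * Editor's note (illustrative aside, not part of the original
-+		 * patch): a DT_SUBVOL dirent resolves in two hops instead of
-+		 * one:
-+		 *
-+		 *	d_child_subvol --(subvolumes btree)--> snapshot + root inum
-+		 *	root inum -------(inodes btree)-------> subvol root inode
-+		 *
-+		 * and the root inode's bi_subvol must point back at the
-+		 * dirent's d_child_subvol, which is what the fixup just above
-+		 * enforces before the usual check_dirent_target() pass below.
-+		 */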
ret = check_dirent_target(trans, iter, d, &subvol_root, -+ target_snapshot); -+ if (ret) -+ goto err; -+ } else { -+ ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); -+ if (ret) -+ goto err; -+ -+ if (fsck_err_on(!target->inodes.nr, -+ c, dirent_to_missing_inode, -+ "dirent points to missing inode: (equiv %u)\n%s", -+ equiv.snapshot, -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, k), -+ buf.buf))) { -+ ret = __remove_dirent(trans, d.k->p); -+ if (ret) -+ goto err; -+ } -+ -+ darray_for_each(target->inodes, i) { -+ ret = check_dirent_target(trans, iter, d, -+ &i->inode, i->snapshot); -+ if (ret) -+ goto err; -+ } -+ } -+ -+ if (d.v->d_type == DT_DIR) -+ for_each_visible_inode(c, s, dir, equiv.snapshot, i) -+ i->count++; -+ -+out: -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* -+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode, -+ * validate d_type -+ */ -+int bch2_check_dirents(struct bch_fs *c) -+{ -+ struct inode_walker dir = inode_walker_init(); -+ struct inode_walker target = inode_walker_init(); -+ struct snapshots_seen s; -+ struct bch_hash_info hash_info; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ snapshots_seen_init(&s); -+ -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, -+ POS(BCACHEFS_ROOT_INO, 0), -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, -+ k, -+ NULL, NULL, -+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)); -+ -+ bch2_trans_put(trans); -+ snapshots_seen_exit(&s); -+ inode_walker_exit(&dir); -+ inode_walker_exit(&target); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct bch_hash_info *hash_info, -+ struct inode_walker *inode) -+{ -+ struct bch_fs *c = trans->c; -+ struct inode_walker_entry *i; -+ int ret; -+ -+ ret = check_key_has_snapshot(trans, iter, k); -+ if (ret) -+ return ret; -+ -+ i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout); -+ ret = PTR_ERR_OR_ZERO(i); -+ if (ret) -+ return ret; -+ -+ if (inode->first_this_inode && inode->inodes.nr) -+ *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); -+ inode->first_this_inode = false; -+ -+ if (fsck_err_on(!i, c, xattr_in_missing_inode, -+ "xattr for missing inode %llu", -+ k.k->p.inode)) -+ return bch2_btree_delete_at(trans, iter, 0); -+ -+ if (!i) -+ return 0; -+ -+ ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); -+fsck_err: -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* -+ * Walk xattrs: verify that they all have a corresponding inode -+ */ -+int bch2_check_xattrs(struct bch_fs *c) -+{ -+ struct inode_walker inode = inode_walker_init(); -+ struct bch_hash_info hash_info; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, -+ POS(BCACHEFS_ROOT_INO, 0), -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, -+ k, -+ NULL, NULL, -+ BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_xattr(trans, &iter, k, &hash_info, &inode))); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int check_root_trans(struct btree_trans *trans) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked root_inode; -+ u32 snapshot; -+ u64 inum; -+ int ret; -+ -+ ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, 
&snapshot, &inum); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ return ret; -+ -+ if (mustfix_fsck_err_on(ret, c, root_subvol_missing, -+ "root subvol missing")) { -+ struct bkey_i_subvolume root_subvol; -+ -+ snapshot = U32_MAX; -+ inum = BCACHEFS_ROOT_INO; -+ -+ bkey_subvolume_init(&root_subvol.k_i); -+ root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL; -+ root_subvol.v.flags = 0; -+ root_subvol.v.snapshot = cpu_to_le32(snapshot); -+ root_subvol.v.inode = cpu_to_le64(inum); -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, -+ &root_subvol.k_i, 0)); -+ bch_err_msg(c, ret, "writing root subvol"); -+ if (ret) -+ goto err; -+ -+ } -+ -+ ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ return ret; -+ -+ if (mustfix_fsck_err_on(ret, c, root_dir_missing, -+ "root directory missing") || -+ mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), -+ c, root_inode_not_dir, -+ "root inode not a directory")) { -+ bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, -+ 0, NULL); -+ root_inode.bi_inum = inum; -+ -+ ret = __write_inode(trans, &root_inode, snapshot); -+ bch_err_msg(c, ret, "writing root inode"); -+ } -+err: -+fsck_err: -+ return ret; -+} -+ -+/* Get root directory, create if it doesn't exist: */ -+int bch2_check_root(struct bch_fs *c) -+{ -+ int ret; -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ check_root_trans(trans)); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+struct pathbuf_entry { -+ u64 inum; -+ u32 snapshot; -+}; -+ -+typedef DARRAY(struct pathbuf_entry) pathbuf; -+ -+static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) -+{ -+ struct pathbuf_entry *i; -+ -+ darray_for_each(*p, i) -+ if (i->inum == inum && -+ i->snapshot == snapshot) -+ return true; -+ -+ return false; -+} -+ -+static int path_down(struct bch_fs *c, pathbuf *p, -+ u64 inum, u32 snapshot) -+{ -+ int ret = darray_push(p, ((struct pathbuf_entry) { -+ .inum = inum, -+ .snapshot = snapshot, -+ })); -+ -+ if (ret) -+ bch_err(c, "fsck: error allocating memory for pathbuf, size %zu", -+ p->size); -+ return ret; -+} -+ -+/* -+ * Check that a given inode is reachable from the root: -+ * -+ * XXX: we should also be verifying that inodes are in the right subvolumes -+ */ -+static int check_path(struct btree_trans *trans, -+ pathbuf *p, -+ struct bch_inode_unpacked *inode, -+ u32 snapshot) -+{ -+ struct bch_fs *c = trans->c; -+ int ret = 0; -+ -+ snapshot = bch2_snapshot_equiv(c, snapshot); -+ p->nr = 0; -+ -+ while (!(inode->bi_inum == BCACHEFS_ROOT_INO && -+ inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) { -+ struct btree_iter dirent_iter; -+ struct bkey_s_c_dirent d; -+ u32 parent_snapshot = snapshot; -+ -+ if (inode->bi_subvol) { -+ u64 inum; -+ -+ ret = subvol_lookup(trans, inode->bi_parent_subvol, -+ &parent_snapshot, &inum); -+ if (ret) -+ break; -+ } -+ -+ ret = lockrestart_do(trans, -+ PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter, -+ SPOS(inode->bi_dir, inode->bi_dir_offset, -+ parent_snapshot))).k)); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ break; -+ -+ if (!ret && !dirent_points_to_inode(d, inode)) { -+ bch2_trans_iter_exit(trans, &dirent_iter); -+ ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; -+ } -+ -+ if (bch2_err_matches(ret, ENOENT)) { -+ if (fsck_err(c, inode_unreachable, -+ "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", -+ inode->bi_inum, snapshot, -+ 
bch2_d_type_str(inode_d_type(inode)), -+ inode->bi_nlink, -+ inode->bi_dir, -+ inode->bi_dir_offset)) -+ ret = reattach_inode(trans, inode, snapshot); -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, &dirent_iter); -+ -+ if (!S_ISDIR(inode->bi_mode)) -+ break; -+ -+ ret = path_down(c, p, inode->bi_inum, snapshot); -+ if (ret) { -+ bch_err(c, "memory allocation failure"); -+ return ret; -+ } -+ -+ snapshot = parent_snapshot; -+ -+ ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot); -+ if (ret) { -+ /* Should have been caught in dirents pass */ -+ bch_err(c, "error looking up parent directory: %i", ret); -+ break; -+ } -+ -+ if (path_is_dup(p, inode->bi_inum, snapshot)) { -+ struct pathbuf_entry *i; -+ -+ /* XXX print path */ -+ bch_err(c, "directory structure loop"); -+ -+ darray_for_each(*p, i) -+ pr_err("%llu:%u", i->inum, i->snapshot); -+ pr_err("%llu:%u", inode->bi_inum, snapshot); -+ -+ if (!fsck_err(c, dir_loop, -+ "directory structure loop")) -+ return 0; -+ -+ ret = commit_do(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL| -+ BTREE_INSERT_LAZY_RW, -+ remove_backpointer(trans, inode)); -+ if (ret) { -+ bch_err(c, "error removing dirent: %i", ret); -+ break; -+ } -+ -+ ret = reattach_inode(trans, inode, snapshot); -+ } -+ } -+fsck_err: -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* -+ * Check for unreachable inodes, as well as loops in the directory structure: -+ * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's -+ * unreachable: -+ */ -+int bch2_check_directory_structure(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_inode_unpacked u; -+ pathbuf path = { 0, }; -+ int ret; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, -+ BTREE_ITER_INTENT| -+ BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ if (!bkey_is_inode(k.k)) -+ continue; -+ -+ ret = bch2_inode_unpack(k, &u); -+ if (ret) { -+ /* Should have been caught earlier in fsck: */ -+ bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret); -+ break; -+ } -+ -+ if (u.bi_flags & BCH_INODE_unlinked) -+ continue; -+ -+ ret = check_path(trans, &path, &u, iter.pos.snapshot); -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ darray_exit(&path); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+struct nlink_table { -+ size_t nr; -+ size_t size; -+ -+ struct nlink { -+ u64 inum; -+ u32 snapshot; -+ u32 count; -+ } *d; -+}; -+ -+static int add_nlink(struct bch_fs *c, struct nlink_table *t, -+ u64 inum, u32 snapshot) -+{ -+ if (t->nr == t->size) { -+ size_t new_size = max_t(size_t, 128UL, t->size * 2); -+ void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); -+ -+ if (!d) { -+ bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", -+ new_size); -+ return -BCH_ERR_ENOMEM_fsck_add_nlink; -+ } -+ -+ if (t->d) -+ memcpy(d, t->d, t->size * sizeof(t->d[0])); -+ kvfree(t->d); -+ -+ t->d = d; -+ t->size = new_size; -+ } -+ -+ -+ t->d[t->nr++] = (struct nlink) { -+ .inum = inum, -+ .snapshot = snapshot, -+ }; -+ -+ return 0; -+} -+ -+static int nlink_cmp(const void *_l, const void *_r) -+{ -+ const struct nlink *l = _l; -+ const struct nlink *r = _r; -+ -+ return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot); -+} -+ -+static void inc_link(struct bch_fs *c, struct snapshots_seen *s, -+ struct nlink_table *links, -+ u64 range_start, u64 range_end, u64 inum, u32 snapshot) -+{ -+ struct nlink *link, key = { -+ 
.inum = inum, .snapshot = U32_MAX, -+ }; -+ -+ if (inum < range_start || inum >= range_end) -+ return; -+ -+ link = __inline_bsearch(&key, links->d, links->nr, -+ sizeof(links->d[0]), nlink_cmp); -+ if (!link) -+ return; -+ -+ while (link > links->d && link[0].inum == link[-1].inum) -+ --link; -+ -+ for (; link < links->d + links->nr && link->inum == inum; link++) -+ if (ref_visible(c, s, snapshot, link->snapshot)) { -+ link->count++; -+ if (link->snapshot >= snapshot) -+ break; -+ } -+} -+ -+noinline_for_stack -+static int check_nlinks_find_hardlinks(struct bch_fs *c, -+ struct nlink_table *t, -+ u64 start, u64 *end) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_inode_unpacked u; -+ int ret = 0; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_inodes, -+ POS(0, start), -+ BTREE_ITER_INTENT| -+ BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ if (!bkey_is_inode(k.k)) -+ continue; -+ -+ /* Should never fail, checked by bch2_inode_invalid: */ -+ BUG_ON(bch2_inode_unpack(k, &u)); -+ -+ /* -+ * Backpointer and directory structure checks are sufficient for -+ * directories, since they can't have hardlinks: -+ */ -+ if (S_ISDIR(u.bi_mode)) -+ continue; -+ -+ if (!u.bi_nlink) -+ continue; -+ -+ ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); -+ if (ret) { -+ *end = k.k->p.offset; -+ ret = 0; -+ break; -+ } -+ -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ -+ if (ret) -+ bch_err(c, "error in fsck: btree error %i while walking inodes", ret); -+ -+ return ret; -+} -+ -+noinline_for_stack -+static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, -+ u64 range_start, u64 range_end) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct snapshots_seen s; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_dirent d; -+ int ret; -+ -+ snapshots_seen_init(&s); -+ -+ for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, -+ BTREE_ITER_INTENT| -+ BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); -+ if (ret) -+ break; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_dirent: -+ d = bkey_s_c_to_dirent(k); -+ -+ if (d.v->d_type != DT_DIR && -+ d.v->d_type != DT_SUBVOL) -+ inc_link(c, &s, links, range_start, range_end, -+ le64_to_cpu(d.v->d_inum), -+ bch2_snapshot_equiv(c, d.k->p.snapshot)); -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) -+ bch_err(c, "error in fsck: btree error %i while walking dirents", ret); -+ -+ bch2_trans_put(trans); -+ snapshots_seen_exit(&s); -+ return ret; -+} -+ -+static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct nlink_table *links, -+ size_t *idx, u64 range_end) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked u; -+ struct nlink *link = &links->d[*idx]; -+ int ret = 0; -+ -+ if (k.k->p.offset >= range_end) -+ return 1; -+ -+ if (!bkey_is_inode(k.k)) -+ return 0; -+ -+ BUG_ON(bch2_inode_unpack(k, &u)); -+ -+ if (S_ISDIR(u.bi_mode)) -+ return 0; -+ -+ if (!u.bi_nlink) -+ return 0; -+ -+ while ((cmp_int(link->inum, k.k->p.offset) ?: -+ cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { -+ BUG_ON(*idx == links->nr); -+ link = &links->d[++*idx]; -+ } -+ -+ if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, -+ c, inode_wrong_nlink, -+ "inode %llu type %s has wrong i_nlink (%u, should be %u)", -+ u.bi_inum, 
bch2_d_types[mode_to_type(u.bi_mode)], -+ bch2_inode_nlink_get(&u), link->count)) { -+ bch2_inode_nlink_set(&u, link->count); -+ ret = __write_inode(trans, &u, k.k->p.snapshot); -+ } -+fsck_err: -+ return ret; -+} -+ -+noinline_for_stack -+static int check_nlinks_update_hardlinks(struct bch_fs *c, -+ struct nlink_table *links, -+ u64 range_start, u64 range_end) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ size_t idx = 0; -+ int ret = 0; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, -+ POS(0, range_start), -+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); -+ if (ret < 0) { -+ bch_err(c, "error in fsck: btree error %i while walking inodes", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_check_nlinks(struct bch_fs *c) -+{ -+ struct nlink_table links = { 0 }; -+ u64 this_iter_range_start, next_iter_range_start = 0; -+ int ret = 0; -+ -+ do { -+ this_iter_range_start = next_iter_range_start; -+ next_iter_range_start = U64_MAX; -+ -+ ret = check_nlinks_find_hardlinks(c, &links, -+ this_iter_range_start, -+ &next_iter_range_start); -+ -+ ret = check_nlinks_walk_dirents(c, &links, -+ this_iter_range_start, -+ next_iter_range_start); -+ if (ret) -+ break; -+ -+ ret = check_nlinks_update_hardlinks(c, &links, -+ this_iter_range_start, -+ next_iter_range_start); -+ if (ret) -+ break; -+ -+ links.nr = 0; -+ } while (next_iter_range_start != U64_MAX); -+ -+ kvfree(links.d); -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p; -+ struct bkey_i_reflink_p *u; -+ int ret; -+ -+ if (k.k->type != KEY_TYPE_reflink_p) -+ return 0; -+ -+ p = bkey_s_c_to_reflink_p(k); -+ -+ if (!p.v->front_pad && !p.v->back_pad) -+ return 0; -+ -+ u = bch2_trans_kmalloc(trans, sizeof(*u)); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ bkey_reassemble(&u->k_i, k); -+ u->v.front_pad = 0; -+ u->v.back_pad = 0; -+ -+ return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN); -+} -+ -+int bch2_fix_reflink_p(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) -+ return 0; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_extents, POS_MIN, -+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ fix_reflink_p_key(trans, &iter, k))); -+ bch_err_fn(c, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h -new file mode 100644 -index 000000000000..da991e8cf27e ---- /dev/null -+++ b/fs/bcachefs/fsck.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_FSCK_H -+#define _BCACHEFS_FSCK_H -+ -+int bch2_check_inodes(struct bch_fs *); -+int bch2_check_extents(struct bch_fs *); -+int bch2_check_indirect_extents(struct bch_fs *); -+int bch2_check_dirents(struct bch_fs *); -+int bch2_check_xattrs(struct bch_fs *); -+int bch2_check_root(struct bch_fs *); -+int bch2_check_directory_structure(struct bch_fs *); -+int bch2_check_nlinks(struct bch_fs *); -+int bch2_fix_reflink_p(struct bch_fs *); -+ -+#endif /* _BCACHEFS_FSCK_H */ -diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c -new file mode 100644 -index 
000000000000..def77f2d8802 ---- /dev/null -+++ b/fs/bcachefs/inode.c -@@ -0,0 +1,1198 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_key_cache.h" -+#include "btree_write_buffer.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "compress.h" -+#include "error.h" -+#include "extents.h" -+#include "extent_update.h" -+#include "inode.h" -+#include "str_hash.h" -+#include "snapshot.h" -+#include "subvolume.h" -+#include "varint.h" -+ -+#include <linux/random.h> -+ -+#include <asm/unaligned.h> -+ -+#define x(name, ...) #name, -+const char * const bch2_inode_opts[] = { -+ BCH_INODE_OPTS() -+ NULL, -+}; -+ -+static const char * const bch2_inode_flag_strs[] = { -+ BCH_INODE_FLAGS() -+ NULL -+}; -+#undef x -+ -+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; -+ -+static int inode_decode_field(const u8 *in, const u8 *end, -+ u64 out[2], unsigned *out_bits) -+{ -+ __be64 be[2] = { 0, 0 }; -+ unsigned bytes, shift; -+ u8 *p; -+ -+ if (in >= end) -+ return -1; -+ -+ if (!*in) -+ return -1; -+ -+ /* -+ * position of highest set bit indicates number of bytes: -+ * shift = number of bits to remove in high byte: -+ */ -+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */ -+ bytes = byte_table[shift - 1]; -+ -+ if (in + bytes > end) -+ return -1; -+ -+ p = (u8 *) be + 16 - bytes; -+ memcpy(p, in, bytes); -+ *p ^= (1 << 8) >> shift; -+ -+ out[0] = be64_to_cpu(be[0]); -+ out[1] = be64_to_cpu(be[1]); -+ *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]); -+ -+ return bytes; -+} -+ -+static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed, -+ const struct bch_inode_unpacked *inode) -+{ -+ struct bkey_i_inode_v3 *k = &packed->inode; -+ u8 *out = k->v.fields; -+ u8 *end = (void *) &packed[1]; -+ u8 *last_nonzero_field = out; -+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0; -+ unsigned bytes; -+ int ret; -+ -+ bkey_inode_v3_init(&packed->inode.k_i); -+ packed->inode.k.p.offset = inode->bi_inum; -+ packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq); -+ packed->inode.v.bi_hash_seed = inode->bi_hash_seed; -+ packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); -+ packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors); -+ packed->inode.v.bi_size = cpu_to_le64(inode->bi_size); -+ packed->inode.v.bi_version = cpu_to_le64(inode->bi_version); -+ SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode); -+ SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR); -+ -+ -+#define x(_name, _bits) \ -+ nr_fields++; \ -+ \ -+ if (inode->_name) { \ -+ ret = bch2_varint_encode_fast(out, inode->_name); \ -+ out += ret; \ -+ \ -+ if (_bits > 64) \ -+ *out++ = 0; \ -+ \ -+ last_nonzero_field = out; \ -+ last_nonzero_fieldnr = nr_fields; \ -+ } else { \ -+ *out++ = 0; \ -+ \ -+ if (_bits > 64) \ -+ *out++ = 0; \ -+ } -+ -+ BCH_INODE_FIELDS_v3() -+#undef x -+ BUG_ON(out > end); -+ -+ out = last_nonzero_field; -+ nr_fields = last_nonzero_fieldnr; -+ -+ bytes = out - (u8 *) &packed->inode.v; -+ set_bkey_val_bytes(&packed->inode.k, bytes); -+ memset_u64s_tail(&packed->inode.v, 0, bytes); -+ -+ SET_INODEv3_NR_FIELDS(&k->v, nr_fields); -+ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { -+ struct bch_inode_unpacked unpacked; -+ -+ ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked); -+ BUG_ON(ret); -+ BUG_ON(unpacked.bi_inum != inode->bi_inum); -+ BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); -+ BUG_ON(unpacked.bi_sectors != inode->bi_sectors); -+ BUG_ON(unpacked.bi_size != inode->bi_size); -+
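-+		/*
-+		 * Editor's note (illustrative aside, not part of the original
-+		 * patch): with CONFIG_BCACHEFS_DEBUG this block is a
-+		 * pack/unpack round-trip check -- every varint field written
-+		 * above must decode back to exactly the same value:
-+		 *
-+		 *	struct bch_inode_unpacked u = { .bi_inum = 42, .bi_size = 4096 };
-+		 *	struct bkey_inode_buf p;
-+		 *	bch2_inode_pack(&p, &u);	-- must trip none of these BUG_ON()s
-+		 *
-+		 * Any mismatch means encoder and decoder disagree, hence BUG().
-+		 */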
BUG_ON(unpacked.bi_version != inode->bi_version); -+ BUG_ON(unpacked.bi_mode != inode->bi_mode); -+ -+#define x(_name, _bits) if (unpacked._name != inode->_name) \ -+ panic("unpacked %llu should be %llu", \ -+ (u64) unpacked._name, (u64) inode->_name); -+ BCH_INODE_FIELDS_v3() -+#undef x -+ } -+} -+ -+void bch2_inode_pack(struct bkey_inode_buf *packed, -+ const struct bch_inode_unpacked *inode) -+{ -+ bch2_inode_pack_inlined(packed, inode); -+} -+ -+static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, -+ struct bch_inode_unpacked *unpacked) -+{ -+ const u8 *in = inode.v->fields; -+ const u8 *end = bkey_val_end(inode); -+ u64 field[2]; -+ unsigned fieldnr = 0, field_bits; -+ int ret; -+ -+#define x(_name, _bits) \ -+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ -+ unsigned offset = offsetof(struct bch_inode_unpacked, _name);\ -+ memset((void *) unpacked + offset, 0, \ -+ sizeof(*unpacked) - offset); \ -+ return 0; \ -+ } \ -+ \ -+ ret = inode_decode_field(in, end, field, &field_bits); \ -+ if (ret < 0) \ -+ return ret; \ -+ \ -+ if (field_bits > sizeof(unpacked->_name) * 8) \ -+ return -1; \ -+ \ -+ unpacked->_name = field[1]; \ -+ in += ret; -+ -+ BCH_INODE_FIELDS_v2() -+#undef x -+ -+ /* XXX: signal if there were more fields than expected? */ -+ return 0; -+} -+ -+static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, -+ const u8 *in, const u8 *end, -+ unsigned nr_fields) -+{ -+ unsigned fieldnr = 0; -+ int ret; -+ u64 v[2]; -+ -+#define x(_name, _bits) \ -+ if (fieldnr < nr_fields) { \ -+ ret = bch2_varint_decode_fast(in, end, &v[0]); \ -+ if (ret < 0) \ -+ return ret; \ -+ in += ret; \ -+ \ -+ if (_bits > 64) { \ -+ ret = bch2_varint_decode_fast(in, end, &v[1]); \ -+ if (ret < 0) \ -+ return ret; \ -+ in += ret; \ -+ } else { \ -+ v[1] = 0; \ -+ } \ -+ } else { \ -+ v[0] = v[1] = 0; \ -+ } \ -+ \ -+ unpacked->_name = v[0]; \ -+ if (v[1] || v[0] != unpacked->_name) \ -+ return -1; \ -+ fieldnr++; -+ -+ BCH_INODE_FIELDS_v2() -+#undef x -+ -+ /* XXX: signal if there were more fields than expected? */ -+ return 0; -+} -+ -+static int bch2_inode_unpack_v3(struct bkey_s_c k, -+ struct bch_inode_unpacked *unpacked) -+{ -+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); -+ const u8 *in = inode.v->fields; -+ const u8 *end = bkey_val_end(inode); -+ unsigned nr_fields = INODEv3_NR_FIELDS(inode.v); -+ unsigned fieldnr = 0; -+ int ret; -+ u64 v[2]; -+ -+ unpacked->bi_inum = inode.k->p.offset; -+ unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq); -+ unpacked->bi_hash_seed = inode.v->bi_hash_seed; -+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags); -+ unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors); -+ unpacked->bi_size = le64_to_cpu(inode.v->bi_size); -+ unpacked->bi_version = le64_to_cpu(inode.v->bi_version); -+ unpacked->bi_mode = INODEv3_MODE(inode.v); -+ -+#define x(_name, _bits) \ -+ if (fieldnr < nr_fields) { \ -+ ret = bch2_varint_decode_fast(in, end, &v[0]); \ -+ if (ret < 0) \ -+ return ret; \ -+ in += ret; \ -+ \ -+ if (_bits > 64) { \ -+ ret = bch2_varint_decode_fast(in, end, &v[1]); \ -+ if (ret < 0) \ -+ return ret; \ -+ in += ret; \ -+ } else { \ -+ v[1] = 0; \ -+ } \ -+ } else { \ -+ v[0] = v[1] = 0; \ -+ } \ -+ \ -+ unpacked->_name = v[0]; \ -+ if (v[1] || v[0] != unpacked->_name) \ -+ return -1; \ -+ fieldnr++; -+ -+ BCH_INODE_FIELDS_v3() -+#undef x -+ -+ /* XXX: signal if there were more fields than expected? 
*/ -+ return 0; -+} -+ -+static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k, -+ struct bch_inode_unpacked *unpacked) -+{ -+ memset(unpacked, 0, sizeof(*unpacked)); -+ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: { -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ -+ unpacked->bi_inum = inode.k->p.offset; -+ unpacked->bi_journal_seq= 0; -+ unpacked->bi_hash_seed = inode.v->bi_hash_seed; -+ unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); -+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); -+ -+ if (INODE_NEW_VARINT(inode.v)) { -+ return bch2_inode_unpack_v2(unpacked, inode.v->fields, -+ bkey_val_end(inode), -+ INODE_NR_FIELDS(inode.v)); -+ } else { -+ return bch2_inode_unpack_v1(inode, unpacked); -+ } -+ break; -+ } -+ case KEY_TYPE_inode_v2: { -+ struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); -+ -+ unpacked->bi_inum = inode.k->p.offset; -+ unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq); -+ unpacked->bi_hash_seed = inode.v->bi_hash_seed; -+ unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags); -+ unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); -+ -+ return bch2_inode_unpack_v2(unpacked, inode.v->fields, -+ bkey_val_end(inode), -+ INODEv2_NR_FIELDS(inode.v)); -+ } -+ default: -+ BUG(); -+ } -+} -+ -+int bch2_inode_unpack(struct bkey_s_c k, -+ struct bch_inode_unpacked *unpacked) -+{ -+ if (likely(k.k->type == KEY_TYPE_inode_v3)) -+ return bch2_inode_unpack_v3(k, unpacked); -+ return bch2_inode_unpack_slowpath(k, unpacked); -+} -+ -+static int bch2_inode_peek_nowarn(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode, -+ subvol_inum inum, unsigned flags) -+{ -+ struct bkey_s_c k; -+ u32 snapshot; -+ int ret; -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ return ret; -+ -+ k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, -+ SPOS(0, inum.inum, snapshot), -+ flags|BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ ret = bkey_is_inode(k.k) ? 
0 : -BCH_ERR_ENOENT_inode; -+ if (ret) -+ goto err; -+ -+ ret = bch2_inode_unpack(k, inode); -+ if (ret) -+ goto err; -+ -+ return 0; -+err: -+ bch2_trans_iter_exit(trans, iter); -+ return ret; -+} -+ -+int bch2_inode_peek(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode, -+ subvol_inum inum, unsigned flags) -+{ -+ int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); -+ bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum); -+ return ret; -+} -+ -+int bch2_inode_write_flags(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode, -+ enum btree_update_flags flags) -+{ -+ struct bkey_inode_buf *inode_p; -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return PTR_ERR(inode_p); -+ -+ bch2_inode_pack_inlined(inode_p, inode); -+ inode_p->inode.k.p.snapshot = iter->snapshot; -+ return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); -+} -+ -+struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) -+{ -+ struct bch_inode_unpacked u; -+ struct bkey_inode_buf *inode_p; -+ int ret; -+ -+ if (!bkey_is_inode(&k->k)) -+ return ERR_PTR(-ENOENT); -+ -+ inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); -+ if (IS_ERR(inode_p)) -+ return ERR_CAST(inode_p); -+ -+ ret = bch2_inode_unpack(bkey_i_to_s_c(k), &u); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ bch2_inode_pack(inode_p, &u); -+ return &inode_p->inode.k_i; -+} -+ -+static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) -+{ -+ struct bch_inode_unpacked unpacked; -+ int ret = 0; -+ -+ bkey_fsck_err_on(k.k->p.inode, c, err, -+ inode_pos_inode_nonzero, -+ "nonzero k.p.inode"); -+ -+ bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, -+ inode_pos_blockdev_range, -+ "fs inode in blockdev range"); -+ -+ bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, -+ inode_unpack_error, -+ "invalid variable length fields"); -+ -+ bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, -+ inode_checksum_type_invalid, -+ "invalid data checksum type (%u >= %u", -+ unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); -+ -+ bkey_fsck_err_on(unpacked.bi_compression && -+ !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, -+ inode_compression_type_invalid, -+ "invalid compression opt %u", unpacked.bi_compression - 1); -+ -+ bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && -+ unpacked.bi_nlink != 0, c, err, -+ inode_unlinked_but_nlink_nonzero, -+ "flagged as unlinked but bi_nlink != 0"); -+ -+ bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, -+ inode_subvol_root_but_not_dir, -+ "subvolume root but not a directory"); -+fsck_err: -+ return ret; -+} -+ -+int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); -+ int ret = 0; -+ -+ bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, -+ inode_str_hash_invalid, -+ "invalid str hash type (%llu >= %u)", -+ INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); -+ -+ ret = __bch2_inode_invalid(c, k, err); -+fsck_err: -+ return ret; -+} -+ -+int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); -+ int ret = 0; -+ -+ bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, 
c, err, -+ inode_str_hash_invalid, -+ "invalid str hash type (%llu >= %u)", -+ INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); -+ -+ ret = __bch2_inode_invalid(c, k, err); -+fsck_err: -+ return ret; -+} -+ -+int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); -+ int ret = 0; -+ -+ bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || -+ INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, -+ inode_v3_fields_start_bad, -+ "invalid fields_start (got %llu, min %u max %zu)", -+ INODEv3_FIELDS_START(inode.v), -+ INODEv3_FIELDS_START_INITIAL, -+ bkey_val_u64s(inode.k)); -+ -+ bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, -+ inode_str_hash_invalid, -+ "invalid str hash type (%llu >= %u)", -+ INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); -+ -+ ret = __bch2_inode_invalid(c, k, err); -+fsck_err: -+ return ret; -+} -+ -+static void __bch2_inode_unpacked_to_text(struct printbuf *out, -+ struct bch_inode_unpacked *inode) -+{ -+ prt_printf(out, "mode=%o ", inode->bi_mode); -+ -+ prt_str(out, "flags="); -+ prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); -+ prt_printf(out, " (%x)", inode->bi_flags); -+ -+ prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu", -+ inode->bi_journal_seq, -+ inode->bi_size, -+ inode->bi_sectors, -+ inode->bi_version); -+ -+#define x(_name, _bits) \ -+ prt_printf(out, " "#_name "=%llu", (u64) inode->_name); -+ BCH_INODE_FIELDS_v3() -+#undef x -+} -+ -+void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) -+{ -+ prt_printf(out, "inum: %llu ", inode->bi_inum); -+ __bch2_inode_unpacked_to_text(out, inode); -+} -+ -+void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bch_inode_unpacked inode; -+ -+ if (bch2_inode_unpack(k, &inode)) { -+ prt_printf(out, "(unpack error)"); -+ return; -+ } -+ -+ __bch2_inode_unpacked_to_text(out, &inode); -+} -+ -+static inline u64 bkey_inode_flags(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags); -+ case KEY_TYPE_inode_v2: -+ return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags); -+ case KEY_TYPE_inode_v3: -+ return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags); -+ default: -+ return 0; -+ } -+} -+ -+static inline bool bkey_is_deleted_inode(struct bkey_s_c k) -+{ -+ return bkey_inode_flags(k) & BCH_INODE_unlinked; -+} -+ -+int bch2_trans_mark_inode(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, -+ struct bkey_i *new, -+ unsigned flags) -+{ -+ int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); -+ bool old_deleted = bkey_is_deleted_inode(old); -+ bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new)); -+ -+ if (nr) { -+ int ret = bch2_replicas_deltas_realloc(trans, 0); -+ struct replicas_delta_list *d = trans->fs_usage_deltas; -+ -+ if (ret) -+ return ret; -+ -+ d->nr_inodes += nr; -+ } -+ -+ if (old_deleted != new_deleted) { -+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_mark_inode(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_fs_usage *fs_usage; 
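-+	/*
-+	 * Editor's note (illustrative aside, not part of the original patch):
-+	 * this is the atomic (mark) trigger for inode keys -- on insert it
-+	 * stamps the value with the commit's journal sequence, and during GC
-+	 * it keeps the filesystem inode count in sync. An overwrite of one
-+	 * inode by another is net zero:
-+	 *
-+	 *	fs_usage->nr_inodes += bkey_is_inode(new.k);	-- +1
-+	 *	fs_usage->nr_inodes -= bkey_is_inode(old.k);	-- -1
-+	 */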
-+ u64 journal_seq = trans->journal_res.seq; -+ -+ if (flags & BTREE_TRIGGER_INSERT) { -+ struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v; -+ -+ BUG_ON(!journal_seq); -+ BUG_ON(new.k->type != KEY_TYPE_inode_v3); -+ -+ v->bi_journal_seq = cpu_to_le64(journal_seq); -+ } -+ -+ if (flags & BTREE_TRIGGER_GC) { -+ percpu_down_read(&c->mark_lock); -+ preempt_disable(); -+ -+ fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); -+ fs_usage->nr_inodes += bkey_is_inode(new.k); -+ fs_usage->nr_inodes -= bkey_is_inode(old.k); -+ -+ preempt_enable(); -+ percpu_up_read(&c->mark_lock); -+ } -+ return 0; -+} -+ -+int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(k.k->p.inode, c, err, -+ inode_pos_inode_nonzero, -+ "nonzero k.p.inode"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_inode_generation gen = bkey_s_c_to_inode_generation(k); -+ -+ prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); -+} -+ -+void bch2_inode_init_early(struct bch_fs *c, -+ struct bch_inode_unpacked *inode_u) -+{ -+ enum bch_str_hash_type str_hash = -+ bch2_str_hash_opt_to_type(c, c->opts.str_hash); -+ -+ memset(inode_u, 0, sizeof(*inode_u)); -+ -+ /* ick */ -+ inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; -+ get_random_bytes(&inode_u->bi_hash_seed, -+ sizeof(inode_u->bi_hash_seed)); -+} -+ -+void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ inode_u->bi_mode = mode; -+ inode_u->bi_uid = uid; -+ inode_u->bi_gid = gid; -+ inode_u->bi_dev = rdev; -+ inode_u->bi_atime = now; -+ inode_u->bi_mtime = now; -+ inode_u->bi_ctime = now; -+ inode_u->bi_otime = now; -+ -+ if (parent && parent->bi_mode & S_ISGID) { -+ inode_u->bi_gid = parent->bi_gid; -+ if (S_ISDIR(mode)) -+ inode_u->bi_mode |= S_ISGID; -+ } -+ -+ if (parent) { -+#define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name; -+ BCH_INODE_OPTS() -+#undef x -+ } -+} -+ -+void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, -+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev, -+ struct bch_inode_unpacked *parent) -+{ -+ bch2_inode_init_early(c, inode_u); -+ bch2_inode_init_late(inode_u, bch2_current_time(c), -+ uid, gid, mode, rdev, parent); -+} -+ -+static inline u32 bkey_generation(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_inode: -+ case KEY_TYPE_inode_v2: -+ BUG(); -+ case KEY_TYPE_inode_generation: -+ return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); -+ default: -+ return 0; -+ } -+} -+ -+/* -+ * This just finds an empty slot: -+ */ -+int bch2_inode_create(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bch_inode_unpacked *inode_u, -+ u32 snapshot, u64 cpu) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ u64 min, max, start, pos, *hint; -+ int ret = 0; -+ unsigned bits = (c->opts.inodes_32bit ? 
31 : 63); -+ -+ if (c->opts.shard_inode_numbers) { -+ bits -= c->inode_shard_bits; -+ -+ min = (cpu << bits); -+ max = (cpu << bits) | ~(ULLONG_MAX << bits); -+ -+ min = max_t(u64, min, BLOCKDEV_INODE_MAX); -+ hint = c->unused_inode_hints + cpu; -+ } else { -+ min = BLOCKDEV_INODE_MAX; -+ max = ~(ULLONG_MAX << bits); -+ hint = c->unused_inode_hints; -+ } -+ -+ start = READ_ONCE(*hint); -+ -+ if (start >= max || start < min) -+ start = min; -+ -+ pos = start; -+ bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos), -+ BTREE_ITER_ALL_SNAPSHOTS| -+ BTREE_ITER_INTENT); -+again: -+ while ((k = bch2_btree_iter_peek(iter)).k && -+ !(ret = bkey_err(k)) && -+ bkey_lt(k.k->p, POS(0, max))) { -+ if (pos < iter->pos.offset) -+ goto found_slot; -+ -+ /* -+ * We don't need to iterate over keys in every snapshot once -+ * we've found just one: -+ */ -+ pos = iter->pos.offset + 1; -+ bch2_btree_iter_set_pos(iter, POS(0, pos)); -+ } -+ -+ if (!ret && pos < max) -+ goto found_slot; -+ -+ if (!ret && start == min) -+ ret = -BCH_ERR_ENOSPC_inode_create; -+ -+ if (ret) { -+ bch2_trans_iter_exit(trans, iter); -+ return ret; -+ } -+ -+ /* Retry from start */ -+ pos = start = min; -+ bch2_btree_iter_set_pos(iter, POS(0, pos)); -+ goto again; -+found_slot: -+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); -+ k = bch2_btree_iter_peek_slot(iter); -+ ret = bkey_err(k); -+ if (ret) { -+ bch2_trans_iter_exit(trans, iter); -+ return ret; -+ } -+ -+ *hint = k.k->p.offset; -+ inode_u->bi_inum = k.k->p.offset; -+ inode_u->bi_generation = bkey_generation(k); -+ return 0; -+} -+ -+static int bch2_inode_delete_keys(struct btree_trans *trans, -+ subvol_inum inum, enum btree_id id) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_i delete; -+ struct bpos end = POS(inum.inum, U64_MAX); -+ u32 snapshot; -+ int ret = 0; -+ -+ /* -+ * We're never going to be deleting partial extents, no need to use an -+ * extent iterator: -+ */ -+ bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0), -+ BTREE_ITER_INTENT); -+ -+ while (1) { -+ bch2_trans_begin(trans); -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ bch2_btree_iter_set_snapshot(&iter, snapshot); -+ -+ k = bch2_btree_iter_peek_upto(&iter, end); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!k.k) -+ break; -+ -+ bkey_init(&delete.k); -+ delete.k.p = iter.pos; -+ -+ if (iter.flags & BTREE_ITER_IS_EXTENTS) -+ bch2_key_resize(&delete.k, -+ bpos_min(end, k.k->p).offset - -+ iter.pos.offset); -+ -+ ret = bch2_trans_update(trans, &iter, &delete, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+err: -+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bkey_i_inode_generation delete; -+ struct bch_inode_unpacked inode_u; -+ struct bkey_s_c k; -+ u32 snapshot; -+ int ret; -+ -+ /* -+ * If this was a directory, there shouldn't be any real dirents left - -+ * but there could be whiteouts (from hash collisions) that we should -+ * delete: -+ * -+ * XXX: the dirent code ideally would delete whiteouts when they're no -+ * longer needed -+ */ -+ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: -+ bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: -+ bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents);
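-+	/*
-+	 * Editor's note (illustrative aside, not part of the original patch):
-+	 * inode removal is two-phase -- first drain every btree keyed by this
-+	 * inode (extents, xattrs, leftover dirent whiteouts), then replace the
-+	 * inode key itself with a bch_inode_generation record so that a later
-+	 * bch2_inode_create() reusing this slot hands out generation + 1:
-+	 *
-+	 *	delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
-+	 */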
-+ if (ret) -+ goto err; -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, inum.inum, snapshot), -+ BTREE_ITER_INTENT|BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!bkey_is_inode(k.k)) { -+ bch2_fs_inconsistent(c, -+ "inode %llu:%u not found when deleting", -+ inum.inum, snapshot); -+ ret = -EIO; -+ goto err; -+ } -+ -+ bch2_inode_unpack(k, &inode_u); -+ -+ bkey_inode_generation_init(&delete.k_i); -+ delete.k.p = iter.pos; -+ delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); -+ -+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *trans, -+ subvol_inum inum, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ ret = bch2_inode_peek_nowarn(trans, &iter, inode, inum, 0); -+ if (!ret) -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum_trans(struct btree_trans *trans, -+ subvol_inum inum, -+ struct bch_inode_unpacked *inode) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ ret = bch2_inode_peek(trans, &iter, inode, inum, 0); -+ if (!ret) -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, -+ struct bch_inode_unpacked *inode) -+{ -+ return bch2_trans_do(c, NULL, NULL, 0, -+ bch2_inode_find_by_inum_trans(trans, inum, inode)); -+} -+ -+int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) -+{ -+ if (bi->bi_flags & BCH_INODE_unlinked) -+ bi->bi_flags &= ~BCH_INODE_unlinked; -+ else { -+ if (bi->bi_nlink == U32_MAX) -+ return -EINVAL; -+ -+ bi->bi_nlink++; -+ } -+ -+ return 0; -+} -+ -+void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi) -+{ -+ if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) { -+ bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero", -+ bi->bi_inum); -+ return; -+ } -+ -+ if (bi->bi_flags & BCH_INODE_unlinked) { -+ bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum); -+ return; -+ } -+ -+ if (bi->bi_nlink) -+ bi->bi_nlink--; -+ else -+ bi->bi_flags |= BCH_INODE_unlinked; -+} -+ -+struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) -+{ -+ struct bch_opts ret = { 0 }; -+#define x(_name, _bits) \ -+ if (inode->bi_##_name) \ -+ opt_set(ret, _name, inode->bi_##_name - 1); -+ BCH_INODE_OPTS() -+#undef x -+ return ret; -+} -+ -+void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, -+ struct bch_inode_unpacked *inode) -+{ -+#define x(_name, _bits) opts->_name = inode_opt_get(c, inode, _name); -+ BCH_INODE_OPTS() -+#undef x -+ -+ if (opts->nocow) -+ opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; -+} -+ -+int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) -+{ -+ struct bch_inode_unpacked inode; -+ int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); -+ -+ if (ret) -+ return ret; -+ -+ bch2_inode_opts_get(opts, trans->c, &inode); -+ return 0; -+} -+ -+int bch2_inode_rm_snapshot(struct btree_trans 
*trans, u64 inum, u32 snapshot) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter = { NULL }; -+ struct bkey_i_inode_generation delete; -+ struct bch_inode_unpacked inode_u; -+ struct bkey_s_c k; -+ int ret; -+ -+ do { -+ ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, -+ SPOS(inum, 0, snapshot), -+ SPOS(inum, U64_MAX, snapshot), -+ 0, NULL) ?: -+ bch2_btree_delete_range_trans(trans, BTREE_ID_dirents, -+ SPOS(inum, 0, snapshot), -+ SPOS(inum, U64_MAX, snapshot), -+ 0, NULL) ?: -+ bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs, -+ SPOS(inum, 0, snapshot), -+ SPOS(inum, U64_MAX, snapshot), -+ 0, NULL); -+ } while (ret == -BCH_ERR_transaction_restart_nested); -+ if (ret) -+ goto err; -+retry: -+ bch2_trans_begin(trans); -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, inum, snapshot), BTREE_ITER_INTENT); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!bkey_is_inode(k.k)) { -+ bch2_fs_inconsistent(c, -+ "inode %llu:%u not found when deleting", -+ inum, snapshot); -+ ret = -EIO; -+ goto err; -+ } -+ -+ bch2_inode_unpack(k, &inode_u); -+ -+ /* Subvolume root? */ -+ if (inode_u.bi_subvol) -+ bch_warn(c, "deleting inode %llu marked as unlinked, but also a subvolume root!?", inode_u.bi_inum); -+ -+ bkey_inode_generation_init(&delete.k_i); -+ delete.k.p = iter.pos; -+ delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); -+ -+ ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ return ret ?: -BCH_ERR_transaction_restart_nested; -+} -+ -+static int may_delete_deleted_inode(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bpos pos, -+ bool *need_another_pass) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter inode_iter; -+ struct bkey_s_c k; -+ struct bch_inode_unpacked inode; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ ret = bkey_is_inode(k.k) ? 
0 : -BCH_ERR_ENOENT_inode; -+ if (fsck_err_on(!bkey_is_inode(k.k), c, -+ deleted_inode_missing, -+ "nonexistent inode %llu:%u in deleted_inodes btree", -+ pos.offset, pos.snapshot)) -+ goto delete; -+ -+ ret = bch2_inode_unpack(k, &inode); -+ if (ret) -+ goto out; -+ -+ if (fsck_err_on(S_ISDIR(inode.bi_mode), c, -+ deleted_inode_is_dir, -+ "directory %llu:%u in deleted_inodes btree", -+ pos.offset, pos.snapshot)) -+ goto delete; -+ -+ if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, -+ deleted_inode_not_unlinked, -+ "non-deleted inode %llu:%u in deleted_inodes btree", -+ pos.offset, pos.snapshot)) -+ goto delete; -+ -+ if (c->sb.clean && -+ !fsck_err(c, -+ deleted_inode_but_clean, -+ "filesystem marked as clean but have deleted inode %llu:%u", -+ pos.offset, pos.snapshot)) { -+ ret = 0; -+ goto out; -+ } -+ -+ if (bch2_snapshot_is_internal_node(c, pos.snapshot)) { -+ struct bpos new_min_pos; -+ -+ ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos); -+ if (ret) -+ goto out; -+ -+ inode.bi_flags &= ~BCH_INODE_unlinked; -+ -+ ret = bch2_inode_write_flags(trans, &inode_iter, &inode, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ bch_err_msg(c, ret, "clearing inode unlinked flag"); -+ if (ret) -+ goto out; -+ -+ /* -+ * We'll need another write buffer flush to pick up the new -+ * unlinked inodes in the snapshot leaves: -+ */ -+ *need_another_pass = true; -+ return 0; -+ } -+ -+ ret = 1; -+out: -+fsck_err: -+ bch2_trans_iter_exit(trans, &inode_iter); -+ return ret; -+delete: -+ ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); -+ goto out; -+} -+ -+int bch2_delete_dead_inodes(struct bch_fs *c) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ bool need_another_pass; -+ int ret; -+again: -+ need_another_pass = false; -+ -+ ret = bch2_btree_write_buffer_flush_sync(trans); -+ if (ret) -+ goto err; -+ -+ /* -+ * Weird transaction restart handling here because on successful delete, -+ * bch2_inode_rm_snapshot() will return a nested transaction restart, -+ * but we can't retry because the btree write buffer won't have been -+ * flushed and we'd spin: -+ */ -+ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p, -+ &need_another_pass)); -+ if (ret < 0) -+ break; -+ -+ if (ret) { -+ if (!test_bit(BCH_FS_RW, &c->flags)) { -+ bch2_trans_unlock(trans); -+ bch2_fs_lazy_rw(c); -+ } -+ -+ bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); -+ -+ ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); -+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (!ret && need_another_pass) -+ goto again; -+err: -+ bch2_trans_put(trans); -+ -+ return ret; -+} -diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h -new file mode 100644 -index 000000000000..88818a332b1e ---- /dev/null -+++ b/fs/bcachefs/inode.h -@@ -0,0 +1,217 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_INODE_H -+#define _BCACHEFS_INODE_H -+ -+#include "bkey.h" -+#include "bkey_methods.h" -+#include "opts.h" -+ -+enum bkey_invalid_flags; -+extern const char * const bch2_inode_opts[]; -+ -+int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+int bch2_inode_v2_invalid(struct bch_fs *, struct 
-+			  enum bkey_invalid_flags, struct printbuf *);
-+int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c,
-+			  enum bkey_invalid_flags, struct printbuf *);
-+void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned,
-+			  struct bkey_s_c, struct bkey_i *, unsigned);
-+int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned,
-+		    struct bkey_s_c, struct bkey_s_c, unsigned);
-+
-+#define bch2_bkey_ops_inode ((struct bkey_ops) {	\
-+	.key_invalid	= bch2_inode_invalid,		\
-+	.val_to_text	= bch2_inode_to_text,		\
-+	.trans_trigger	= bch2_trans_mark_inode,	\
-+	.atomic_trigger	= bch2_mark_inode,		\
-+	.min_val_size	= 16,				\
-+})
-+
-+#define bch2_bkey_ops_inode_v2 ((struct bkey_ops) {	\
-+	.key_invalid	= bch2_inode_v2_invalid,	\
-+	.val_to_text	= bch2_inode_to_text,		\
-+	.trans_trigger	= bch2_trans_mark_inode,	\
-+	.atomic_trigger	= bch2_mark_inode,		\
-+	.min_val_size	= 32,				\
-+})
-+
-+#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) {	\
-+	.key_invalid	= bch2_inode_v3_invalid,	\
-+	.val_to_text	= bch2_inode_to_text,		\
-+	.trans_trigger	= bch2_trans_mark_inode,	\
-+	.atomic_trigger	= bch2_mark_inode,		\
-+	.min_val_size	= 48,				\
-+})
-+
-+static inline bool bkey_is_inode(const struct bkey *k)
-+{
-+	return k->type == KEY_TYPE_inode ||
-+		k->type == KEY_TYPE_inode_v2 ||
-+		k->type == KEY_TYPE_inode_v3;
-+}
-+
-+int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c,
-+				  enum bkey_invalid_flags, struct printbuf *);
-+void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_inode_generation ((struct bkey_ops) {	\
-+	.key_invalid	= bch2_inode_generation_invalid,	\
-+	.val_to_text	= bch2_inode_generation_to_text,	\
-+	.min_val_size	= 8,					\
-+})
-+
-+#if 0
-+typedef struct {
-+	u64			lo;
-+	u32			hi;
-+} __packed __aligned(4) u96;
-+#endif
-+typedef u64 u96;
-+
-+struct bch_inode_unpacked {
-+	u64			bi_inum;
-+	u64			bi_journal_seq;
-+	__le64			bi_hash_seed;
-+	u64			bi_size;
-+	u64			bi_sectors;
-+	u64			bi_version;
-+	u32			bi_flags;
-+	u16			bi_mode;
-+
-+#define x(_name, _bits)	u##_bits _name;
-+	BCH_INODE_FIELDS_v3()
-+#undef x
-+};
-+
-+struct bkey_inode_buf {
-+	struct bkey_i_inode_v3	inode;
-+
-+#define x(_name, _bits)		+ 8 + _bits / 8
-+	u8		_pad[0 + BCH_INODE_FIELDS_v3()];
-+#undef x
-+} __packed __aligned(8);
-+
-+void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
-+int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
-+struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
-+
-+void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
-+
-+int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
-+		    struct bch_inode_unpacked *, subvol_inum, unsigned);
-+
-+int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *,
-+			   struct bch_inode_unpacked *, enum btree_update_flags);
-+
-+static inline int bch2_inode_write(struct btree_trans *trans,
-+				   struct btree_iter *iter,
-+				   struct bch_inode_unpacked *inode)
-+{
-+	return bch2_inode_write_flags(trans, iter, inode, 0);
-+}
-+
-+void bch2_inode_init_early(struct bch_fs *,
-+			   struct bch_inode_unpacked *);
-+void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
-+			  uid_t, gid_t, umode_t, dev_t,
-+			  struct bch_inode_unpacked *);
-+void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
-+		     uid_t, gid_t, umode_t, dev_t,
-+		     struct bch_inode_unpacked *);
-+
-+int bch2_inode_create(struct btree_trans *, struct btree_iter *,
-+		      struct bch_inode_unpacked *, u32, u64);
-+
-+int bch2_inode_rm(struct bch_fs *, subvol_inum);
-+
-+int bch2_inode_find_by_inum_nowarn_trans(struct btree_trans *,
-+					 subvol_inum,
-+					 struct bch_inode_unpacked *);
-+int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum,
-+				  struct bch_inode_unpacked *);
-+int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum,
-+			    struct bch_inode_unpacked *);
-+
-+#define inode_opt_get(_c, _inode, _name)			\
-+	((_inode)->bi_##_name ? (_inode)->bi_##_name - 1 : (_c)->opts._name)
-+
-+static inline void bch2_inode_opt_set(struct bch_inode_unpacked *inode,
-+				      enum inode_opt_id id, u64 v)
-+{
-+	switch (id) {
-+#define x(_name, ...)						\
-+	case Inode_opt_##_name:					\
-+		inode->bi_##_name = v;				\
-+		break;
-+	BCH_INODE_OPTS()
-+#undef x
-+	default:
-+		BUG();
-+	}
-+}
-+
-+static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode,
-+				     enum inode_opt_id id)
-+{
-+	switch (id) {
-+#define x(_name, ...)						\
-+	case Inode_opt_##_name:					\
-+		return inode->bi_##_name;
-+	BCH_INODE_OPTS()
-+#undef x
-+	default:
-+		BUG();
-+	}
-+}
-+
-+static inline u8 mode_to_type(umode_t mode)
-+{
-+	return (mode >> 12) & 15;
-+}
-+
-+static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
-+{
-+	return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
-+}
-+
-+/* i_nlink: */
-+
-+static inline unsigned nlink_bias(umode_t mode)
-+{
-+	return S_ISDIR(mode) ? 2 : 1;
-+}
-+
-+static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi)
-+{
-+	return bi->bi_flags & BCH_INODE_unlinked
-+		? 0
-+		: bi->bi_nlink + nlink_bias(bi->bi_mode);
-+}
-+
-+static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
-+					unsigned nlink)
-+{
-+	if (nlink) {
-+		bi->bi_nlink = nlink - nlink_bias(bi->bi_mode);
-+		bi->bi_flags &= ~BCH_INODE_unlinked;
-+	} else {
-+		bi->bi_nlink = 0;
-+		bi->bi_flags |= BCH_INODE_unlinked;
-+	}
-+}
-+
-+int bch2_inode_nlink_inc(struct bch_inode_unpacked *);
-+void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *);
-+
-+struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *);
-+void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
-+			 struct bch_inode_unpacked *);
-+int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
-+
-+int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
-+int bch2_delete_dead_inodes(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_INODE_H */
-diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
-new file mode 100644
-index 000000000000..bebc11444ef5
---- /dev/null
-+++ b/fs/bcachefs/io_misc.c
-@@ -0,0 +1,524 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * io_misc.c - fallocate, fpunch, truncate:
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_foreground.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "error.h"
-+#include "extents.h"
-+#include "extent_update.h"
-+#include "inode.h"
-+#include "io_misc.h"
-+#include "io_write.h"
-+#include "logged_ops.h"
-+#include "rebalance.h"
-+#include "subvolume.h"
-+
-+/* Overwrites whatever was present with zeroes: */
-+int bch2_extent_fallocate(struct btree_trans *trans,
-+			  subvol_inum inum,
-+			  struct btree_iter *iter,
-+			  u64 sectors,
-+			  struct bch_io_opts opts,
-+			  s64 *i_sectors_delta,
-+			  struct write_point_specifier write_point)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct disk_reservation disk_res = { 0 };
-+	struct closure cl;
-+	struct open_buckets open_buckets = { 0 };
-+	struct bkey_s_c k;
-+	struct bkey_buf old, new;
-+	unsigned sectors_allocated = 0;
-+	bool have_reservation = false;
-+	bool unwritten = opts.nocow &&
-+	    c->sb.version >= bcachefs_metadata_version_unwritten_extents;
-+	int ret;
-+
-+	bch2_bkey_buf_init(&old);
-+	bch2_bkey_buf_init(&new);
-+	closure_init_stack(&cl);
-+
-+	k = bch2_btree_iter_peek_slot(iter);
-+	ret = bkey_err(k);
-+	if (ret)
-+		return ret;
-+
-+	sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);
-+
-+	if (!have_reservation) {
-+		unsigned new_replicas =
-+			max(0, (int) opts.data_replicas -
-+			    (int) bch2_bkey_nr_ptrs_fully_allocated(k));
-+		/*
-+		 * Get a disk reservation before (in the nocow case) calling
-+		 * into the allocator:
-+		 */
-+		ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
-+		if (unlikely(ret))
-+			goto err;
-+
-+		bch2_bkey_buf_reassemble(&old, c, k);
-+	}
-+
-+	if (have_reservation) {
-+		if (!bch2_extents_match(k, bkey_i_to_s_c(old.k)))
-+			goto err;
-+
-+		bch2_key_resize(&new.k->k, sectors);
-+	} else if (!unwritten) {
-+		struct bkey_i_reservation *reservation;
-+
-+		bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64));
-+		reservation = bkey_reservation_init(new.k);
-+		reservation->k.p = iter->pos;
-+		bch2_key_resize(&reservation->k, sectors);
-+		reservation->v.nr_replicas = opts.data_replicas;
-+	} else {
-+		struct bkey_i_extent *e;
-+		struct bch_devs_list devs_have;
-+		struct write_point *wp;
-+		struct bch_extent_ptr *ptr;
-+
-+		devs_have.nr = 0;
-+
-+		bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX);
-+
-+		e = bkey_extent_init(new.k);
-+		e->k.p = iter->pos;
-+
-+		ret = bch2_alloc_sectors_start_trans(trans,
-+				opts.foreground_target,
-+				false,
-+				write_point,
-+				&devs_have,
-+				opts.data_replicas,
-+				opts.data_replicas,
-+				BCH_WATERMARK_normal, 0, &cl, &wp);
-+		if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
-+			ret = -BCH_ERR_transaction_restart_nested;
-+		if (ret)
-+			goto err;
-+
-+		sectors = min_t(u64, sectors, wp->sectors_free);
-+		sectors_allocated = sectors;
-+
-+		bch2_key_resize(&e->k, sectors);
-+
-+		bch2_open_bucket_get(c, wp, &open_buckets);
-+		bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
-+		bch2_alloc_sectors_done(c, wp);
-+
-+		extent_for_each_ptr(extent_i_to_s(e), ptr)
-+			ptr->unwritten = true;
-+	}
-+
-+	have_reservation = true;
-+
-+	ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res,
-+				 0, i_sectors_delta, true);
-+err:
-+	if (!ret && sectors_allocated)
-+		bch2_increment_clock(c, sectors_allocated, WRITE);
-+
-+	bch2_open_buckets_put(c, &open_buckets);
-+	bch2_disk_reservation_put(c, &disk_res);
-+	bch2_bkey_buf_exit(&new, c);
-+	bch2_bkey_buf_exit(&old, c);
-+
-+	if (closure_nr_remaining(&cl) != 1) {
-+		bch2_trans_unlock(trans);
-+		closure_sync(&cl);
-+	}
-+
-+	return ret;
-+}
-+
-+/*
-+ * Returns -BCH_ERR_transacton_restart if we had to drop locks:
-+ */
-+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
-+		   subvol_inum inum, u64 end,
-+		   s64 *i_sectors_delta)
-+{
-+	struct bch_fs *c = trans->c;
-+	unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
-+	struct bpos end_pos = POS(inum.inum, end);
-+	struct bkey_s_c k;
-+	int ret = 0, ret2 = 0;
-+	u32 snapshot;
-+
-+	while (!ret ||
-+	       bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-+		struct disk_reservation disk_res =
-+			bch2_disk_reservation_init(c, 0);
-+		struct bkey_i delete;
-+
-+		if (ret)
-+			ret2 = ret;
-+
-+		bch2_trans_begin(trans);
-+
-+		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+		if (ret)
-+			continue;
-+
-+		bch2_btree_iter_set_snapshot(iter, snapshot);
-+
-+		/*
-+		 * peek_upto() doesn't have ideal semantics for extents:
-+		 */
-+		k = bch2_btree_iter_peek_upto(iter, end_pos);
-+		if (!k.k)
-+			break;
-+
-+		ret = bkey_err(k);
-+		if (ret)
-+			continue;
-+
-+		bkey_init(&delete.k);
-+		delete.k.p = iter->pos;
-+
-+		/* create the biggest key we can */
-+		bch2_key_resize(&delete.k, max_sectors);
-+		bch2_cut_back(end_pos, &delete);
-+
-+		ret = bch2_extent_update(trans, inum, iter, &delete,
-+					 &disk_res, 0, i_sectors_delta, false);
-+		bch2_disk_reservation_put(c, &disk_res);
-+	}
-+
-+	return ret ?: ret2;
-+}
-+
-+int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
-+		s64 *i_sectors_delta)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	int ret;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			     POS(inum.inum, start),
-+			     BTREE_ITER_INTENT);
-+
-+	ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta);
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_trans_put(trans);
-+
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		ret = 0;
-+
-+	return ret;
-+}
-+
-+/* truncate: */
-+
-+void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+	struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k);
-+
-+	prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
-+	prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
-+	prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size));
-+}
-+
-+static int truncate_set_isize(struct btree_trans *trans,
-+			      subvol_inum inum,
-+			      u64 new_i_size)
-+{
-+	struct btree_iter iter = { NULL };
-+	struct bch_inode_unpacked inode_u;
-+	int ret;
-+
-+	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
-+	      (inode_u.bi_size = new_i_size, 0) ?:
-+	      bch2_inode_write(trans, &iter, &inode_u);
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
-+					    struct bkey_i *op_k,
-+					    u64 *i_sectors_delta)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter fpunch_iter;
-+	struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
-+	subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
-+	u64 new_i_size = le64_to_cpu(op->v.new_i_size);
-+	int ret;
-+
-+	ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+			truncate_set_isize(trans, inum, new_i_size));
-+	if (ret)
-+		goto err;
-+
-+	bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents,
-+			     POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
-+			     BTREE_ITER_INTENT);
-+	ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
-+	bch2_trans_iter_exit(trans, &fpunch_iter);
-+
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+		ret = 0;
-+err:
-+	bch2_logged_op_finish(trans, op_k);
-+	return ret;
-+}
-+
-+int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k)
-+{
-+	return __bch2_resume_logged_op_truncate(trans, op_k, NULL);
-+}
-+
-+int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
-+{
-+	struct bkey_i_logged_op_truncate op;
-+
-+	bkey_logged_op_truncate_init(&op.k_i);
-+	op.v.subvol	= cpu_to_le32(inum.subvol);
-+	op.v.inum	= cpu_to_le64(inum.inum);
-+	op.v.new_i_size	= cpu_to_le64(new_i_size);
-+
-+	/*
-+	 * Logged ops aren't atomic w.r.t. snapshot creation: creating a
-+	 * snapshot while they're in progress, then crashing, will result in the
-+	 * resume only proceeding in one of the snapshots
-+	 */
-+	down_read(&c->snapshot_create_lock);
-+	int ret = bch2_trans_run(c,
-+		bch2_logged_op_start(trans, &op.k_i) ?:
-+		__bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
-+	up_read(&c->snapshot_create_lock);
-+
-+	return ret;
-+}
-+
-+/* finsert/fcollapse: */
-+
-+void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
-+{
-+	struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k);
-+
-+	prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol));
-+	prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum));
-+	prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset));
-+	prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset));
-+}
-+
-+static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
-+{
-+	struct btree_iter iter;
-+	struct bch_inode_unpacked inode_u;
-+	int ret;
-+
-+	offset	<<= 9;
-+	len	<<= 9;
-+
-+	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
-+	if (ret)
-+		return ret;
-+
-+	if (len > 0) {
-+		if (MAX_LFS_FILESIZE - inode_u.bi_size < len) {
-+			ret = -EFBIG;
-+			goto err;
-+		}
-+
-+		if (offset >= inode_u.bi_size) {
-+			ret = -EINVAL;
-+			goto err;
-+		}
-+	}
-+
-+	inode_u.bi_size += len;
-+	inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c);
-+
-+	ret = bch2_inode_write(trans, &iter, &inode_u);
-+err:
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
-+					   struct bkey_i *op_k,
-+					   u64 *i_sectors_delta)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct btree_iter iter;
-+	struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k);
-+	subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
-+	struct bch_io_opts opts;
-+	u64 dst_offset = le64_to_cpu(op->v.dst_offset);
-+	u64 src_offset = le64_to_cpu(op->v.src_offset);
-+	s64 shift = dst_offset - src_offset;
-+	u64 len = abs(shift);
-+	u64 pos = le64_to_cpu(op->v.pos);
-+	bool insert = shift > 0;
-+	int ret = 0;
-+
-+	ret = bch2_inum_opts_get(trans, inum, &opts);
-+	if (ret)
-+		return ret;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			     POS(inum.inum, 0),
-+			     BTREE_ITER_INTENT);
-+
-+	switch (op->v.state) {
-+case LOGGED_OP_FINSERT_start:
-+	op->v.state = LOGGED_OP_FINSERT_shift_extents;
-+
-+	if (insert) {
-+		ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+				adjust_i_size(trans, inum, src_offset, len) ?:
-+				bch2_logged_op_update(trans, &op->k_i));
-+		if (ret)
-+			goto err;
-+	} else {
-+		bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));
-+
-+		ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
-+		if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			goto err;
-+
-+		ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+				bch2_logged_op_update(trans, &op->k_i));
-+	}
-+
-+	fallthrough;
-+case LOGGED_OP_FINSERT_shift_extents:
-+	while (1) {
-+		struct disk_reservation disk_res =
-+			bch2_disk_reservation_init(c, 0);
-+		struct bkey_i delete, *copy;
-+		struct bkey_s_c k;
-+		struct bpos src_pos = POS(inum.inum, src_offset);
-+		u32 snapshot;
-+
-+		bch2_trans_begin(trans);
-+
-+		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+		if (ret)
-+			goto btree_err;
-+
-+		bch2_btree_iter_set_snapshot(&iter, snapshot);
-+		bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));
-+
-+		k = insert
-+			? bch2_btree_iter_peek_prev(&iter)
-+			: bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
-+		if ((ret = bkey_err(k)))
-+			goto btree_err;
-+
-+		if (!k.k ||
-+		    k.k->p.inode != inum.inum ||
-+		    bkey_le(k.k->p, POS(inum.inum, src_offset)))
-+			break;
-+
-+		copy = bch2_bkey_make_mut_noupdate(trans, k);
-+		if ((ret = PTR_ERR_OR_ZERO(copy)))
-+			goto btree_err;
-+
-+		if (insert &&
-+		    bkey_lt(bkey_start_pos(k.k), src_pos)) {
-+			bch2_cut_front(src_pos, copy);
-+
-+			/* Splitting compressed extent? */
-+			bch2_disk_reservation_add(c, &disk_res,
-+					copy->k.size *
-+					bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)),
-+					BCH_DISK_RESERVATION_NOFAIL);
-+		}
-+
-+		bkey_init(&delete.k);
-+		delete.k.p = copy->k.p;
-+		delete.k.p.snapshot = snapshot;
-+		delete.k.size = copy->k.size;
-+
-+		copy->k.p.offset += shift;
-+		copy->k.p.snapshot = snapshot;
-+
-+		op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
-+
-+		ret = bch2_bkey_set_needs_rebalance(c, copy,
-+					opts.background_target,
-+					opts.background_compression) ?:
-+		      bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
-+		      bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
-+		      bch2_logged_op_update(trans, &op->k_i) ?:
-+		      bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL);
-+btree_err:
-+		bch2_disk_reservation_put(c, &disk_res);
-+
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			continue;
-+		if (ret)
-+			goto err;
-+
-+		pos = le64_to_cpu(op->v.pos);
-+	}
-+
-+	op->v.state = LOGGED_OP_FINSERT_finish;
-+
-+	if (!insert) {
-+		ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+				adjust_i_size(trans, inum, src_offset, shift) ?:
-+				bch2_logged_op_update(trans, &op->k_i));
-+	} else {
-+		/* We need an inode update to update bi_journal_seq for fsync: */
-+		ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
-+				adjust_i_size(trans, inum, 0, 0) ?:
-+				bch2_logged_op_update(trans, &op->k_i));
-+	}
-+
-+	break;
-+case LOGGED_OP_FINSERT_finish:
-+	break;
-+	}
-+err:
-+	bch2_logged_op_finish(trans, op_k);
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k)
-+{
-+	return __bch2_resume_logged_op_finsert(trans, op_k, NULL);
-+}
-+
-+int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
-+			   u64 offset, u64 len, bool insert,
-+			   s64 *i_sectors_delta)
-+{
-+	struct bkey_i_logged_op_finsert op;
-+	s64 shift = insert ? len : -len;
-+
-+	bkey_logged_op_finsert_init(&op.k_i);
-+	op.v.subvol	= cpu_to_le32(inum.subvol);
-+	op.v.inum	= cpu_to_le64(inum.inum);
-+	op.v.dst_offset	= cpu_to_le64(offset + shift);
-+	op.v.src_offset	= cpu_to_le64(offset);
-+	op.v.pos	= cpu_to_le64(insert ? U64_MAX : offset);
-+
-+	/*
-+	 * Logged ops aren't atomic w.r.t. snapshot creation: creating a
-+	 * snapshot while they're in progress, then crashing, will result in the
-+	 * resume only proceeding in one of the snapshots
-+	 */
-+	down_read(&c->snapshot_create_lock);
-+	int ret = bch2_trans_run(c,
-+		bch2_logged_op_start(trans, &op.k_i) ?:
-+		__bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
-+	up_read(&c->snapshot_create_lock);
-+
-+	return ret;
-+}
-diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h
-new file mode 100644
-index 000000000000..9cb44a7c43c1
---- /dev/null
-+++ b/fs/bcachefs/io_misc.h
-@@ -0,0 +1,34 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IO_MISC_H
-+#define _BCACHEFS_IO_MISC_H
-+
-+int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
-+			  u64, struct bch_io_opts, s64 *,
-+			  struct write_point_specifier);
-+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
-+		   subvol_inum, u64, s64 *);
-+int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
-+
-+void bch2_logged_op_truncate_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_logged_op_truncate ((struct bkey_ops) {	\
-+	.val_to_text	= bch2_logged_op_truncate_to_text,	\
-+	.min_val_size	= 24,					\
-+})
-+
-+int bch2_resume_logged_op_truncate(struct btree_trans *, struct bkey_i *);
-+
-+int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *);
-+
-+void bch2_logged_op_finsert_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-+
-+#define bch2_bkey_ops_logged_op_finsert ((struct bkey_ops) {	\
-+	.val_to_text	= bch2_logged_op_finsert_to_text,	\
-+	.min_val_size	= 24,					\
-+})
-+
-+int bch2_resume_logged_op_finsert(struct btree_trans *, struct bkey_i *);
-+
-+int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *);
-+
-+#endif /* _BCACHEFS_IO_MISC_H */
-diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
-new file mode 100644
-index 000000000000..a56ed553dc15
---- /dev/null
-+++ b/fs/bcachefs/io_read.c
-@@ -0,0 +1,1210 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * Some low level IO code, and hacks for various block layer limitations
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "checksum.h"
-+#include "clock.h"
-+#include "compress.h"
-+#include "data_update.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "error.h"
-+#include "io_read.h"
-+#include "io_misc.h"
-+#include "io_write.h"
-+#include "subvolume.h"
-+#include "trace.h"
-+
-+#include <linux/sched/mm.h>
-+
-+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-+
-+static bool bch2_target_congested(struct bch_fs *c, u16 target)
-+{
-+	const struct bch_devs_mask *devs;
-+	unsigned d, nr = 0, total = 0;
-+	u64 now = local_clock(), last;
-+	s64 congested;
-+	struct bch_dev *ca;
-+
-+	if (!target)
-+		return false;
-+
-+	rcu_read_lock();
-+	devs = bch2_target_to_mask(c, target) ?:
-+		&c->rw_devs[BCH_DATA_user];
-+
-+	for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
-+		ca = rcu_dereference(c->devs[d]);
-+		if (!ca)
-+			continue;
-+
-+		congested = atomic_read(&ca->congested);
-+		last = READ_ONCE(ca->congested_last);
-+		if (time_after64(now, last))
-+			congested -= (now - last) >> 12;
-+
-+		total += max(congested, 0LL);
-+		nr++;
-+	}
-+	rcu_read_unlock();
-+
-+	return bch2_rand_range(nr * CONGESTED_MAX) < total;
-+}
-+
-+#else
-+
-+static bool bch2_target_congested(struct bch_fs *c, u16 target)
-+{
-+	return false;
-+}
-+
-+#endif
-+
-+/* Cache promotion on read */
-+
-+struct promote_op {
-+	struct rcu_head rcu;
-+	u64 start_time;
-+
-+	struct rhash_head hash;
-+	struct bpos pos;
-+
-+	struct data_update write;
-+	struct bio_vec bi_inline_vecs[0]; /* must be last */
-+};
-+
-+static const struct rhashtable_params bch_promote_params = {
-+	.head_offset	= offsetof(struct promote_op, hash),
-+	.key_offset	= offsetof(struct promote_op, pos),
-+	.key_len	= sizeof(struct bpos),
-+};
-+
-+static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
-+				 struct bpos pos,
-+				 struct bch_io_opts opts,
-+				 unsigned flags)
-+{
-+	BUG_ON(!opts.promote_target);
-+
-+	if (!(flags & BCH_READ_MAY_PROMOTE))
-+		return -BCH_ERR_nopromote_may_not;
-+
-+	if (bch2_bkey_has_target(c, k, opts.promote_target))
-+		return -BCH_ERR_nopromote_already_promoted;
-+
-+	if (bkey_extent_is_unwritten(k))
-+		return -BCH_ERR_nopromote_unwritten;
-+
-+	if (bch2_target_congested(c, opts.promote_target))
-+		return -BCH_ERR_nopromote_congested;
-+
-+	if (rhashtable_lookup_fast(&c->promote_table, &pos,
-+				   bch_promote_params))
-+		return -BCH_ERR_nopromote_in_flight;
-+
-+	return 0;
-+}
-+
-+static void promote_free(struct bch_fs *c, struct promote_op *op)
-+{
-+	int ret;
-+
-+	bch2_data_update_exit(&op->write);
-+
-+	ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
-+				     bch_promote_params);
-+	BUG_ON(ret);
-+	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
-+	kfree_rcu(op, rcu);
-+}
-+
-+static void promote_done(struct bch_write_op *wop)
-+{
-+	struct promote_op *op =
-+		container_of(wop, struct promote_op, write.op);
-+	struct bch_fs *c = op->write.op.c;
-+
-+	bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
-+			       op->start_time);
-+	promote_free(c, op);
-+}
-+
-+static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
-+{
-+	struct bio *bio = &op->write.op.wbio.bio;
-+
-+	trace_and_count(op->write.op.c, read_promote, &rbio->bio);
-+
-+	/* we now own pages: */
-+	BUG_ON(!rbio->bounce);
-+	BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
-+
-+	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
-+	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
-+	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
-+
-+	bch2_data_update_read_done(&op->write, rbio->pick.crc);
-+}
-+
-+static struct promote_op *__promote_alloc(struct btree_trans *trans,
-+					  enum btree_id btree_id,
-+					  struct bkey_s_c k,
-+					  struct bpos pos,
-+					  struct extent_ptr_decoded *pick,
-+					  struct bch_io_opts opts,
-+					  unsigned sectors,
-+					  struct bch_read_bio **rbio)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct promote_op *op = NULL;
-+	struct bio *bio;
-+	unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
-+	int ret;
-+
-+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
-+		return NULL;
-+
-+	op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS);
-+	if (!op)
-+		goto err;
-+
-+	op->start_time = local_clock();
-+	op->pos = pos;
-+
-+	/*
-+	 * We don't use the mempool here because extents that aren't
-+	 * checksummed or compressed can be too big for the mempool:
-+	 */
-+	*rbio = kzalloc(sizeof(struct bch_read_bio) +
-+			sizeof(struct bio_vec) * pages,
-+			GFP_NOFS);
-+	if (!*rbio)
-+		goto err;
-+
-+	rbio_init(&(*rbio)->bio, opts);
-+	bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
-+
-+	if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
-+				 GFP_NOFS))
-+		goto err;
-+
-+	(*rbio)->bounce		= true;
-+	(*rbio)->split		= true;
-+	(*rbio)->kmalloc	= true;
-+
-+	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
-+					  bch_promote_params))
-+		goto err;
-+
-+	bio = &op->write.op.wbio.bio;
-+	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
-+
-+	ret = bch2_data_update_init(trans, NULL, &op->write,
-+			writepoint_hashed((unsigned long) current),
-+			opts,
-+			(struct data_update_opts) {
-+				.target		= opts.promote_target,
-+				.extra_replicas	= 1,
-+				.write_flags	= BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
-+			},
-+			btree_id, k);
-+	/*
-+	 * possible errors: -BCH_ERR_nocow_lock_blocked,
-+	 * -BCH_ERR_ENOSPC_disk_reservation:
-+	 */
-+	if (ret) {
-+		ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
-+					     bch_promote_params);
-+		BUG_ON(ret);
-+		goto err;
-+	}
-+
-+	op->write.op.end_io = promote_done;
-+
-+	return op;
-+err:
-+	if (*rbio)
-+		bio_free_pages(&(*rbio)->bio);
-+	kfree(*rbio);
-+	*rbio = NULL;
-+	kfree(op);
-+	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
-+	return NULL;
-+}
-+
-+noinline
-+static struct promote_op *promote_alloc(struct btree_trans *trans,
-+					struct bvec_iter iter,
-+					struct bkey_s_c k,
-+					struct extent_ptr_decoded *pick,
-+					struct bch_io_opts opts,
-+					unsigned flags,
-+					struct bch_read_bio **rbio,
-+					bool *bounce,
-+					bool *read_full)
-+{
-+	struct bch_fs *c = trans->c;
-+	bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
-+	/* data might have to be decompressed in the write path: */
-+	unsigned sectors = promote_full
-+		? max(pick->crc.compressed_size, pick->crc.live_size)
-+		: bvec_iter_sectors(iter);
-+	struct bpos pos = promote_full
-+		? bkey_start_pos(k.k)
-+		: POS(k.k->p.inode, iter.bi_sector);
-+	struct promote_op *promote;
-+	int ret;
-+
-+	ret = should_promote(c, k, pos, opts, flags);
-+	if (ret)
-+		goto nopromote;
-+
-+	promote = __promote_alloc(trans,
-+				  k.k->type == KEY_TYPE_reflink_v
-+				  ? BTREE_ID_reflink
-+				  : BTREE_ID_extents,
-+				  k, pos, pick, opts, sectors, rbio);
-+	if (!promote) {
-+		ret = -BCH_ERR_nopromote_enomem;
-+		goto nopromote;
-+	}
-+
-+	*bounce		= true;
-+	*read_full	= promote_full;
-+	return promote;
-+nopromote:
-+	trace_read_nopromote(c, ret);
-+	return NULL;
-+}
-+
-+/* Read */
-+
-+#define READ_RETRY_AVOID	1
-+#define READ_RETRY		2
-+#define READ_ERR		3
-+
-+enum rbio_context {
-+	RBIO_CONTEXT_NULL,
-+	RBIO_CONTEXT_HIGHPRI,
-+	RBIO_CONTEXT_UNBOUND,
-+};
-+
-+static inline struct bch_read_bio *
-+bch2_rbio_parent(struct bch_read_bio *rbio)
-+{
-+	return rbio->split ? rbio->parent : rbio;
-+}
-+
-+__always_inline
-+static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
-+			   enum rbio_context context,
-+			   struct workqueue_struct *wq)
-+{
-+	if (context <= rbio->context) {
-+		fn(&rbio->work);
-+	} else {
-+		rbio->work.func		= fn;
-+		rbio->context		= context;
-+		queue_work(wq, &rbio->work);
-+	}
-+}
-+
-+static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
-+{
-+	BUG_ON(rbio->bounce && !rbio->split);
-+
-+	if (rbio->promote)
-+		promote_free(rbio->c, rbio->promote);
-+	rbio->promote = NULL;
-+
-+	if (rbio->bounce)
-+		bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
-+
-+	if (rbio->split) {
-+		struct bch_read_bio *parent = rbio->parent;
-+
-+		if (rbio->kmalloc)
-+			kfree(rbio);
-+		else
-+			bio_put(&rbio->bio);
-+
-+		rbio = parent;
-+	}
-+
-+	return rbio;
-+}
-+
-+/*
-+ * Only called on a top level bch_read_bio to complete an entire read request,
-+ * not a split:
-+ */
-+static void bch2_rbio_done(struct bch_read_bio *rbio)
-+{
-+	if (rbio->start_time)
-+		bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
-+				       rbio->start_time);
-+	bio_endio(&rbio->bio);
-+}
-+
-+static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
-+				     struct bvec_iter bvec_iter,
-+				     struct bch_io_failures *failed,
-+				     unsigned flags)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_buf sk;
-+	struct bkey_s_c k;
-+	int ret;
-+
-+	flags &= ~BCH_READ_LAST_FRAGMENT;
-+	flags |= BCH_READ_MUST_CLONE;
-+
-+	bch2_bkey_buf_init(&sk);
-+
-+	bch2_trans_iter_init(trans, &iter, rbio->data_btree,
-+			     rbio->read_pos, BTREE_ITER_SLOTS);
-+retry:
-+	rbio->bio.bi_status = 0;
-+
-+	k = bch2_btree_iter_peek_slot(&iter);
-+	if (bkey_err(k))
-+		goto err;
-+
-+	bch2_bkey_buf_reassemble(&sk, c, k);
-+	k = bkey_i_to_s_c(sk.k);
-+	bch2_trans_unlock(trans);
-+
-+	if (!bch2_bkey_matches_ptr(c, k,
-+				   rbio->pick.ptr,
-+				   rbio->data_pos.offset -
-+				   rbio->pick.crc.offset)) {
-+		/* extent we wanted to read no longer exists: */
-+		rbio->hole = true;
-+		goto out;
-+	}
-+
-+	ret = __bch2_read_extent(trans, rbio, bvec_iter,
-+				 rbio->read_pos,
-+				 rbio->data_btree,
-+				 k, 0, failed, flags);
-+	if (ret == READ_RETRY)
-+		goto retry;
-+	if (ret)
-+		goto err;
-+out:
-+	bch2_rbio_done(rbio);
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_trans_put(trans);
-+	bch2_bkey_buf_exit(&sk, c);
-+	return;
-+err:
-+	rbio->bio.bi_status = BLK_STS_IOERR;
-+	goto out;
-+}
-+
-+static void bch2_rbio_retry(struct work_struct *work)
-+{
-+	struct bch_read_bio *rbio =
-+		container_of(work, struct bch_read_bio, work);
-+	struct bch_fs *c	= rbio->c;
-+	struct bvec_iter iter	= rbio->bvec_iter;
-+	unsigned flags		= rbio->flags;
-+	subvol_inum inum = {
-+		.subvol = rbio->subvol,
-+		.inum	= rbio->read_pos.inode,
-+	};
-+	struct bch_io_failures failed = { .nr = 0 };
-+
-+	trace_and_count(c, read_retry, &rbio->bio);
-+
-+	if (rbio->retry == READ_RETRY_AVOID)
-+		bch2_mark_io_failure(&failed, &rbio->pick);
-+
-+	rbio->bio.bi_status = 0;
-+
-+	rbio = bch2_rbio_free(rbio);
-+
-+	flags |= BCH_READ_IN_RETRY;
-+	flags &= ~BCH_READ_MAY_PROMOTE;
-+
-+	if (flags & BCH_READ_NODECODE) {
-+		bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
-+	} else {
-+		flags &= ~BCH_READ_LAST_FRAGMENT;
-+		flags |= BCH_READ_MUST_CLONE;
-+
-+		__bch2_read(c, rbio, iter, inum, &failed, flags);
-+	}
-+}
-+
-+static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
-+			    blk_status_t error)
-+{
-+	rbio->retry = retry;
-+
-+	if (rbio->flags & BCH_READ_IN_RETRY)
-+		return;
-+
-+	if (retry == READ_ERR) {
-+		rbio = bch2_rbio_free(rbio);
-+
-+		rbio->bio.bi_status = error;
-+		bch2_rbio_done(rbio);
-+	} else {
-+		bch2_rbio_punt(rbio, bch2_rbio_retry,
-+			       RBIO_CONTEXT_UNBOUND, system_unbound_wq);
-+	}
-+}
-+
-+static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
-+				   struct bch_read_bio *rbio)
-+{
-+	struct bch_fs *c = rbio->c;
-+	u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
-+	struct bch_extent_crc_unpacked new_crc;
-+	struct btree_iter iter;
-+	struct bkey_i *new;
-+	struct bkey_s_c k;
-+	int ret = 0;
-+
-+	if (crc_is_compressed(rbio->pick.crc))
-+		return 0;
-+
-+	k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
-+			       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-+	if ((ret = bkey_err(k)))
-+		goto out;
-+
-+	if (bversion_cmp(k.k->version, rbio->version) ||
-+	    !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
-+		goto out;
-+
-+	/* Extent was merged? */
-+	if (bkey_start_offset(k.k) < data_offset ||
-+	    k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
-+		goto out;
-+
-+	if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
-+			rbio->pick.crc, NULL, &new_crc,
-+			bkey_start_offset(k.k) - data_offset, k.k->size,
-+			rbio->pick.crc.csum_type)) {
-+		bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
-+		ret = 0;
-+		goto out;
-+	}
-+
-+	/*
-+	 * going to be temporarily appending another checksum entry:
-+	 */
-+	new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
-+				 sizeof(struct bch_extent_crc128));
-+	if ((ret = PTR_ERR_OR_ZERO(new)))
-+		goto out;
-+
-+	bkey_reassemble(new, k);
-+
-+	if (!bch2_bkey_narrow_crcs(new, new_crc))
-+		goto out;
-+
-+	ret = bch2_trans_update(trans, &iter, new,
-+				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-+out:
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
-+{
-+	bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL,
-+		      __bch2_rbio_narrow_crcs(trans, rbio));
-+}
-+
-+/* Inner part that may run in process context */
-+static void __bch2_read_endio(struct work_struct *work)
-+{
-+	struct bch_read_bio *rbio =
-+		container_of(work, struct bch_read_bio, work);
-+	struct bch_fs *c	= rbio->c;
-+	struct bch_dev *ca	= bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
-+	struct bio *src		= &rbio->bio;
-+	struct bio *dst		= &bch2_rbio_parent(rbio)->bio;
-+	struct bvec_iter dst_iter = rbio->bvec_iter;
-+	struct bch_extent_crc_unpacked crc = rbio->pick.crc;
-+	struct nonce nonce = extent_nonce(rbio->version, crc);
-+	unsigned nofs_flags;
-+	struct bch_csum csum;
-+	int ret;
-+
-+	nofs_flags = memalloc_nofs_save();
-+
-+	/* Reset iterator for checksumming and copying bounced data: */
-+	if (rbio->bounce) {
-+		src->bi_iter.bi_size		= crc.compressed_size << 9;
-+		src->bi_iter.bi_idx		= 0;
-+		src->bi_iter.bi_bvec_done	= 0;
-+	} else {
-+		src->bi_iter			= rbio->bvec_iter;
-+	}
-+
-+	csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
-+	if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
-+		goto csum_err;
-+
-+	/*
-+	 * XXX
-+	 * We need to rework the narrow_crcs path to deliver the read completion
-+	 * first, and then punt to a different workqueue, otherwise we're
-+	 * holding up reads while doing btree updates which is bad for memory
-+	 * reclaim.
-+	 */
-+	if (unlikely(rbio->narrow_crcs))
-+		bch2_rbio_narrow_crcs(rbio);
-+
-+	if (rbio->flags & BCH_READ_NODECODE)
-+		goto nodecode;
-+
-+	/* Adjust crc to point to subset of data we want: */
-+	crc.offset     += rbio->offset_into_extent;
-+	crc.live_size	= bvec_iter_sectors(rbio->bvec_iter);
-+
-+	if (crc_is_compressed(crc)) {
-+		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-+		if (ret)
-+			goto decrypt_err;
-+
-+		if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
-+		    !c->opts.no_data_io)
-+			goto decompression_err;
-+	} else {
-+		/* don't need to decrypt the entire bio: */
-+		nonce = nonce_add(nonce, crc.offset << 9);
-+		bio_advance(src, crc.offset << 9);
-+
-+		BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
-+		src->bi_iter.bi_size = dst_iter.bi_size;
-+
-+		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-+		if (ret)
-+			goto decrypt_err;
-+
-+		if (rbio->bounce) {
-+			struct bvec_iter src_iter = src->bi_iter;
-+
-+			bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
-+		}
-+	}
-+
-+	if (rbio->promote) {
-+		/*
-+		 * Re encrypt data we decrypted, so it's consistent with
-+		 * rbio->crc:
-+		 */
-+		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-+		if (ret)
-+			goto decrypt_err;
-+
-+		promote_start(rbio->promote, rbio);
-+		rbio->promote = NULL;
-+	}
-+nodecode:
-+	if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
-+		rbio = bch2_rbio_free(rbio);
-+		bch2_rbio_done(rbio);
-+	}
-+out:
-+	memalloc_nofs_restore(nofs_flags);
-+	return;
-+csum_err:
-+	/*
-+	 * Checksum error: if the bio wasn't bounced, we may have been
-+	 * reading into buffers owned by userspace (that userspace can
-+	 * scribble over) - retry the read, bouncing it this time:
-+	 */
-+	if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
-+		rbio->flags |= BCH_READ_MUST_BOUNCE;
-+		bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
-+		goto out;
-+	}
-+
-+	bch_err_inum_offset_ratelimited(ca,
-+		rbio->read_pos.inode,
-+		rbio->read_pos.offset << 9,
-+		"data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
-+		rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
-+		csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
-+	bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-+	bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
-+	goto out;
-+decompression_err:
-+	bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
-+					rbio->read_pos.offset << 9,
-+					"decompression error");
-+	bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
-+	goto out;
-+decrypt_err:
-+	bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
-+					rbio->read_pos.offset << 9,
-+					"decrypt error");
-+	bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
-+	goto out;
-+}
-+
-+static void bch2_read_endio(struct bio *bio)
-+{
-+	struct bch_read_bio *rbio =
-+		container_of(bio, struct bch_read_bio, bio);
-+	struct bch_fs *c	= rbio->c;
-+	struct bch_dev *ca	= bch_dev_bkey_exists(c, rbio->pick.ptr.dev);
-+	struct workqueue_struct *wq = NULL;
-+	enum rbio_context context = RBIO_CONTEXT_NULL;
-+
-+	if (rbio->have_ioref) {
-+		bch2_latency_acct(ca, rbio->submit_time, READ);
-+		percpu_ref_put(&ca->io_ref);
-+	}
-+
-+	if (!rbio->split)
-+		rbio->bio.bi_end_io = rbio->end_io;
-+
-+	if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
-+				    rbio->read_pos.inode,
-+				    rbio->read_pos.offset,
-+				    "data read error: %s",
-+				    bch2_blk_status_to_str(bio->bi_status))) {
-+		bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
-+		return;
-+	}
-+
-+	if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
-+	    ptr_stale(ca, &rbio->pick.ptr)) {
-+		trace_and_count(c, read_reuse_race, &rbio->bio);
-+
-+		if (rbio->flags & BCH_READ_RETRY_IF_STALE)
-+			bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
-+		else
-+			bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
-+		return;
-+	}
-+
-+	if (rbio->narrow_crcs ||
-+	    rbio->promote ||
-+	    crc_is_compressed(rbio->pick.crc) ||
-+	    bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
-+		context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
-+	else if (rbio->pick.crc.csum_type)
-+		context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;
-+
-+	bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
-+}
-+
-+int __bch2_read_indirect_extent(struct btree_trans *trans,
-+				unsigned *offset_into_extent,
-+				struct bkey_buf *orig_k)
-+{
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	u64 reflink_offset;
-+	int ret;
-+
-+	reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
-+		*offset_into_extent;
-+
-+	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
-+			       POS(0, reflink_offset), 0);
-+	ret = bkey_err(k);
-+	if (ret)
-+		goto err;
-+
-+	if (k.k->type != KEY_TYPE_reflink_v &&
-+	    k.k->type != KEY_TYPE_indirect_inline_data) {
-+		bch_err_inum_offset_ratelimited(trans->c,
-+			orig_k->k->k.p.inode,
-+			orig_k->k->k.p.offset << 9,
-+			"%llu len %u points to nonexistent indirect extent %llu",
-+			orig_k->k->k.p.offset,
-+			orig_k->k->k.size,
-+			reflink_offset);
-+		bch2_inconsistent_error(trans->c);
-+		ret = -EIO;
-+		goto err;
-+	}
-+
-+	*offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
-+	bch2_bkey_buf_reassemble(orig_k, trans->c, k);
-+err:
-+	bch2_trans_iter_exit(trans, &iter);
-+	return ret;
-+}
-+
-+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
-+						   struct bkey_s_c k,
-+						   struct bch_extent_ptr ptr)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
-+	struct btree_iter iter;
-+	struct printbuf buf = PRINTBUF;
-+	int ret;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
-+			     PTR_BUCKET_POS(c, &ptr),
-+			     BTREE_ITER_CACHED);
-+
-+	prt_printf(&buf, "Attempting to read from stale dirty pointer:");
-+	printbuf_indent_add(&buf, 2);
-+	prt_newline(&buf);
-+
-+	bch2_bkey_val_to_text(&buf, c, k);
-+	prt_newline(&buf);
-+
-+	prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
-+
-+	ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
-+	if (!ret) {
-+		prt_newline(&buf);
-+		bch2_bkey_val_to_text(&buf, c, k);
-+	}
-+
-+	bch2_fs_inconsistent(c, "%s", buf.buf);
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	printbuf_exit(&buf);
-+}
-+
-+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
-+		       struct bvec_iter iter, struct bpos read_pos,
-+		       enum btree_id data_btree, struct bkey_s_c k,
-+		       unsigned offset_into_extent,
-+		       struct bch_io_failures *failed, unsigned flags)
-+{
-+	struct bch_fs *c = trans->c;
-+	struct extent_ptr_decoded pick;
-+	struct bch_read_bio *rbio = NULL;
-+	struct bch_dev *ca = NULL;
-+	struct promote_op *promote = NULL;
-+	bool bounce = false, read_full = false, narrow_crcs = false;
-+	struct bpos data_pos = bkey_start_pos(k.k);
-+	int pick_ret;
-+
-+	if (bkey_extent_is_inline_data(k.k)) {
-+		unsigned bytes = min_t(unsigned, iter.bi_size,
-+				       bkey_inline_data_bytes(k.k));
-+
-+		swap(iter.bi_size, bytes);
-+		memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
-+		swap(iter.bi_size, bytes);
-+		bio_advance_iter(&orig->bio, &iter, bytes);
-+		zero_fill_bio_iter(&orig->bio, iter);
-+		goto out_read_done;
-+	}
-+retry_pick:
-+	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
-+
-+	/* hole or reservation - just zero fill: */
-+	if (!pick_ret)
-+		goto hole;
-+
-+	if (pick_ret < 0) {
-+		bch_err_inum_offset_ratelimited(c,
-+				read_pos.inode, read_pos.offset << 9,
-+				"no device to read from");
-+		goto err;
-+	}
-+
-+	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
-+
-+	/*
-+	 * Stale dirty pointers are treated as IO errors, but @failed isn't
-+	 * allocated unless we're in the retry path - so if we're not in the
-+	 * retry path, don't check here, it'll be caught in bch2_read_endio()
-+	 * and we'll end up in the retry path:
-+	 */
-+	if ((flags & BCH_READ_IN_RETRY) &&
-+	    !pick.ptr.cached &&
-+	    unlikely(ptr_stale(ca, &pick.ptr))) {
-+		read_from_stale_dirty_pointer(trans, k, pick.ptr);
-+		bch2_mark_io_failure(failed, &pick);
-+		goto retry_pick;
-+	}
-+
-+	/*
-+	 * Unlock the iterator while the btree node's lock is still in
-+	 * cache, before doing the IO:
-+	 */
-+	bch2_trans_unlock(trans);
-+
-+	if (flags & BCH_READ_NODECODE) {
-+		/*
-+		 * can happen if we retry, and the extent we were going to read
-+		 * has been merged in the meantime:
-+		 */
-+		if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
-+			goto hole;
-+
-+		iter.bi_size	= pick.crc.compressed_size << 9;
-+		goto get_bio;
-+	}
-+
-+	if (!(flags & BCH_READ_LAST_FRAGMENT) ||
-+	    bio_flagged(&orig->bio, BIO_CHAIN))
-+		flags |= BCH_READ_MUST_CLONE;
-+
-+	narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
-+		bch2_can_narrow_extent_crcs(k, pick.crc);
-+
-+	if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
-+		flags |= BCH_READ_MUST_BOUNCE;
-+
-+	EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-+
-+	if (crc_is_compressed(pick.crc) ||
-+	    (pick.crc.csum_type != BCH_CSUM_none &&
-+	     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-+	      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
-+	       (flags & BCH_READ_USER_MAPPED)) ||
-+	      (flags & BCH_READ_MUST_BOUNCE)))) {
-+		read_full = true;
-+		bounce = true;
-+	}
-+
-+	if (orig->opts.promote_target)
-+		promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
-+					&rbio, &bounce, &read_full);
-+
-+	if (!read_full) {
-+		EBUG_ON(crc_is_compressed(pick.crc));
-+		EBUG_ON(pick.crc.csum_type &&
-+			(bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-+			 bvec_iter_sectors(iter) != pick.crc.live_size ||
-+			 pick.crc.offset ||
-+			 offset_into_extent));
-+
-+		data_pos.offset += offset_into_extent;
-+		pick.ptr.offset += pick.crc.offset +
-+			offset_into_extent;
-+		offset_into_extent		= 0;
-+		pick.crc.compressed_size	= bvec_iter_sectors(iter);
-+		pick.crc.uncompressed_size	= bvec_iter_sectors(iter);
-+		pick.crc.offset			= 0;
-+		pick.crc.live_size		= bvec_iter_sectors(iter);
-+	}
-+get_bio:
-+	if (rbio) {
-+		/*
-+		 * promote already allocated bounce rbio:
-+		 * promote needs to allocate a bio big enough for uncompressing
-+		 * data in the write path, but we're not going to use it all
-+		 * here:
-+		 */
-+		EBUG_ON(rbio->bio.bi_iter.bi_size <
-+			pick.crc.compressed_size << 9);
-+		rbio->bio.bi_iter.bi_size =
-+			pick.crc.compressed_size << 9;
-+	} else if (bounce) {
-+		unsigned sectors = pick.crc.compressed_size;
-+
-+		rbio = rbio_init(bio_alloc_bioset(NULL,
-+						  DIV_ROUND_UP(sectors, PAGE_SECTORS),
-+						  0,
-+						  GFP_NOFS,
-+						  &c->bio_read_split),
-+				 orig->opts);
-+
-+		bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
-+		rbio->bounce	= true;
-+		rbio->split	= true;
-+	} else if (flags & BCH_READ_MUST_CLONE) {
-+		/*
-+		 * Have to clone if there were any splits, due to error
-+		 * reporting issues (if a split errored, and retrying didn't
-+		 * work, when it reports the error to its parent (us) we don't
-+		 * know if the error was from our bio, and we should retry, or
-+		 * from the whole bio, in which case we don't want to retry and
-+		 * lose the error)
-+		 */
-+		rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
-+						 &c->bio_read_split),
-+				 orig->opts);
-+		rbio->bio.bi_iter = iter;
-+		rbio->split	= true;
-+	} else {
-+		rbio = orig;
-+		rbio->bio.bi_iter = iter;
-+		EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
-+	}
-+
-+	EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
-+
-+	rbio->c			= c;
-+	rbio->submit_time	= local_clock();
-+	if (rbio->split)
-+		rbio->parent	= orig;
-+	else
-+		rbio->end_io	= orig->bio.bi_end_io;
-+	rbio->bvec_iter		= iter;
-+	rbio->offset_into_extent= offset_into_extent;
-+	rbio->flags		= flags;
-+	rbio->have_ioref	= pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
-+	rbio->narrow_crcs	= narrow_crcs;
-+	rbio->hole		= 0;
-+	rbio->retry		= 0;
-+	rbio->context		= 0;
-+	/* XXX: only initialize this if needed */
-+	rbio->devs_have		= bch2_bkey_devs(k);
-+	rbio->pick		= pick;
-+	rbio->subvol		= orig->subvol;
-+	rbio->read_pos		= read_pos;
-+	rbio->data_btree	= data_btree;
-+	rbio->data_pos		= data_pos;
-+	rbio->version		= k.k->version;
-+	rbio->promote		= promote;
-+	INIT_WORK(&rbio->work, NULL);
-+
-+	rbio->bio.bi_opf	= orig->bio.bi_opf;
-+	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
-+	rbio->bio.bi_end_io	= bch2_read_endio;
-+
-+	if (rbio->bounce)
-+		trace_and_count(c, read_bounce, &rbio->bio);
-+
-+	this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
-+	bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
-+
-+	/*
-+	 * If it's being moved internally, we don't want to flag it as a cache
-+	 * hit:
-+	 */
-+	if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
-+		bch2_bucket_io_time_reset(trans, pick.ptr.dev,
-+			PTR_BUCKET_NR(ca, &pick.ptr), READ);
-+
-+	if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
-+		bio_inc_remaining(&orig->bio);
-+		trace_and_count(c, read_split, &orig->bio);
-+	}
-+
-+	if (!rbio->pick.idx) {
-+		if (!rbio->have_ioref) {
-+			bch_err_inum_offset_ratelimited(c,
-+					read_pos.inode,
-+					read_pos.offset << 9,
-+					"no device to read from");
-+			bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
-+			goto out;
-+		}
-+
-+		this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
-+			     bio_sectors(&rbio->bio));
-+		bio_set_dev(&rbio->bio, ca->disk_sb.bdev);
-+
-+		if (unlikely(c->opts.no_data_io)) {
-+			if (likely(!(flags & BCH_READ_IN_RETRY)))
-+				bio_endio(&rbio->bio);
-+		} else {
-+			if (likely(!(flags & BCH_READ_IN_RETRY)))
-+				submit_bio(&rbio->bio);
-+			else
-+				submit_bio_wait(&rbio->bio);
-+		}
-+
-+		/*
-+		 * We just submitted IO which may block, we expect relock fail
-+		 * events and shouldn't count them:
-+		 */
-+		trans->notrace_relock_fail = true;
-+	} else {
-+		/* Attempting reconstruct read: */
-+		if (bch2_ec_read_extent(trans, rbio)) {
-+			bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
-+			goto out;
-+		}
-+
-+		if (likely(!(flags & BCH_READ_IN_RETRY)))
-+			bio_endio(&rbio->bio);
-+	}
-+out:
-+	if (likely(!(flags & BCH_READ_IN_RETRY))) {
-+		return 0;
-+	} else {
-+		int ret;
-+
-+		rbio->context = RBIO_CONTEXT_UNBOUND;
-+		bch2_read_endio(&rbio->bio);
-+
-+		ret = rbio->retry;
-+		rbio = bch2_rbio_free(rbio);
-+
-+		if (ret == READ_RETRY_AVOID) {
-+			bch2_mark_io_failure(failed, &pick);
-+			ret = READ_RETRY;
-+		}
-+
-+		if (!ret)
-+			goto out_read_done;
-+
-+		return ret;
-+	}
-+
-+err:
-+	if (flags & BCH_READ_IN_RETRY)
-+		return READ_ERR;
-+
-+	orig->bio.bi_status = BLK_STS_IOERR;
-+	goto out_read_done;
-+
-+hole:
-+	/*
-+	 * won't normally happen in the BCH_READ_NODECODE
-+	 * (bch2_move_extent()) path, but if we retry and the extent we wanted
-+	 * to read no longer exists we have to signal that:
-+	 */
-+	if (flags & BCH_READ_NODECODE)
-+		orig->hole = true;
-+
-+	zero_fill_bio_iter(&orig->bio, iter);
-+out_read_done:
-+	if (flags & BCH_READ_LAST_FRAGMENT)
-+		bch2_rbio_done(orig);
-+	return 0;
-+}
-+
-+void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
-+		 struct bvec_iter bvec_iter, subvol_inum inum,
-+		 struct bch_io_failures *failed, unsigned flags)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_buf sk;
-+	struct bkey_s_c k;
-+	u32 snapshot;
-+	int ret;
-+
-+	BUG_ON(flags & BCH_READ_NODECODE);
-+
-+	bch2_bkey_buf_init(&sk);
-+retry:
-+	bch2_trans_begin(trans);
-+	iter = (struct btree_iter) { NULL };
-+
-+	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-+	if (ret)
-+		goto err;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			     SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
-+			     BTREE_ITER_SLOTS);
-+	while (1) {
-+		unsigned bytes, sectors, offset_into_extent;
-+		enum btree_id data_btree = BTREE_ID_extents;
-+
-+		/*
-+		 * read_extent -> io_time_reset may cause a transaction restart
-+		 * without returning an error, we need to check for that here:
-+		 */
-+		ret = bch2_trans_relock(trans);
-+		if (ret)
-+			break;
-+
-+		bch2_btree_iter_set_pos(&iter,
-+				POS(inum.inum, bvec_iter.bi_sector));
-+
-+		k = bch2_btree_iter_peek_slot(&iter);
-+		ret = bkey_err(k);
-+		if (ret)
-+			break;
-+
-+		offset_into_extent = iter.pos.offset -
-+			bkey_start_offset(k.k);
-+		sectors = k.k->size - offset_into_extent;
-+
-+		bch2_bkey_buf_reassemble(&sk, c, k);
-+
-+		ret = bch2_read_indirect_extent(trans, &data_btree,
-+					&offset_into_extent, &sk);
-+		if (ret)
-+			break;
-+
-+		k = bkey_i_to_s_c(sk.k);
-+
-+		/*
-+		 * With indirect extents, the amount of data to read is the min
-+		 * of the original extent and the indirect extent:
-+		 */
-+		sectors = min(sectors, k.k->size - offset_into_extent);
-+
-+		bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
-+		swap(bvec_iter.bi_size, bytes);
-+
-+		if (bvec_iter.bi_size == bytes)
-+			flags |= BCH_READ_LAST_FRAGMENT;
-+
-+		ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
-+					 data_btree, k,
-+					 offset_into_extent, failed, flags);
-+		if (ret)
-+			break;
-+
-+		if (flags & BCH_READ_LAST_FRAGMENT)
-+			break;
-+
-+		swap(bvec_iter.bi_size, bytes);
-+		bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
-+
-+		ret = btree_trans_too_many_iters(trans);
-+		if (ret)
-+			break;
-+	}
-+err:
-+	bch2_trans_iter_exit(trans, &iter);
-+
-+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-+	    ret == READ_RETRY ||
-+	    ret == READ_RETRY_AVOID)
-+		goto retry;
-+
-+	bch2_trans_put(trans);
-+	bch2_bkey_buf_exit(&sk, c);
-+
-+	if (ret) {
-+		bch_err_inum_offset_ratelimited(c, inum.inum,
-+						bvec_iter.bi_sector << 9,
-+						"read error %i from btree lookup", ret);
-+		rbio->bio.bi_status = BLK_STS_IOERR;
-+		bch2_rbio_done(rbio);
-+	}
-+}
-+
-+void bch2_fs_io_read_exit(struct bch_fs *c)
-+{
-+	if (c->promote_table.tbl)
-+		rhashtable_destroy(&c->promote_table);
-+	bioset_exit(&c->bio_read_split);
-+	bioset_exit(&c->bio_read);
-+}
-+
-+int bch2_fs_io_read_init(struct bch_fs *c)
-+{
-+	if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
-+			BIOSET_NEED_BVECS))
-+		return -BCH_ERR_ENOMEM_bio_read_init;
-+
-+	if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
-+			BIOSET_NEED_BVECS))
-+		return -BCH_ERR_ENOMEM_bio_read_split_init;
-+
-+	if (rhashtable_init(&c->promote_table, &bch_promote_params))
-+		return -BCH_ERR_ENOMEM_promote_table_init;
-+
-+	return 0;
-+}
-diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
-new file mode 100644
-index 000000000000..d9c18bb7d403
---- /dev/null
-+++ b/fs/bcachefs/io_read.h
-@@ -0,0 +1,158 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_IO_READ_H
-+#define _BCACHEFS_IO_READ_H
-+
-+#include "bkey_buf.h"
-+
-+struct bch_read_bio {
-+	struct bch_fs		*c;
-+	u64			start_time;
-+	u64			submit_time;
-+
-+	/*
-+	 * Reads will often have to be split, and if the extent being read from
-+	 * was checksummed or compressed we'll also have to allocate bounce
-+	 * buffers and copy the data back into the original bio.
-+	 *
-+	 * If we didn't have to split, we have to save and restore the original
-+	 * bi_end_io - @split below indicates which:
-+	 */
-+	union {
-+	struct bch_read_bio	*parent;
-+	bio_end_io_t		*end_io;
-+	};
-+
-+	/*
-+	 * Saved copy of bio->bi_iter, from submission time - allows us to
-+	 * resubmit on IO error, and also to copy data back to the original bio
-+	 * when we're bouncing:
-+	 */
-+	struct bvec_iter	bvec_iter;
-+
-+	unsigned		offset_into_extent;
-+
-+	u16			flags;
-+	union {
-+	struct {
-+	u16			bounce:1,
-+				split:1,
-+				kmalloc:1,
-+				have_ioref:1,
-+				narrow_crcs:1,
-+				hole:1,
-+				retry:2,
-+				context:2;
-+	};
-+	u16			_state;
-+	};
-+
-+	struct bch_devs_list	devs_have;
-+
-+	struct extent_ptr_decoded pick;
-+
-+	/*
-+	 * pos we read from - different from data_pos for indirect extents:
-+	 */
-+	u32			subvol;
-+	struct bpos		read_pos;
-+
-+	/*
-+	 * start pos of data we read (may not be pos of data we want) - for
-+	 * promote, narrow extents paths:
-+	 */
-+	enum btree_id		data_btree;
-+	struct bpos		data_pos;
-+	struct bversion		version;
-+
-+	struct promote_op	*promote;
-+
-+	struct bch_io_opts	opts;
-+
-+	struct work_struct	work;
-+
-+	struct bio		bio;
-+};
-+
-+#define to_rbio(_bio)		container_of((_bio), struct bch_read_bio, bio)
-+
-+struct bch_devs_mask;
-+struct cache_promote_op;
-+struct extent_ptr_decoded;
-+
-+int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
-+				struct bkey_buf *);
-+
-+static inline int bch2_read_indirect_extent(struct btree_trans *trans,
-+					    enum btree_id *data_btree,
-+					    unsigned *offset_into_extent,
-+					    struct bkey_buf *k)
-+{
-+	if (k->k->k.type != KEY_TYPE_reflink_p)
-+		return 0;
-+
-+	*data_btree = BTREE_ID_reflink;
-+	return __bch2_read_indirect_extent(trans, offset_into_extent, k);
-+}
-+
-+enum bch_read_flags {
-+	BCH_READ_RETRY_IF_STALE		= 1 << 0,
-+	BCH_READ_MAY_PROMOTE		= 1 << 1,
-+	BCH_READ_USER_MAPPED		= 1 << 2,
-+	BCH_READ_NODECODE		= 1 << 3,
-+	BCH_READ_LAST_FRAGMENT		= 1 << 4,
-+
-+	/* internal: */
-+	BCH_READ_MUST_BOUNCE		= 1 << 5,
-+	BCH_READ_MUST_CLONE		= 1 << 6,
-+	BCH_READ_IN_RETRY		= 1 << 7,
-+};
-+
-+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
-+		       struct bvec_iter, struct bpos, enum btree_id,
-+		       struct bkey_s_c, unsigned,
-+		       struct bch_io_failures *, unsigned);
-+
-+static inline void bch2_read_extent(struct btree_trans *trans,
*trans, -+ struct bch_read_bio *rbio, struct bpos read_pos, -+ enum btree_id data_btree, struct bkey_s_c k, -+ unsigned offset_into_extent, unsigned flags) -+{ -+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, -+ data_btree, k, offset_into_extent, NULL, flags); -+} -+ -+void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, -+ subvol_inum, struct bch_io_failures *, unsigned flags); -+ -+static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, -+ subvol_inum inum) -+{ -+ struct bch_io_failures failed = { .nr = 0 }; -+ -+ BUG_ON(rbio->_state); -+ -+ rbio->c = c; -+ rbio->start_time = local_clock(); -+ rbio->subvol = inum.subvol; -+ -+ __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, -+ BCH_READ_RETRY_IF_STALE| -+ BCH_READ_MAY_PROMOTE| -+ BCH_READ_USER_MAPPED); -+} -+ -+static inline struct bch_read_bio *rbio_init(struct bio *bio, -+ struct bch_io_opts opts) -+{ -+ struct bch_read_bio *rbio = to_rbio(bio); -+ -+ rbio->_state = 0; -+ rbio->promote = NULL; -+ rbio->opts = opts; -+ return rbio; -+} -+ -+void bch2_fs_io_read_exit(struct bch_fs *); -+int bch2_fs_io_read_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_IO_READ_H */ -diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c -new file mode 100644 -index 000000000000..f02b3f7d26a0 ---- /dev/null -+++ b/fs/bcachefs/io_write.c -@@ -0,0 +1,1675 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_buf.h" -+#include "bset.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "clock.h" -+#include "compress.h" -+#include "debug.h" -+#include "ec.h" -+#include "error.h" -+#include "extent_update.h" -+#include "inode.h" -+#include "io_write.h" -+#include "journal.h" -+#include "keylist.h" -+#include "move.h" -+#include "nocow_locking.h" -+#include "rebalance.h" -+#include "subvolume.h" -+#include "super.h" -+#include "super-io.h" -+#include "trace.h" -+ -+#include -+#include -+#include -+#include -+ -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+ -+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, -+ u64 now, int rw) -+{ -+ u64 latency_capable = -+ ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; -+ /* ideally we'd be taking into account the device's variance here: */ -+ u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3); -+ s64 latency_over = io_latency - latency_threshold; -+ -+ if (latency_threshold && latency_over > 0) { -+ /* -+ * bump up congested by approximately latency_over * 4 / -+ * latency_threshold - we don't need much accuracy here so don't -+ * bother with the divide: -+ */ -+ if (atomic_read(&ca->congested) < CONGESTED_MAX) -+ atomic_add(latency_over >> -+ max_t(int, ilog2(latency_threshold) - 2, 0), -+ &ca->congested); -+ -+ ca->congested_last = now; -+ } else if (atomic_read(&ca->congested) > 0) { -+ atomic_dec(&ca->congested); -+ } -+} -+ -+void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) -+{ -+ atomic64_t *latency = &ca->cur_latency[rw]; -+ u64 now = local_clock(); -+ u64 io_latency = time_after64(now, submit_time) -+ ? 
now - submit_time -+ : 0; -+ u64 old, new, v = atomic64_read(latency); -+ -+ do { -+ old = v; -+ -+ /* -+ * If the io latency was reasonably close to the current -+ * latency, skip doing the update and atomic operation - most of -+ * the time: -+ */ -+ if (abs((int) (old - io_latency)) < (old >> 1) && -+ now & ~(~0U << 5)) -+ break; -+ -+ new = ewma_add(old, io_latency, 5); -+ } while ((v = atomic64_cmpxchg(latency, old, new)) != old); -+ -+ bch2_congested_acct(ca, io_latency, now, rw); -+ -+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); -+} -+ -+#endif -+ -+/* Allocate, free from mempool: */ -+ -+void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) -+{ -+ struct bvec_iter_all iter; -+ struct bio_vec *bv; -+ -+ bio_for_each_segment_all(bv, bio, iter) -+ if (bv->bv_page != ZERO_PAGE(0)) -+ mempool_free(bv->bv_page, &c->bio_bounce_pages); -+ bio->bi_vcnt = 0; -+} -+ -+static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) -+{ -+ struct page *page; -+ -+ if (likely(!*using_mempool)) { -+ page = alloc_page(GFP_NOFS); -+ if (unlikely(!page)) { -+ mutex_lock(&c->bio_bounce_pages_lock); -+ *using_mempool = true; -+ goto pool_alloc; -+ -+ } -+ } else { -+pool_alloc: -+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS); -+ } -+ -+ return page; -+} -+ -+void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, -+ size_t size) -+{ -+ bool using_mempool = false; -+ -+ while (size) { -+ struct page *page = __bio_alloc_page_pool(c, &using_mempool); -+ unsigned len = min_t(size_t, PAGE_SIZE, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, 0)); -+ size -= len; -+ } -+ -+ if (using_mempool) -+ mutex_unlock(&c->bio_bounce_pages_lock); -+} -+ -+/* Extent update path: */ -+ -+int bch2_sum_sector_overwrites(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i *new, -+ bool *usage_increasing, -+ s64 *i_sectors_delta, -+ s64 *disk_sectors_delta) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c old; -+ unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new)); -+ bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new)); -+ int ret = 0; -+ -+ *usage_increasing = false; -+ *i_sectors_delta = 0; -+ *disk_sectors_delta = 0; -+ -+ bch2_trans_copy_iter(&iter, extent_iter); -+ -+ for_each_btree_key_upto_continue_norestart(iter, -+ new->k.p, BTREE_ITER_SLOTS, old, ret) { -+ s64 sectors = min(new->k.p.offset, old.k->p.offset) - -+ max(bkey_start_offset(&new->k), -+ bkey_start_offset(old.k)); -+ -+ *i_sectors_delta += sectors * -+ (bkey_extent_is_allocation(&new->k) - -+ bkey_extent_is_allocation(old.k)); -+ -+ *disk_sectors_delta += sectors * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)); -+ *disk_sectors_delta -= new->k.p.snapshot == old.k->p.snapshot -+ ? 
sectors * bch2_bkey_nr_ptrs_fully_allocated(old) -+ : 0; -+ -+ if (!*usage_increasing && -+ (new->k.p.snapshot != old.k->p.snapshot || -+ new_replicas > bch2_bkey_replicas(c, old) || -+ (!new_compressed && bch2_bkey_sectors_compressed(old)))) -+ *usage_increasing = true; -+ -+ if (bkey_ge(old.k->p, new->k.p)) -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ u64 new_i_size, -+ s64 i_sectors_delta) -+{ -+ struct btree_iter iter; -+ struct bkey_i *k; -+ struct bkey_i_inode_v3 *inode; -+ /* -+ * Crazy performance optimization: -+ * Every extent update needs to also update the inode: the inode trigger -+ * will set bi->journal_seq to the journal sequence number of this -+ * transaction - for fsync. -+ * -+ * But if that's the only reason we're updating the inode (we're not -+ * updating bi_size or bi_sectors), then we don't need the inode update -+ * to be journalled - if we crash, the bi_journal_seq update will be -+ * lost, but that's fine. -+ */ -+ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; -+ int ret; -+ -+ k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, -+ extent_iter->pos.inode, -+ extent_iter->snapshot), -+ BTREE_ITER_CACHED); -+ ret = PTR_ERR_OR_ZERO(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ if (unlikely(k->k.type != KEY_TYPE_inode_v3)) { -+ k = bch2_inode_to_v3(trans, k); -+ ret = PTR_ERR_OR_ZERO(k); -+ if (unlikely(ret)) -+ goto err; -+ } -+ -+ inode = bkey_i_to_inode_v3(k); -+ -+ if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) && -+ new_i_size > le64_to_cpu(inode->v.bi_size)) { -+ inode->v.bi_size = cpu_to_le64(new_i_size); -+ inode_update_flags = 0; -+ } -+ -+ if (i_sectors_delta) { -+ le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); -+ inode_update_flags = 0; -+ } -+ -+ if (inode->k.p.snapshot != iter.snapshot) { -+ inode->k.p.snapshot = iter.snapshot; -+ inode_update_flags = 0; -+ } -+ -+ ret = bch2_trans_update(trans, &iter, &inode->k_i, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| -+ inode_update_flags); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_extent_update(struct btree_trans *trans, -+ subvol_inum inum, -+ struct btree_iter *iter, -+ struct bkey_i *k, -+ struct disk_reservation *disk_res, -+ u64 new_i_size, -+ s64 *i_sectors_delta_total, -+ bool check_enospc) -+{ -+ struct bpos next_pos; -+ bool usage_increasing; -+ s64 i_sectors_delta = 0, disk_sectors_delta = 0; -+ int ret; -+ -+ /* -+ * This traverses us the iterator without changing iter->path->pos to -+ * search_key() (which is pos + 1 for extents): we want there to be a -+ * path already traversed at iter->pos because -+ * bch2_trans_extent_update() will use it to attempt extent merging -+ */ -+ ret = __bch2_btree_iter_traverse(iter); -+ if (ret) -+ return ret; -+ -+ ret = bch2_extent_trim_atomic(trans, iter, k); -+ if (ret) -+ return ret; -+ -+ next_pos = k->k.p; -+ -+ ret = bch2_sum_sector_overwrites(trans, iter, k, -+ &usage_increasing, -+ &i_sectors_delta, -+ &disk_sectors_delta); -+ if (ret) -+ return ret; -+ -+ if (disk_res && -+ disk_sectors_delta > (s64) disk_res->sectors) { -+ ret = bch2_disk_reservation_add(trans->c, disk_res, -+ disk_sectors_delta - disk_res->sectors, -+ !check_enospc || !usage_increasing -+ ? 
BCH_DISK_RESERVATION_NOFAIL : 0); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * Note: -+ * We always have to do an inode update - even when i_size/i_sectors -+ * aren't changing - for fsync to work properly; fsync relies on -+ * inode->bi_journal_seq which is updated by the trigger code: -+ */ -+ ret = bch2_extent_update_i_size_sectors(trans, iter, -+ min(k->k.p.offset << 9, new_i_size), -+ i_sectors_delta) ?: -+ bch2_trans_update(trans, iter, k, 0) ?: -+ bch2_trans_commit(trans, disk_res, NULL, -+ BTREE_INSERT_NOCHECK_RW| -+ BTREE_INSERT_NOFAIL); -+ if (unlikely(ret)) -+ return ret; -+ -+ if (i_sectors_delta_total) -+ *i_sectors_delta_total += i_sectors_delta; -+ bch2_btree_iter_set_pos(iter, next_pos); -+ return 0; -+} -+ -+static int bch2_write_index_default(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_buf sk; -+ struct keylist *keys = &op->insert_keys; -+ struct bkey_i *k = bch2_keylist_front(keys); -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ subvol_inum inum = { -+ .subvol = op->subvol, -+ .inum = k->k.p.inode, -+ }; -+ int ret; -+ -+ BUG_ON(!inum.subvol); -+ -+ bch2_bkey_buf_init(&sk); -+ -+ do { -+ bch2_trans_begin(trans); -+ -+ k = bch2_keylist_front(keys); -+ bch2_bkey_buf_copy(&sk, c, k); -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, -+ &sk.k->k.p.snapshot); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -+ bkey_start_pos(&sk.k->k), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ -+ ret = bch2_bkey_set_needs_rebalance(c, sk.k, -+ op->opts.background_target, -+ op->opts.background_compression) ?: -+ bch2_extent_update(trans, inum, &iter, sk.k, -+ &op->res, -+ op->new_i_size, &op->i_sectors_delta, -+ op->flags & BCH_WRITE_CHECK_ENOSPC); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_ge(iter.pos, k->k.p)) -+ bch2_keylist_pop_front(&op->insert_keys); -+ else -+ bch2_cut_front(iter.pos, k); -+ } while (!bch2_keylist_empty(keys)); -+ -+ bch2_trans_put(trans); -+ bch2_bkey_buf_exit(&sk, c); -+ -+ return ret; -+} -+ -+/* Writes */ -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, -+ enum bch_data_type type, -+ const struct bkey_i *k, -+ bool nocow) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); -+ const struct bch_extent_ptr *ptr; -+ struct bch_write_bio *n; -+ struct bch_dev *ca; -+ -+ BUG_ON(c->opts.nochanges); -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || -+ !c->devs[ptr->dev]); -+ -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ -+ if (to_entry(ptr + 1) < ptrs.end) { -+ n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, -+ GFP_NOFS, &ca->replica_set)); -+ -+ n->bio.bi_end_io = wbio->bio.bi_end_io; -+ n->bio.bi_private = wbio->bio.bi_private; -+ n->parent = wbio; -+ n->split = true; -+ n->bounce = false; -+ n->put_bio = true; -+ n->bio.bi_opf = wbio->bio.bi_opf; -+ bio_inc_remaining(&wbio->bio); -+ } else { -+ n = wbio; -+ n->split = false; -+ } -+ -+ n->c = c; -+ n->dev = ptr->dev; -+ n->have_ioref = nocow || bch2_dev_get_ioref(ca, -+ type == BCH_DATA_btree ? 
READ : WRITE); -+ n->nocow = nocow; -+ n->submit_time = local_clock(); -+ n->inode_offset = bkey_start_offset(&k->k); -+ n->bio.bi_iter.bi_sector = ptr->offset; -+ -+ if (likely(n->have_ioref)) { -+ this_cpu_add(ca->io_done->sectors[WRITE][type], -+ bio_sectors(&n->bio)); -+ -+ bio_set_dev(&n->bio, ca->disk_sb.bdev); -+ -+ if (type != BCH_DATA_btree && unlikely(c->opts.no_data_io)) { -+ bio_endio(&n->bio); -+ continue; -+ } -+ -+ submit_bio(&n->bio); -+ } else { -+ n->bio.bi_status = BLK_STS_REMOVED; -+ bio_endio(&n->bio); -+ } -+ } -+} -+ -+static void __bch2_write(struct bch_write_op *); -+ -+static void bch2_write_done(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_fs *c = op->c; -+ -+ EBUG_ON(op->open_buckets.nr); -+ -+ bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); -+ bch2_disk_reservation_put(c, &op->res); -+ -+ if (!(op->flags & BCH_WRITE_MOVE)) -+ bch2_write_ref_put(c, BCH_WRITE_REF_write); -+ bch2_keylist_free(&op->insert_keys, op->inline_keys); -+ -+ EBUG_ON(cl->parent); -+ closure_debug_destroy(cl); -+ if (op->end_io) -+ op->end_io(op); -+} -+ -+static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) -+{ -+ struct keylist *keys = &op->insert_keys; -+ struct bch_extent_ptr *ptr; -+ struct bkey_i *src, *dst = keys->keys, *n; -+ -+ for (src = keys->keys; src != keys->top; src = n) { -+ n = bkey_next(src); -+ -+ if (bkey_extent_is_direct_data(&src->k)) { -+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, -+ test_bit(ptr->dev, op->failed.d)); -+ -+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) -+ return -EIO; -+ } -+ -+ if (dst != src) -+ memmove_u64s_down(dst, src, src->k.u64s); -+ dst = bkey_next(dst); -+ } -+ -+ keys->top = dst; -+ return 0; -+} -+ -+/** -+ * __bch2_write_index - after a write, update index to point to new data -+ * @op: bch_write_op to process -+ */ -+static void __bch2_write_index(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct keylist *keys = &op->insert_keys; -+ unsigned dev; -+ int ret = 0; -+ -+ if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { -+ ret = bch2_write_drop_io_error_ptrs(op); -+ if (ret) -+ goto err; -+ } -+ -+ if (!bch2_keylist_empty(keys)) { -+ u64 sectors_start = keylist_sectors(keys); -+ -+ ret = !(op->flags & BCH_WRITE_MOVE) -+ ? 
bch2_write_index_default(op) -+ : bch2_data_update_index_update(op); -+ -+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); -+ BUG_ON(keylist_sectors(keys) && !ret); -+ -+ op->written += sectors_start - keylist_sectors(keys); -+ -+ if (ret && !bch2_err_matches(ret, EROFS)) { -+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); -+ -+ bch_err_inum_offset_ratelimited(c, -+ insert->k.p.inode, insert->k.p.offset << 9, -+ "write error while doing btree update: %s", -+ bch2_err_str(ret)); -+ } -+ -+ if (ret) -+ goto err; -+ } -+out: -+ /* If a bucket wasn't written, we can't erasure code it: */ -+ for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) -+ bch2_open_bucket_write_error(c, &op->open_buckets, dev); -+ -+ bch2_open_buckets_put(c, &op->open_buckets); -+ return; -+err: -+ keys->top = keys->keys; -+ op->error = ret; -+ op->flags |= BCH_WRITE_DONE; -+ goto out; -+} -+ -+static inline void __wp_update_state(struct write_point *wp, enum write_point_state state) -+{ -+ if (state != wp->state) { -+ u64 now = ktime_get_ns(); -+ -+ if (wp->last_state_change && -+ time_after64(now, wp->last_state_change)) -+ wp->time[wp->state] += now - wp->last_state_change; -+ wp->state = state; -+ wp->last_state_change = now; -+ } -+} -+ -+static inline void wp_update_state(struct write_point *wp, bool running) -+{ -+ enum write_point_state state; -+ -+ state = running ? WRITE_POINT_running : -+ !list_empty(&wp->writes) ? WRITE_POINT_waiting_io -+ : WRITE_POINT_stopped; -+ -+ __wp_update_state(wp, state); -+} -+ -+static void bch2_write_index(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct write_point *wp = op->wp; -+ struct workqueue_struct *wq = index_update_wq(op); -+ unsigned long flags; -+ -+ if ((op->flags & BCH_WRITE_DONE) && -+ (op->flags & BCH_WRITE_MOVE)) -+ bch2_bio_free_pages_pool(op->c, &op->wbio.bio); -+ -+ spin_lock_irqsave(&wp->writes_lock, flags); -+ if (wp->state == WRITE_POINT_waiting_io) -+ __wp_update_state(wp, WRITE_POINT_waiting_work); -+ list_add_tail(&op->wp_list, &wp->writes); -+ spin_unlock_irqrestore(&wp->writes_lock, flags); -+ -+ queue_work(wq, &wp->index_update_work); -+} -+ -+static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp) -+{ -+ op->wp = wp; -+ -+ if (wp->state == WRITE_POINT_stopped) { -+ spin_lock_irq(&wp->writes_lock); -+ __wp_update_state(wp, WRITE_POINT_waiting_io); -+ spin_unlock_irq(&wp->writes_lock); -+ } -+} -+ -+void bch2_write_point_do_index_updates(struct work_struct *work) -+{ -+ struct write_point *wp = -+ container_of(work, struct write_point, index_update_work); -+ struct bch_write_op *op; -+ -+ while (1) { -+ spin_lock_irq(&wp->writes_lock); -+ op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list); -+ if (op) -+ list_del(&op->wp_list); -+ wp_update_state(wp, op != NULL); -+ spin_unlock_irq(&wp->writes_lock); -+ -+ if (!op) -+ break; -+ -+ op->flags |= BCH_WRITE_IN_WORKER; -+ -+ __bch2_write_index(op); -+ -+ if (!(op->flags & BCH_WRITE_DONE)) -+ __bch2_write(op); -+ else -+ bch2_write_done(&op->cl); -+ } -+} -+ -+static void bch2_write_endio(struct bio *bio) -+{ -+ struct closure *cl = bio->bi_private; -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bch_write_bio *wbio = to_wbio(bio); -+ struct bch_write_bio *parent = wbio->split ? 
wbio->parent : NULL; -+ struct bch_fs *c = wbio->c; -+ struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); -+ -+ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, -+ op->pos.inode, -+ wbio->inode_offset << 9, -+ "data write error: %s", -+ bch2_blk_status_to_str(bio->bi_status))) { -+ set_bit(wbio->dev, op->failed.d); -+ op->flags |= BCH_WRITE_IO_ERROR; -+ } -+ -+ if (wbio->nocow) -+ set_bit(wbio->dev, op->devs_need_flush->d); -+ -+ if (wbio->have_ioref) { -+ bch2_latency_acct(ca, wbio->submit_time, WRITE); -+ percpu_ref_put(&ca->io_ref); -+ } -+ -+ if (wbio->bounce) -+ bch2_bio_free_pages_pool(c, bio); -+ -+ if (wbio->put_bio) -+ bio_put(bio); -+ -+ if (parent) -+ bio_endio(&parent->bio); -+ else -+ closure_put(cl); -+} -+ -+static void init_append_extent(struct bch_write_op *op, -+ struct write_point *wp, -+ struct bversion version, -+ struct bch_extent_crc_unpacked crc) -+{ -+ struct bkey_i_extent *e; -+ -+ op->pos.offset += crc.uncompressed_size; -+ -+ e = bkey_extent_init(op->insert_keys.top); -+ e->k.p = op->pos; -+ e->k.size = crc.uncompressed_size; -+ e->k.version = version; -+ -+ if (crc.csum_type || -+ crc.compression_type || -+ crc.nonce) -+ bch2_extent_crc_append(&e->k_i, crc); -+ -+ bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, -+ op->flags & BCH_WRITE_CACHED); -+ -+ bch2_keylist_push(&op->insert_keys); -+} -+ -+static struct bio *bch2_write_bio_alloc(struct bch_fs *c, -+ struct write_point *wp, -+ struct bio *src, -+ bool *page_alloc_failed, -+ void *buf) -+{ -+ struct bch_write_bio *wbio; -+ struct bio *bio; -+ unsigned output_available = -+ min(wp->sectors_free << 9, src->bi_iter.bi_size); -+ unsigned pages = DIV_ROUND_UP(output_available + -+ (buf -+ ? ((unsigned long) buf & (PAGE_SIZE - 1)) -+ : 0), PAGE_SIZE); -+ -+ pages = min(pages, BIO_MAX_VECS); -+ -+ bio = bio_alloc_bioset(NULL, pages, 0, -+ GFP_NOFS, &c->bio_write); -+ wbio = wbio_init(bio); -+ wbio->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ wbio->bio.bi_opf = src->bi_opf; -+ -+ if (buf) { -+ bch2_bio_map(bio, buf, output_available); -+ return bio; -+ } -+ -+ wbio->bounce = true; -+ -+ /* -+ * We can't use mempool for more than c->sb.encoded_extent_max -+ * worth of pages, but we'd like to allocate more if we can: -+ */ -+ bch2_bio_alloc_pages_pool(c, bio, -+ min_t(unsigned, output_available, -+ c->opts.encoded_extent_max)); -+ -+ if (bio->bi_iter.bi_size < output_available) -+ *page_alloc_failed = -+ bch2_bio_alloc_pages(bio, -+ output_available - -+ bio->bi_iter.bi_size, -+ GFP_NOFS) != 0; -+ -+ return bio; -+} -+ -+static int bch2_write_rechecksum(struct bch_fs *c, -+ struct bch_write_op *op, -+ unsigned new_csum_type) -+{ -+ struct bio *bio = &op->wbio.bio; -+ struct bch_extent_crc_unpacked new_crc; -+ int ret; -+ -+ /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ -+ -+ if (bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(new_csum_type)) -+ new_csum_type = op->crc.csum_type; -+ -+ ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, -+ NULL, &new_crc, -+ op->crc.offset, op->crc.live_size, -+ new_csum_type); -+ if (ret) -+ return ret; -+ -+ bio_advance(bio, op->crc.offset << 9); -+ bio->bi_iter.bi_size = op->crc.live_size << 9; -+ op->crc = new_crc; -+ return 0; -+} -+ -+static int bch2_write_decrypt(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct nonce nonce = extent_nonce(op->version, op->crc); -+ struct bch_csum csum; -+ int ret; -+ -+ if 
(!bch2_csum_type_is_encryption(op->crc.csum_type)) -+ return 0; -+ -+ /* -+ * If we need to decrypt data in the write path, we'll no longer be able -+ * to verify the existing checksum (poly1305 mac, in this case) after -+ * it's decrypted - this is the last point we'll be able to reverify the -+ * checksum: -+ */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ if (bch2_crc_cmp(op->crc.csum, csum)) -+ return -EIO; -+ -+ ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); -+ op->crc.csum_type = 0; -+ op->crc.csum = (struct bch_csum) { 0, 0 }; -+ return ret; -+} -+ -+static enum prep_encoded_ret { -+ PREP_ENCODED_OK, -+ PREP_ENCODED_ERR, -+ PREP_ENCODED_CHECKSUM_ERR, -+ PREP_ENCODED_DO_WRITE, -+} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *bio = &op->wbio.bio; -+ -+ if (!(op->flags & BCH_WRITE_DATA_ENCODED)) -+ return PREP_ENCODED_OK; -+ -+ BUG_ON(bio_sectors(bio) != op->crc.compressed_size); -+ -+ /* Can we just write the entire extent as is? */ -+ if (op->crc.uncompressed_size == op->crc.live_size && -+ op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 && -+ op->crc.compressed_size <= wp->sectors_free && -+ (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || -+ op->incompressible)) { -+ if (!crc_is_compressed(op->crc) && -+ op->csum_type != op->crc.csum_type && -+ bch2_write_rechecksum(c, op, op->csum_type) && -+ !c->opts.no_data_io) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_DO_WRITE; -+ } -+ -+ /* -+ * If the data is compressed and we couldn't write the entire extent as -+ * is, we have to decompress it: -+ */ -+ if (crc_is_compressed(op->crc)) { -+ struct bch_csum csum; -+ -+ if (bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* Last point we can still verify checksum: */ -+ csum = bch2_checksum_bio(c, op->crc.csum_type, -+ extent_nonce(op->version, op->crc), -+ bio); -+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) -+ return PREP_ENCODED_ERR; -+ } -+ -+ /* -+ * No longer have compressed data after this point - data might be -+ * encrypted: -+ */ -+ -+ /* -+ * If the data is checksummed and we're only writing a subset, -+ * rechecksum and adjust bio to point to currently live data: -+ */ -+ if ((op->crc.live_size != op->crc.uncompressed_size || -+ op->crc.csum_type != op->csum_type) && -+ bch2_write_rechecksum(c, op, op->csum_type) && -+ !c->opts.no_data_io) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ /* -+ * If we want to compress the data, it has to be decrypted: -+ */ -+ if ((op->compression_opt || -+ bch2_csum_type_is_encryption(op->crc.csum_type) != -+ bch2_csum_type_is_encryption(op->csum_type)) && -+ bch2_write_decrypt(op)) -+ return PREP_ENCODED_CHECKSUM_ERR; -+ -+ return PREP_ENCODED_OK; -+} -+ -+static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, -+ struct bio **_dst) -+{ -+ struct bch_fs *c = op->c; -+ struct bio *src = &op->wbio.bio, *dst = src; -+ struct bvec_iter saved_iter; -+ void *ec_buf; -+ unsigned total_output = 0, total_input = 0; -+ bool bounce = false; -+ bool page_alloc_failed = false; -+ int ret, more = 0; -+ -+ BUG_ON(!bio_sectors(src)); -+ -+ ec_buf = bch2_writepoint_ec_buf(c, wp); -+ -+ switch (bch2_write_prep_encoded_data(op, wp)) { -+ case PREP_ENCODED_OK: -+ break; -+ case PREP_ENCODED_ERR: -+ ret = -EIO; -+ goto err; -+ 
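-+ /* existing checksum didn't verify - fail the write rather than propagate possibly-corrupt data (see csum_err below): */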
case PREP_ENCODED_CHECKSUM_ERR: -+ goto csum_err; -+ case PREP_ENCODED_DO_WRITE: -+ /* XXX look for bug here */ -+ if (ec_buf) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bio_copy_data(dst, src); -+ bounce = true; -+ } -+ init_append_extent(op, wp, op->version, op->crc); -+ goto do_write; -+ } -+ -+ if (ec_buf || -+ op->compression_opt || -+ (op->csum_type && -+ !(op->flags & BCH_WRITE_PAGES_STABLE)) || -+ (bch2_csum_type_is_encryption(op->csum_type) && -+ !(op->flags & BCH_WRITE_PAGES_OWNED))) { -+ dst = bch2_write_bio_alloc(c, wp, src, -+ &page_alloc_failed, -+ ec_buf); -+ bounce = true; -+ } -+ -+ saved_iter = dst->bi_iter; -+ -+ do { -+ struct bch_extent_crc_unpacked crc = { 0 }; -+ struct bversion version = op->version; -+ size_t dst_len = 0, src_len = 0; -+ -+ if (page_alloc_failed && -+ dst->bi_iter.bi_size < (wp->sectors_free << 9) && -+ dst->bi_iter.bi_size < c->opts.encoded_extent_max) -+ break; -+ -+ BUG_ON(op->compression_opt && -+ (op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_csum_type_is_encryption(op->crc.csum_type)); -+ BUG_ON(op->compression_opt && !bounce); -+ -+ crc.compression_type = op->incompressible -+ ? BCH_COMPRESSION_TYPE_incompressible -+ : op->compression_opt -+ ? bch2_bio_compress(c, dst, &dst_len, src, &src_len, -+ op->compression_opt) -+ : 0; -+ if (!crc_is_compressed(crc)) { -+ dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); -+ dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9); -+ -+ if (op->csum_type) -+ dst_len = min_t(unsigned, dst_len, -+ c->opts.encoded_extent_max); -+ -+ if (bounce) { -+ swap(dst->bi_iter.bi_size, dst_len); -+ bio_copy_data(dst, src); -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ src_len = dst_len; -+ } -+ -+ BUG_ON(!src_len || !dst_len); -+ -+ if (bch2_csum_type_is_encryption(op->csum_type)) { -+ if (bversion_zero(version)) { -+ version.lo = atomic64_inc_return(&c->key_version); -+ } else { -+ crc.nonce = op->nonce; -+ op->nonce += src_len >> 9; -+ } -+ } -+ -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ !crc_is_compressed(crc) && -+ bch2_csum_type_is_encryption(op->crc.csum_type) == -+ bch2_csum_type_is_encryption(op->csum_type)) { -+ u8 compression_type = crc.compression_type; -+ u16 nonce = crc.nonce; -+ /* -+ * Note: when we're using rechecksum(), we need to be -+ * checksumming @src because it has all the data our -+ * existing checksum covers - if we bounced (because we -+ * were trying to compress), @dst will only have the -+ * part of the data the new checksum will cover. -+ * -+ * But normally we want to be checksumming post bounce, -+ * because part of the reason for bouncing is so the -+ * data can't be modified (by userspace) while it's in -+ * flight. -+ */ -+ if (bch2_rechecksum_bio(c, src, version, op->crc, -+ &crc, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->csum_type)) -+ goto csum_err; -+ /* -+ * rchecksum_bio sets compression_type on crc from op->crc, -+ * this isn't always correct as sometimes we're changing -+ * an extent from uncompressed to incompressible. 
-+ */ -+ crc.compression_type = compression_type; -+ crc.nonce = nonce; -+ } else { -+ if ((op->flags & BCH_WRITE_DATA_ENCODED) && -+ bch2_rechecksum_bio(c, src, version, op->crc, -+ NULL, &op->crc, -+ src_len >> 9, -+ bio_sectors(src) - (src_len >> 9), -+ op->crc.csum_type)) -+ goto csum_err; -+ -+ crc.compressed_size = dst_len >> 9; -+ crc.uncompressed_size = src_len >> 9; -+ crc.live_size = src_len >> 9; -+ -+ swap(dst->bi_iter.bi_size, dst_len); -+ ret = bch2_encrypt_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ if (ret) -+ goto err; -+ -+ crc.csum = bch2_checksum_bio(c, op->csum_type, -+ extent_nonce(version, crc), dst); -+ crc.csum_type = op->csum_type; -+ swap(dst->bi_iter.bi_size, dst_len); -+ } -+ -+ init_append_extent(op, wp, version, crc); -+ -+ if (dst != src) -+ bio_advance(dst, dst_len); -+ bio_advance(src, src_len); -+ total_output += dst_len; -+ total_input += src_len; -+ } while (dst->bi_iter.bi_size && -+ src->bi_iter.bi_size && -+ wp->sectors_free && -+ !bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)); -+ -+ more = src->bi_iter.bi_size != 0; -+ -+ dst->bi_iter = saved_iter; -+ -+ if (dst == src && more) { -+ BUG_ON(total_output != total_input); -+ -+ dst = bio_split(src, total_input >> 9, -+ GFP_NOFS, &c->bio_write); -+ wbio_init(dst)->put_bio = true; -+ /* copy WRITE_SYNC flag */ -+ dst->bi_opf = src->bi_opf; -+ } -+ -+ dst->bi_iter.bi_size = total_output; -+do_write: -+ *_dst = dst; -+ return more; -+csum_err: -+ bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)"); -+ ret = -EIO; -+err: -+ if (to_wbio(dst)->bounce) -+ bch2_bio_free_pages_pool(c, dst); -+ if (to_wbio(dst)->put_bio) -+ bio_put(dst); -+ -+ return ret; -+} -+ -+static bool bch2_extent_is_writeable(struct bch_write_op *op, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = op->c; -+ struct bkey_s_c_extent e; -+ struct extent_ptr_decoded p; -+ const union bch_extent_entry *entry; -+ unsigned replicas = 0; -+ -+ if (k.k->type != KEY_TYPE_extent) -+ return false; -+ -+ e = bkey_s_c_to_extent(k); -+ extent_for_each_ptr_decode(e, p, entry) { -+ if (crc_is_encoded(p.crc) || p.has_ec) -+ return false; -+ -+ replicas += bch2_extent_ptr_durability(c, &p); -+ } -+ -+ return replicas >= op->opts.data_replicas; -+} -+ -+static inline void bch2_nocow_write_unlock(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ const struct bch_extent_ptr *ptr; -+ struct bkey_i *k; -+ -+ for_each_keylist_key(&op->insert_keys, k) { -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); -+ -+ bkey_for_each_ptr(ptrs, ptr) -+ bch2_bucket_nocow_unlock(&c->nocow_locks, -+ PTR_BUCKET_POS(c, ptr), -+ BUCKET_NOCOW_LOCK_UPDATE); -+ } -+} -+ -+static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i *orig, -+ struct bkey_s_c k, -+ u64 new_i_size) -+{ -+ struct bkey_i *new; -+ struct bkey_ptrs ptrs; -+ struct bch_extent_ptr *ptr; -+ int ret; -+ -+ if (!bch2_extents_match(bkey_i_to_s_c(orig), k)) { -+ /* trace this */ -+ return 0; -+ } -+ -+ new = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ bch2_cut_front(bkey_start_pos(&orig->k), new); -+ bch2_cut_back(orig->k.p, new); -+ -+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); -+ bkey_for_each_ptr(ptrs, ptr) -+ ptr->unwritten = 0; -+ -+ /* -+ * Note that we're not calling bch2_subvol_get_snapshot() in this path - -+ * that was done when we 
kicked off the write, and here it's important -+ * that we update the extent that we wrote to - even if a snapshot has -+ * since been created. The write is still outstanding, so we're ok -+ * w.r.t. snapshot atomicity: -+ */ -+ return bch2_extent_update_i_size_sectors(trans, iter, -+ min(new->k.p.offset << 9, new_i_size), 0) ?: -+ bch2_trans_update(trans, iter, new, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+} -+ -+static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_i *orig; -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_keylist_key(&op->insert_keys, orig) { -+ ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, -+ bkey_start_pos(&orig->k), orig->k.p, -+ BTREE_ITER_INTENT, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL, ({ -+ bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); -+ })); -+ -+ if (ret && !bch2_err_matches(ret, EROFS)) { -+ struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); -+ -+ bch_err_inum_offset_ratelimited(c, -+ insert->k.p.inode, insert->k.p.offset << 9, -+ "write error while doing btree update: %s", -+ bch2_err_str(ret)); -+ } -+ -+ if (ret) { -+ op->error = ret; -+ break; -+ } -+ } -+ -+ bch2_trans_put(trans); -+} -+ -+static void __bch2_nocow_write_done(struct bch_write_op *op) -+{ -+ bch2_nocow_write_unlock(op); -+ -+ if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { -+ op->error = -EIO; -+ } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN)) -+ bch2_nocow_write_convert_unwritten(op); -+} -+ -+static void bch2_nocow_write_done(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ -+ __bch2_nocow_write_done(op); -+ bch2_write_done(cl); -+} -+ -+static void bch2_nocow_write(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_ptrs_c ptrs; -+ const struct bch_extent_ptr *ptr; -+ struct { -+ struct bpos b; -+ unsigned gen; -+ struct nocow_lock_bucket *l; -+ } buckets[BCH_REPLICAS_MAX]; -+ unsigned nr_buckets = 0; -+ u32 snapshot; -+ int ret, i; -+ -+ if (op->flags & BCH_WRITE_MOVE) -+ return; -+ -+ trans = bch2_trans_get(c); -+retry: -+ bch2_trans_begin(trans); -+ -+ ret = bch2_subvolume_get_snapshot(trans, op->subvol, &snapshot); -+ if (unlikely(ret)) -+ goto err; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, -+ SPOS(op->pos.inode, op->pos.offset, snapshot), -+ BTREE_ITER_SLOTS); -+ while (1) { -+ struct bio *bio = &op->wbio.bio; -+ -+ nr_buckets = 0; -+ -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ /* fall back to normal cow write path? 
*/ -+ if (unlikely(k.k->p.snapshot != snapshot || -+ !bch2_extent_is_writeable(op, k))) -+ break; -+ -+ if (bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ k.k->u64s)) -+ break; -+ -+ /* Get iorefs before dropping btree locks: */ -+ ptrs = bch2_bkey_ptrs_c(k); -+ bkey_for_each_ptr(ptrs, ptr) { -+ buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); -+ buckets[nr_buckets].gen = ptr->gen; -+ buckets[nr_buckets].l = -+ bucket_nocow_lock(&c->nocow_locks, -+ bucket_to_u64(buckets[nr_buckets].b)); -+ -+ prefetch(buckets[nr_buckets].l); -+ -+ if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE))) -+ goto err_get_ioref; -+ -+ nr_buckets++; -+ -+ if (ptr->unwritten) -+ op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; -+ } -+ -+ /* Unlock before taking nocow locks, doing IO: */ -+ bkey_reassemble(op->insert_keys.top, k); -+ bch2_trans_unlock(trans); -+ -+ bch2_cut_front(op->pos, op->insert_keys.top); -+ if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) -+ bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); -+ -+ for (i = 0; i < nr_buckets; i++) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); -+ struct nocow_lock_bucket *l = buckets[i].l; -+ bool stale; -+ -+ __bch2_bucket_nocow_lock(&c->nocow_locks, l, -+ bucket_to_u64(buckets[i].b), -+ BUCKET_NOCOW_LOCK_UPDATE); -+ -+ rcu_read_lock(); -+ stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); -+ rcu_read_unlock(); -+ -+ if (unlikely(stale)) -+ goto err_bucket_stale; -+ } -+ -+ bio = &op->wbio.bio; -+ if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) { -+ bio = bio_split(bio, k.k->p.offset - op->pos.offset, -+ GFP_KERNEL, &c->bio_write); -+ wbio_init(bio)->put_bio = true; -+ bio->bi_opf = op->wbio.bio.bi_opf; -+ } else { -+ op->flags |= BCH_WRITE_DONE; -+ } -+ -+ op->pos.offset += bio_sectors(bio); -+ op->written += bio_sectors(bio); -+ -+ bio->bi_end_io = bch2_write_endio; -+ bio->bi_private = &op->cl; -+ bio->bi_opf |= REQ_OP_WRITE; -+ closure_get(&op->cl); -+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, -+ op->insert_keys.top, true); -+ -+ bch2_keylist_push(&op->insert_keys); -+ if (op->flags & BCH_WRITE_DONE) -+ break; -+ bch2_btree_iter_advance(&iter); -+ } -+out: -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ if (ret) { -+ bch_err_inum_offset_ratelimited(c, -+ op->pos.inode, -+ op->pos.offset << 9, -+ "%s: btree lookup error %s", -+ __func__, bch2_err_str(ret)); -+ op->error = ret; -+ op->flags |= BCH_WRITE_DONE; -+ } -+ -+ bch2_trans_put(trans); -+ -+ /* fallback to cow write path? 
*/ -+ if (!(op->flags & BCH_WRITE_DONE)) { -+ closure_sync(&op->cl); -+ __bch2_nocow_write_done(op); -+ op->insert_keys.top = op->insert_keys.keys; -+ } else if (op->flags & BCH_WRITE_SYNC) { -+ closure_sync(&op->cl); -+ bch2_nocow_write_done(&op->cl); -+ } else { -+ /* -+ * XXX -+ * needs to run out of process context because ei_quota_lock is -+ * a mutex -+ */ -+ continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op)); -+ } -+ return; -+err_get_ioref: -+ for (i = 0; i < nr_buckets; i++) -+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); -+ -+ /* Fall back to COW path: */ -+ goto out; -+err_bucket_stale: -+ while (i >= 0) { -+ bch2_bucket_nocow_unlock(&c->nocow_locks, -+ buckets[i].b, -+ BUCKET_NOCOW_LOCK_UPDATE); -+ --i; -+ } -+ for (i = 0; i < nr_buckets; i++) -+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); -+ -+ /* We can retry this: */ -+ ret = -BCH_ERR_transaction_restart; -+ goto out; -+} -+ -+static void __bch2_write(struct bch_write_op *op) -+{ -+ struct bch_fs *c = op->c; -+ struct write_point *wp = NULL; -+ struct bio *bio = NULL; -+ unsigned nofs_flags; -+ int ret; -+ -+ nofs_flags = memalloc_nofs_save(); -+ -+ if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) { -+ bch2_nocow_write(op); -+ if (op->flags & BCH_WRITE_DONE) -+ goto out_nofs_restore; -+ } -+again: -+ memset(&op->failed, 0, sizeof(op->failed)); -+ -+ do { -+ struct bkey_i *key_to_write; -+ unsigned key_to_write_offset = op->insert_keys.top_p - -+ op->insert_keys.keys_p; -+ -+ /* +1 for possible cache device: */ -+ if (op->open_buckets.nr + op->nr_replicas + 1 > -+ ARRAY_SIZE(op->open_buckets.v)) -+ break; -+ -+ if (bch2_keylist_realloc(&op->insert_keys, -+ op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_EXTENT_U64s_MAX)) -+ break; -+ -+ /* -+ * The copygc thread is now global, which means it's no longer -+ * freeing up space on specific disks, which means that -+ * allocations for specific disks may hang arbitrarily long: -+ */ -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_alloc_sectors_start_trans(trans, -+ op->target, -+ op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), -+ op->write_point, -+ &op->devs_have, -+ op->nr_replicas, -+ op->nr_replicas_required, -+ op->watermark, -+ op->flags, -+ (op->flags & (BCH_WRITE_ALLOC_NOWAIT| -+ BCH_WRITE_ONLY_SPECIFIED_DEVS)) -+ ? NULL : &op->cl, &wp)); -+ if (unlikely(ret)) { -+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) -+ break; -+ -+ goto err; -+ } -+ -+ EBUG_ON(!wp); -+ -+ bch2_open_bucket_get(c, wp, &op->open_buckets); -+ ret = bch2_write_extent(op, wp, &bio); -+ -+ bch2_alloc_sectors_done_inlined(c, wp); -+err: -+ if (ret <= 0) { -+ op->flags |= BCH_WRITE_DONE; -+ -+ if (ret < 0) { -+ op->error = ret; -+ break; -+ } -+ } -+ -+ bio->bi_end_io = bch2_write_endio; -+ bio->bi_private = &op->cl; -+ bio->bi_opf |= REQ_OP_WRITE; -+ -+ closure_get(bio->bi_private); -+ -+ key_to_write = (void *) (op->insert_keys.keys_p + -+ key_to_write_offset); -+ -+ bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, -+ key_to_write, false); -+ } while (ret); -+ -+ /* -+ * Sync or no? -+ * -+ * If we're running asynchronously, we may still want to block -+ * synchronously here if we weren't able to submit all of the IO at -+ * once, as that signals backpressure to the caller. 
-+ */ -+ if ((op->flags & BCH_WRITE_SYNC) || -+ (!(op->flags & BCH_WRITE_DONE) && -+ !(op->flags & BCH_WRITE_IN_WORKER))) { -+ closure_sync(&op->cl); -+ __bch2_write_index(op); -+ -+ if (!(op->flags & BCH_WRITE_DONE)) -+ goto again; -+ bch2_write_done(&op->cl); -+ } else { -+ bch2_write_queue(op, wp); -+ continue_at(&op->cl, bch2_write_index, NULL); -+ } -+out_nofs_restore: -+ memalloc_nofs_restore(nofs_flags); -+} -+ -+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) -+{ -+ struct bio *bio = &op->wbio.bio; -+ struct bvec_iter iter; -+ struct bkey_i_inline_data *id; -+ unsigned sectors; -+ int ret; -+ -+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE; -+ op->flags |= BCH_WRITE_DONE; -+ -+ bch2_check_set_feature(op->c, BCH_FEATURE_inline_data); -+ -+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, -+ ARRAY_SIZE(op->inline_keys), -+ BKEY_U64s + DIV_ROUND_UP(data_len, 8)); -+ if (ret) { -+ op->error = ret; -+ goto err; -+ } -+ -+ sectors = bio_sectors(bio); -+ op->pos.offset += sectors; -+ -+ id = bkey_inline_data_init(op->insert_keys.top); -+ id->k.p = op->pos; -+ id->k.version = op->version; -+ id->k.size = sectors; -+ -+ iter = bio->bi_iter; -+ iter.bi_size = data_len; -+ memcpy_from_bio(id->v.data, bio, iter); -+ -+ while (data_len & 7) -+ id->v.data[data_len++] = '\0'; -+ set_bkey_val_bytes(&id->k, data_len); -+ bch2_keylist_push(&op->insert_keys); -+ -+ __bch2_write_index(op); -+err: -+ bch2_write_done(&op->cl); -+} -+ -+/** -+ * bch2_write() - handle a write to a cache device or flash only volume -+ * @cl: &bch_write_op->cl -+ * -+ * This is the starting point for any data to end up in a cache device; it could -+ * be from a normal write, or a writeback write, or a write to a flash only -+ * volume - it's also used by the moving garbage collector to compact data in -+ * mostly empty buckets. -+ * -+ * It first writes the data to the cache, creating a list of keys to be inserted -+ * (if the data won't fit in a single open bucket, there will be multiple keys); -+ * after the data is written it calls bch_journal, and after the keys have been -+ * added to the next journal write they're inserted into the btree. -+ * -+ * If op->discard is true, instead of inserting the data it invalidates the -+ * region of the cache represented by op->bio and op->inode. 
-+ */ -+void bch2_write(struct closure *cl) -+{ -+ struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); -+ struct bio *bio = &op->wbio.bio; -+ struct bch_fs *c = op->c; -+ unsigned data_len; -+ -+ EBUG_ON(op->cl.parent); -+ BUG_ON(!op->nr_replicas); -+ BUG_ON(!op->write_point.v); -+ BUG_ON(bkey_eq(op->pos, POS_MAX)); -+ -+ op->start_time = local_clock(); -+ bch2_keylist_init(&op->insert_keys, op->inline_keys); -+ wbio_init(bio)->put_bio = false; -+ -+ if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { -+ bch_err_inum_offset_ratelimited(c, -+ op->pos.inode, -+ op->pos.offset << 9, -+ "misaligned write"); -+ op->error = -EIO; -+ goto err; -+ } -+ -+ if (c->opts.nochanges) { -+ op->error = -BCH_ERR_erofs_no_writes; -+ goto err; -+ } -+ -+ if (!(op->flags & BCH_WRITE_MOVE) && -+ !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { -+ op->error = -BCH_ERR_erofs_no_writes; -+ goto err; -+ } -+ -+ this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio)); -+ bch2_increment_clock(c, bio_sectors(bio), WRITE); -+ -+ data_len = min_t(u64, bio->bi_iter.bi_size, -+ op->new_i_size - (op->pos.offset << 9)); -+ -+ if (c->opts.inline_data && -+ data_len <= min(block_bytes(c) / 2, 1024U)) { -+ bch2_write_data_inline(op, data_len); -+ return; -+ } -+ -+ __bch2_write(op); -+ return; -+err: -+ bch2_disk_reservation_put(c, &op->res); -+ -+ closure_debug_destroy(&op->cl); -+ if (op->end_io) -+ op->end_io(op); -+} -+ -+static const char * const bch2_write_flags[] = { -+#define x(f) #f, -+ BCH_WRITE_FLAGS() -+#undef x -+ NULL -+}; -+ -+void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) -+{ -+ prt_str(out, "pos: "); -+ bch2_bpos_to_text(out, op->pos); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ prt_str(out, "started: "); -+ bch2_pr_time_units(out, local_clock() - op->start_time); -+ prt_newline(out); -+ -+ prt_str(out, "flags: "); -+ prt_bitflags(out, bch2_write_flags, op->flags); -+ prt_newline(out); -+ -+ prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl)); -+ prt_newline(out); -+ -+ printbuf_indent_sub(out, 2); -+} -+ -+void bch2_fs_io_write_exit(struct bch_fs *c) -+{ -+ mempool_exit(&c->bio_bounce_pages); -+ bioset_exit(&c->bio_write); -+} -+ -+int bch2_fs_io_write_init(struct bch_fs *c) -+{ -+ if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), -+ BIOSET_NEED_BVECS)) -+ return -BCH_ERR_ENOMEM_bio_write_init; -+ -+ if (mempool_init_page_pool(&c->bio_bounce_pages, -+ max_t(unsigned, -+ c->opts.btree_node_size, -+ c->opts.encoded_extent_max) / -+ PAGE_SIZE, 0)) -+ return -BCH_ERR_ENOMEM_bio_bounce_pages_init; -+ -+ return 0; -+} -diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h -new file mode 100644 -index 000000000000..9323167229ee ---- /dev/null -+++ b/fs/bcachefs/io_write.h -@@ -0,0 +1,110 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_WRITE_H -+#define _BCACHEFS_IO_WRITE_H -+ -+#include "checksum.h" -+#include "io_write_types.h" -+ -+#define to_wbio(_bio) \ -+ container_of((_bio), struct bch_write_bio, bio) -+ -+void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); -+void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); -+ -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+void bch2_latency_acct(struct bch_dev *, u64, int); -+#else -+static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {} -+#endif -+ -+void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, -+ enum bch_data_type, const struct bkey_i *, bool); -+ -+#define 
BCH_WRITE_FLAGS() \ -+ x(ALLOC_NOWAIT) \ -+ x(CACHED) \ -+ x(DATA_ENCODED) \ -+ x(PAGES_STABLE) \ -+ x(PAGES_OWNED) \ -+ x(ONLY_SPECIFIED_DEVS) \ -+ x(WROTE_DATA_INLINE) \ -+ x(FROM_INTERNAL) \ -+ x(CHECK_ENOSPC) \ -+ x(SYNC) \ -+ x(MOVE) \ -+ x(IN_WORKER) \ -+ x(DONE) \ -+ x(IO_ERROR) \ -+ x(CONVERT_UNWRITTEN) -+ -+enum __bch_write_flags { -+#define x(f) __BCH_WRITE_##f, -+ BCH_WRITE_FLAGS() -+#undef x -+}; -+ -+enum bch_write_flags { -+#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f), -+ BCH_WRITE_FLAGS() -+#undef x -+}; -+ -+static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) -+{ -+ return op->watermark == BCH_WATERMARK_copygc -+ ? op->c->copygc_wq -+ : op->c->btree_update_wq; -+} -+ -+int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, -+ struct bkey_i *, bool *, s64 *, s64 *); -+int bch2_extent_update(struct btree_trans *, subvol_inum, -+ struct btree_iter *, struct bkey_i *, -+ struct disk_reservation *, u64, s64 *, bool); -+ -+static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, -+ struct bch_io_opts opts) -+{ -+ op->c = c; -+ op->end_io = NULL; -+ op->flags = 0; -+ op->written = 0; -+ op->error = 0; -+ op->csum_type = bch2_data_checksum_type(c, opts); -+ op->compression_opt = opts.compression; -+ op->nr_replicas = 0; -+ op->nr_replicas_required = c->opts.data_replicas_required; -+ op->watermark = BCH_WATERMARK_normal; -+ op->incompressible = 0; -+ op->open_buckets.nr = 0; -+ op->devs_have.nr = 0; -+ op->target = 0; -+ op->opts = opts; -+ op->subvol = 0; -+ op->pos = POS_MAX; -+ op->version = ZERO_VERSION; -+ op->write_point = (struct write_point_specifier) { 0 }; -+ op->res = (struct disk_reservation) { 0 }; -+ op->new_i_size = U64_MAX; -+ op->i_sectors_delta = 0; -+ op->devs_need_flush = NULL; -+} -+ -+void bch2_write(struct closure *); -+ -+void bch2_write_point_do_index_updates(struct work_struct *); -+ -+static inline struct bch_write_bio *wbio_init(struct bio *bio) -+{ -+ struct bch_write_bio *wbio = to_wbio(bio); -+ -+ memset(&wbio->wbio, 0, sizeof(wbio->wbio)); -+ return wbio; -+} -+ -+void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *); -+ -+void bch2_fs_io_write_exit(struct bch_fs *); -+int bch2_fs_io_write_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_IO_WRITE_H */ -diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h -new file mode 100644 -index 000000000000..c7f97c2c4805 ---- /dev/null -+++ b/fs/bcachefs/io_write_types.h -@@ -0,0 +1,96 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_IO_WRITE_TYPES_H -+#define _BCACHEFS_IO_WRITE_TYPES_H -+ -+#include "alloc_types.h" -+#include "btree_types.h" -+#include "buckets_types.h" -+#include "extents_types.h" -+#include "keylist_types.h" -+#include "opts.h" -+#include "super_types.h" -+ -+#include -+#include -+ -+struct bch_write_bio { -+ struct_group(wbio, -+ struct bch_fs *c; -+ struct bch_write_bio *parent; -+ -+ u64 submit_time; -+ u64 inode_offset; -+ -+ struct bch_devs_list failed; -+ u8 dev; -+ -+ unsigned split:1, -+ bounce:1, -+ put_bio:1, -+ have_ioref:1, -+ nocow:1, -+ used_mempool:1, -+ first_btree_write:1; -+ ); -+ -+ struct bio bio; -+}; -+ -+struct bch_write_op { -+ struct closure cl; -+ struct bch_fs *c; -+ void (*end_io)(struct bch_write_op *); -+ u64 start_time; -+ -+ unsigned written; /* sectors */ -+ u16 flags; -+ s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ -+ -+ unsigned compression_opt:8; -+ unsigned csum_type:4; -+ unsigned nr_replicas:4; -+ unsigned nr_replicas_required:4; -+ unsigned watermark:3; -+ unsigned incompressible:1; -+ unsigned stripe_waited:1; -+ -+ struct bch_devs_list devs_have; -+ u16 target; -+ u16 nonce; -+ struct bch_io_opts opts; -+ -+ u32 subvol; -+ struct bpos pos; -+ struct bversion version; -+ -+ /* For BCH_WRITE_DATA_ENCODED: */ -+ struct bch_extent_crc_unpacked crc; -+ -+ struct write_point_specifier write_point; -+ -+ struct write_point *wp; -+ struct list_head wp_list; -+ -+ struct disk_reservation res; -+ -+ struct open_buckets open_buckets; -+ -+ u64 new_i_size; -+ s64 i_sectors_delta; -+ -+ struct bch_devs_mask failed; -+ -+ struct keylist insert_keys; -+ u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; -+ -+ /* -+ * Bitmask of devices that have had nocow writes issued to them since -+ * last flush: -+ */ -+ struct bch_devs_mask *devs_need_flush; -+ -+ /* Must be last: */ -+ struct bch_write_bio wbio; -+}; -+ -+#endif /* _BCACHEFS_IO_WRITE_TYPES_H */ -diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c -new file mode 100644 -index 000000000000..5b5d69f2316b ---- /dev/null -+++ b/fs/bcachefs/journal.c -@@ -0,0 +1,1468 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs journalling code, for btree insertions -+ * -+ * Copyright 2012 Google, Inc. -+ */ -+ -+#include "bcachefs.h" -+#include "alloc_foreground.h" -+#include "bkey_methods.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_sb.h" -+#include "journal_seq_blacklist.h" -+#include "trace.h" -+ -+static const char * const bch2_journal_errors[] = { -+#define x(n) #n, -+ JOURNAL_ERRORS() -+#undef x -+ NULL -+}; -+ -+static inline bool journal_seq_unwritten(struct journal *j, u64 seq) -+{ -+ return seq > j->seq_ondisk; -+} -+ -+static bool __journal_entry_is_open(union journal_res_state state) -+{ -+ return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL; -+} -+ -+static inline unsigned nr_unwritten_journal_entries(struct journal *j) -+{ -+ return atomic64_read(&j->seq) - j->seq_ondisk; -+} -+ -+static bool journal_entry_is_open(struct journal *j) -+{ -+ return __journal_entry_is_open(j->reservations); -+} -+ -+static inline struct journal_buf * -+journal_seq_to_buf(struct journal *j, u64 seq) -+{ -+ struct journal_buf *buf = NULL; -+ -+ EBUG_ON(seq > journal_cur_seq(j)); -+ -+ if (journal_seq_unwritten(j, seq)) { -+ buf = j->buf + (seq & JOURNAL_BUF_MASK); -+ EBUG_ON(le64_to_cpu(buf->data->seq) != seq); -+ } -+ return buf; -+} -+ -+static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(p->list); i++) -+ INIT_LIST_HEAD(&p->list[i]); -+ INIT_LIST_HEAD(&p->flushed); -+ atomic_set(&p->count, count); -+ p->devs.nr = 0; -+} -+ -+/* -+ * Detect stuck journal conditions and trigger shutdown. Technically the journal -+ * can end up stuck for a variety of reasons, such as a blocked I/O, journal -+ * reservation lockup, etc. Since this is a fatal error with potentially -+ * unpredictable characteristics, we want to be fairly conservative before we -+ * decide to shut things down. -+ * -+ * Consider the journal stuck when it appears full with no ability to commit -+ * btree transactions, to discard journal buckets, nor acquire priority -+ * (reserved watermark) reservation. 
-+static inline bool -+journal_error_check_stuck(struct journal *j, int error, unsigned flags) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ bool stuck = false; -+ struct printbuf buf = PRINTBUF; -+ -+ if (!(error == JOURNAL_ERR_journal_full || -+ error == JOURNAL_ERR_journal_pin_full) || -+ nr_unwritten_journal_entries(j) || -+ (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) -+ return stuck; -+ -+ spin_lock(&j->lock); -+ -+ if (j->can_discard) { -+ spin_unlock(&j->lock); -+ return stuck; -+ } -+ -+ stuck = true; -+ -+ /* -+ * The journal shutdown path will set ->err_seq, but do it here first to -+ * serialize against concurrent failures and avoid duplicate error -+ * reports. -+ */ -+ if (j->err_seq) { -+ spin_unlock(&j->lock); -+ return stuck; -+ } -+ j->err_seq = journal_cur_seq(j); -+ spin_unlock(&j->lock); -+ -+ bch_err(c, "Journal stuck! Have a pre-reservation but journal full (error %s)", -+ bch2_journal_errors[error]); -+ bch2_journal_debug_to_text(&buf, j); -+ bch_err(c, "%s", buf.buf); -+ -+ printbuf_reset(&buf); -+ bch2_journal_pins_to_text(&buf, j); -+ bch_err(c, "Journal pins:\n%s", buf.buf); -+ printbuf_exit(&buf); -+ -+ bch2_fatal_error(c); -+ dump_stack(); -+ -+ return stuck; -+} -+ -+/* -+ * Final processing when the last reference of a journal buffer has been -+ * dropped. Drop the pin list reference acquired at journal entry open and write -+ * the buffer, if requested. -+ */ -+void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ -+ lockdep_assert_held(&j->lock); -+ -+ if (__bch2_journal_pin_put(j, seq)) -+ bch2_journal_reclaim_fast(j); -+ if (write) -+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL); -+} -+ -+/* -+ * Returns true if journal entry is now closed: -+ * -+ * We don't close a journal_buf until the next journal_buf is finished writing, -+ * and can be opened again - this also initializes the next journal_buf: -+ */ -+static void __journal_entry_close(struct journal *j, unsigned closed_val) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf = journal_cur_buf(j); -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ unsigned sectors; -+ -+ BUG_ON(closed_val != JOURNAL_ENTRY_CLOSED_VAL && -+ closed_val != JOURNAL_ENTRY_ERROR_VAL); -+ -+ lockdep_assert_held(&j->lock); -+ -+ do { -+ old.v = new.v = v; -+ new.cur_entry_offset = closed_val; -+ -+ if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL || -+ old.cur_entry_offset == new.cur_entry_offset) -+ return; -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ if (!__journal_entry_is_open(old)) -+ return; -+ -+ /* Close out old buffer: */ -+ buf->data->u64s = cpu_to_le32(old.cur_entry_offset); -+ -+ sectors = vstruct_blocks_plus(buf->data, c->block_bits, -+ buf->u64s_reserved) << c->block_bits; -+ BUG_ON(sectors > buf->sectors); -+ buf->sectors = sectors; -+ -+ /* -+ * We have to set last_seq here, _before_ opening a new journal entry: -+ * -+ * A thread may replace an old pin with a new pin on its current -+ * journal reservation - the expectation being that the journal will -+ * contain either what the old pin protected or what the new pin -+ * protects. -+ * -+ * After the old pin is dropped journal_last_seq() won't include the old -+ * pin, so we can only write the updated last_seq on the entry that -+ * contains whatever the new pin protects. 
-+ *
-+ * Restated, we can _not_ update last_seq for a given entry if there
-+ * could be a newer entry open with reservations/pins that have been
-+ * taken against it.
-+ *
-+ * Hence, we want to update/set last_seq on the current journal entry right
-+ * before we open a new one:
-+ */
-+ buf->last_seq = journal_last_seq(j);
-+ buf->data->last_seq = cpu_to_le64(buf->last_seq);
-+ BUG_ON(buf->last_seq > le64_to_cpu(buf->data->seq));
-+
-+ cancel_delayed_work(&j->write_work);
-+
-+ bch2_journal_space_available(j);
-+
-+ __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
-+}
-+
-+void bch2_journal_halt(struct journal *j)
-+{
-+ spin_lock(&j->lock);
-+ __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
-+ if (!j->err_seq)
-+ j->err_seq = journal_cur_seq(j);
-+ journal_wake(j);
-+ spin_unlock(&j->lock);
-+}
-+
-+static bool journal_entry_want_write(struct journal *j)
-+{
-+ bool ret = !journal_entry_is_open(j) ||
-+ journal_cur_seq(j) == journal_last_unwritten_seq(j);
-+
-+ /* Don't close it yet if we already have a write in flight: */
-+ if (ret)
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+ else if (nr_unwritten_journal_entries(j)) {
-+ struct journal_buf *buf = journal_cur_buf(j);
-+
-+ if (!buf->flush_time) {
-+ buf->flush_time = local_clock() ?: 1;
-+ buf->expires = jiffies;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+static bool journal_entry_close(struct journal *j)
-+{
-+ bool ret;
-+
-+ spin_lock(&j->lock);
-+ ret = journal_entry_want_write(j);
-+ spin_unlock(&j->lock);
-+
-+ return ret;
-+}
-+
-+/*
-+ * should _only_ be called from journal_res_get() - when we actually want a
-+ * journal reservation - journal entry is open means journal is dirty:
-+ */
-+static int journal_entry_open(struct journal *j)
-+{
-+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+ struct journal_buf *buf = j->buf +
-+ ((journal_cur_seq(j) + 1) & JOURNAL_BUF_MASK);
-+ union journal_res_state old, new;
-+ int u64s;
-+ u64 v;
-+
-+ lockdep_assert_held(&j->lock);
-+ BUG_ON(journal_entry_is_open(j));
-+ BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
-+
-+ if (j->blocked)
-+ return JOURNAL_ERR_blocked;
-+
-+ if (j->cur_entry_error)
-+ return j->cur_entry_error;
-+
-+ if (bch2_journal_error(j))
-+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */
-+
-+ if (!fifo_free(&j->pin))
-+ return JOURNAL_ERR_journal_pin_full;
-+
-+ if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
-+ return JOURNAL_ERR_max_in_flight;
-+
-+ BUG_ON(!j->cur_entry_sectors);
-+
-+ buf->expires =
-+ (journal_cur_seq(j) == j->flushed_seq_ondisk
-+ ?
jiffies -+ : j->last_flush_write) + -+ msecs_to_jiffies(c->opts.journal_flush_delay); -+ -+ buf->u64s_reserved = j->entry_u64s_reserved; -+ buf->disk_sectors = j->cur_entry_sectors; -+ buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9); -+ -+ u64s = (int) (buf->sectors << 9) / sizeof(u64) - -+ journal_entry_overhead(j); -+ u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); -+ -+ if (u64s <= (ssize_t) j->early_journal_entries.nr) -+ return JOURNAL_ERR_journal_full; -+ -+ if (fifo_empty(&j->pin) && j->reclaim_thread) -+ wake_up_process(j->reclaim_thread); -+ -+ /* -+ * The fifo_push() needs to happen at the same time as j->seq is -+ * incremented for journal_last_seq() to be calculated correctly -+ */ -+ atomic64_inc(&j->seq); -+ journal_pin_list_init(fifo_push_ref(&j->pin), 1); -+ -+ BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf); -+ -+ bkey_extent_init(&buf->key); -+ buf->noflush = false; -+ buf->must_flush = false; -+ buf->separate_flush = false; -+ buf->flush_time = 0; -+ -+ memset(buf->data, 0, sizeof(*buf->data)); -+ buf->data->seq = cpu_to_le64(journal_cur_seq(j)); -+ buf->data->u64s = 0; -+ -+ if (j->early_journal_entries.nr) { -+ memcpy(buf->data->_data, j->early_journal_entries.data, -+ j->early_journal_entries.nr * sizeof(u64)); -+ le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr); -+ } -+ -+ /* -+ * Must be set before marking the journal entry as open: -+ */ -+ j->cur_entry_u64s = u64s; -+ -+ v = atomic64_read(&j->reservations.counter); -+ do { -+ old.v = new.v = v; -+ -+ BUG_ON(old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL); -+ -+ new.idx++; -+ BUG_ON(journal_state_count(new, new.idx)); -+ BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK)); -+ -+ journal_state_inc(&new); -+ -+ /* Handle any already added entries */ -+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s); -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ if (j->res_get_blocked_start) -+ bch2_time_stats_update(j->blocked_time, -+ j->res_get_blocked_start); -+ j->res_get_blocked_start = 0; -+ -+ mod_delayed_work(c->io_complete_wq, -+ &j->write_work, -+ msecs_to_jiffies(c->opts.journal_flush_delay)); -+ journal_wake(j); -+ -+ if (j->early_journal_entries.nr) -+ darray_exit(&j->early_journal_entries); -+ return 0; -+} -+ -+static bool journal_quiesced(struct journal *j) -+{ -+ bool ret = atomic64_read(&j->seq) == j->seq_ondisk; -+ -+ if (!ret) -+ journal_entry_close(j); -+ return ret; -+} -+ -+static void journal_quiesce(struct journal *j) -+{ -+ wait_event(j->wait, journal_quiesced(j)); -+} -+ -+static void journal_write_work(struct work_struct *work) -+{ -+ struct journal *j = container_of(work, struct journal, write_work.work); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ long delta; -+ -+ spin_lock(&j->lock); -+ if (!__journal_entry_is_open(j->reservations)) -+ goto unlock; -+ -+ delta = journal_cur_buf(j)->expires - jiffies; -+ -+ if (delta > 0) -+ mod_delayed_work(c->io_complete_wq, &j->write_work, delta); -+ else -+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); -+unlock: -+ spin_unlock(&j->lock); -+} -+ -+static int __journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *buf; -+ bool can_discard; -+ int ret; -+retry: -+ if (journal_res_get_fast(j, res, flags)) -+ return 0; -+ -+ if (bch2_journal_error(j)) -+ return -BCH_ERR_erofs_journal_err; -+ -+ spin_lock(&j->lock); -+ -+ 
/* check once more in case somebody else shut things down... */
-+ if (bch2_journal_error(j)) {
-+ spin_unlock(&j->lock);
-+ return -BCH_ERR_erofs_journal_err;
-+ }
-+
-+ /*
-+ * Recheck after taking the lock, so we don't race with another thread
-+ * that just did journal_entry_open() and call journal_entry_close()
-+ * unnecessarily
-+ */
-+ if (journal_res_get_fast(j, res, flags)) {
-+ spin_unlock(&j->lock);
-+ return 0;
-+ }
-+
-+ if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
-+ /*
-+ * Don't want to close current journal entry, just need to
-+ * invoke reclaim:
-+ */
-+ ret = JOURNAL_ERR_journal_full;
-+ goto unlock;
-+ }
-+
-+ /*
-+ * If we couldn't get a reservation because the current buf filled up,
-+ * and we had room for a bigger entry on disk, signal that we want to
-+ * realloc the journal bufs:
-+ */
-+ buf = journal_cur_buf(j);
-+ if (journal_entry_is_open(j) &&
-+ buf->buf_size >> 9 < buf->disk_sectors &&
-+ buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
-+ j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
-+
-+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
-+ ret = journal_entry_open(j);
-+
-+ if (ret == JOURNAL_ERR_max_in_flight)
-+ trace_and_count(c, journal_entry_full, c);
-+unlock:
-+ if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
-+ !j->res_get_blocked_start) {
-+ j->res_get_blocked_start = local_clock() ?: 1;
-+ trace_and_count(c, journal_full, c);
-+ }
-+
-+ can_discard = j->can_discard;
-+ spin_unlock(&j->lock);
-+
-+ if (!ret)
-+ goto retry;
-+ if (journal_error_check_stuck(j, ret, flags))
-+ ret = -BCH_ERR_journal_res_get_blocked;
-+
-+ /*
-+ * Journal is full - can't rely on reclaim from work item due to
-+ * freezing:
-+ */
-+ if ((ret == JOURNAL_ERR_journal_full ||
-+ ret == JOURNAL_ERR_journal_pin_full) &&
-+ !(flags & JOURNAL_RES_GET_NONBLOCK)) {
-+ if (can_discard) {
-+ bch2_journal_do_discards(j);
-+ goto retry;
-+ }
-+
-+ if (mutex_trylock(&j->reclaim_lock)) {
-+ bch2_journal_reclaim(j);
-+ mutex_unlock(&j->reclaim_lock);
-+ }
-+ }
-+
-+ return ret == JOURNAL_ERR_insufficient_devices
-+ ? -BCH_ERR_erofs_journal_err
-+ : -BCH_ERR_journal_res_get_blocked;
-+}
-+
-+/*
-+ * Essentially the entry function to the journaling code. When bcachefs is doing
-+ * a btree insert, it calls this function to get the current journal write.
-+ * Journal write is the structure used to set up journal writes. The calling
-+ * function will then add its keys to the structure, queuing them for the next
-+ * write.
-+ *
-+ * To ensure forward progress, the current task must not be holding any
-+ * btree node write locks.
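-+ *
-+ * With JOURNAL_RES_GET_NONBLOCK, this returns the error from
-+ * __journal_res_get() (e.g. -BCH_ERR_journal_res_get_blocked) rather than
-+ * waiting for a reservation to become available.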
-+ */ -+int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->async_wait, -+ (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || -+ (flags & JOURNAL_RES_GET_NONBLOCK)); -+ return ret; -+} -+ -+/* journal_preres: */ -+ -+static bool journal_preres_available(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true); -+ -+ if (!ret && mutex_trylock(&j->reclaim_lock)) { -+ bch2_journal_reclaim(j); -+ mutex_unlock(&j->reclaim_lock); -+ } -+ -+ return ret; -+} -+ -+int __bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ int ret; -+ -+ closure_wait_event(&j->preres_wait, -+ (ret = bch2_journal_error(j)) || -+ journal_preres_available(j, res, new_u64s, flags)); -+ return ret; -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *j, -+ struct journal_entry_res *res, -+ unsigned new_u64s) -+{ -+ union journal_res_state state; -+ int d = new_u64s - res->u64s; -+ -+ spin_lock(&j->lock); -+ -+ j->entry_u64s_reserved += d; -+ if (d <= 0) -+ goto out; -+ -+ j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); -+ smp_mb(); -+ state = READ_ONCE(j->reservations); -+ -+ if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL && -+ state.cur_entry_offset > j->cur_entry_u64s) { -+ j->cur_entry_u64s += d; -+ /* -+ * Not enough room in current journal entry, have to flush it: -+ */ -+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); -+ } else { -+ journal_cur_buf(j)->u64s_reserved += d; -+ } -+out: -+ spin_unlock(&j->lock); -+ res->u64s += d; -+} -+ -+/* journal flushing: */ -+ -+/** -+ * bch2_journal_flush_seq_async - wait for a journal entry to be written -+ * @j: journal object -+ * @seq: seq to flush -+ * @parent: closure object to wait with -+ * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed, -+ * -EIO if @seq will never be flushed -+ * -+ * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if -+ * necessary -+ */ -+int bch2_journal_flush_seq_async(struct journal *j, u64 seq, -+ struct closure *parent) -+{ -+ struct journal_buf *buf; -+ int ret = 0; -+ -+ if (seq <= j->flushed_seq_ondisk) -+ return 1; -+ -+ spin_lock(&j->lock); -+ -+ if (WARN_ONCE(seq > journal_cur_seq(j), -+ "requested to flush journal seq %llu, but currently at %llu", -+ seq, journal_cur_seq(j))) -+ goto out; -+ -+ /* Recheck under lock: */ -+ if (j->err_seq && seq >= j->err_seq) { -+ ret = -EIO; -+ goto out; -+ } -+ -+ if (seq <= j->flushed_seq_ondisk) { -+ ret = 1; -+ goto out; -+ } -+ -+ /* if seq was written, but not flushed - flush a newer one instead */ -+ seq = max(seq, journal_last_unwritten_seq(j)); -+ -+recheck_need_open: -+ if (seq > journal_cur_seq(j)) { -+ struct journal_res res = { 0 }; -+ -+ if (journal_entry_is_open(j)) -+ __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); -+ -+ spin_unlock(&j->lock); -+ -+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ if (ret) -+ return ret; -+ -+ seq = res.seq; -+ buf = j->buf + (seq & JOURNAL_BUF_MASK); -+ buf->must_flush = true; -+ -+ if (!buf->flush_time) { -+ buf->flush_time = local_clock() ?: 1; -+ buf->expires = jiffies; -+ } -+ -+ if (parent && !closure_wait(&buf->wait, parent)) -+ BUG(); -+ -+ bch2_journal_res_put(j, &res); -+ -+ spin_lock(&j->lock); -+ goto want_write; -+ } -+ -+ /* 
-+ * if write was kicked off without a flush, flush the next sequence -+ * number instead -+ */ -+ buf = journal_seq_to_buf(j, seq); -+ if (buf->noflush) { -+ seq++; -+ goto recheck_need_open; -+ } -+ -+ buf->must_flush = true; -+ -+ if (parent && !closure_wait(&buf->wait, parent)) -+ BUG(); -+want_write: -+ if (seq == journal_cur_seq(j)) -+ journal_entry_want_write(j); -+out: -+ spin_unlock(&j->lock); -+ return ret; -+} -+ -+int bch2_journal_flush_seq(struct journal *j, u64 seq) -+{ -+ u64 start_time = local_clock(); -+ int ret, ret2; -+ -+ /* -+ * Don't update time_stats when @seq is already flushed: -+ */ -+ if (seq <= j->flushed_seq_ondisk) -+ return 0; -+ -+ ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL))); -+ -+ if (!ret) -+ bch2_time_stats_update(j->flush_seq_time, start_time); -+ -+ return ret ?: ret2 < 0 ? ret2 : 0; -+} -+ -+/* -+ * bch2_journal_flush_async - if there is an open journal entry, or a journal -+ * still being written, write it and wait for the write to complete -+ */ -+void bch2_journal_flush_async(struct journal *j, struct closure *parent) -+{ -+ bch2_journal_flush_seq_async(j, atomic64_read(&j->seq), parent); -+} -+ -+int bch2_journal_flush(struct journal *j) -+{ -+ return bch2_journal_flush_seq(j, atomic64_read(&j->seq)); -+} -+ -+/* -+ * bch2_journal_noflush_seq - tell the journal not to issue any flushes before -+ * @seq -+ */ -+bool bch2_journal_noflush_seq(struct journal *j, u64 seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ u64 unwritten_seq; -+ bool ret = false; -+ -+ if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) -+ return false; -+ -+ if (seq <= c->journal.flushed_seq_ondisk) -+ return false; -+ -+ spin_lock(&j->lock); -+ if (seq <= c->journal.flushed_seq_ondisk) -+ goto out; -+ -+ for (unwritten_seq = journal_last_unwritten_seq(j); -+ unwritten_seq < seq; -+ unwritten_seq++) { -+ struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq); -+ -+ /* journal write is already in flight, and was a flush write: */ -+ if (unwritten_seq == journal_last_unwritten_seq(j) && !buf->noflush) -+ goto out; -+ -+ buf->noflush = true; -+ } -+ -+ ret = true; -+out: -+ spin_unlock(&j->lock); -+ return ret; -+} -+ -+int bch2_journal_meta(struct journal *j) -+{ -+ struct journal_buf *buf; -+ struct journal_res res; -+ int ret; -+ -+ memset(&res, 0, sizeof(res)); -+ -+ ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); -+ if (ret) -+ return ret; -+ -+ buf = j->buf + (res.seq & JOURNAL_BUF_MASK); -+ buf->must_flush = true; -+ -+ if (!buf->flush_time) { -+ buf->flush_time = local_clock() ?: 1; -+ buf->expires = jiffies; -+ } -+ -+ bch2_journal_res_put(j, &res); -+ -+ return bch2_journal_flush_seq(j, res.seq); -+} -+ -+/* block/unlock the journal: */ -+ -+void bch2_journal_unblock(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked--; -+ spin_unlock(&j->lock); -+ -+ journal_wake(j); -+} -+ -+void bch2_journal_block(struct journal *j) -+{ -+ spin_lock(&j->lock); -+ j->blocked++; -+ spin_unlock(&j->lock); -+ -+ journal_quiesce(j); -+} -+ -+/* allocate journal on a device: */ -+ -+static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, -+ bool new_fs, struct closure *cl) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ u64 *new_bucket_seq = NULL, *new_buckets = NULL; -+ struct open_bucket **ob = NULL; -+ long *bu = NULL; -+ unsigned i, pos, nr_got = 0, nr_want = nr - ja->nr; -+ int ret = 0; -+ -+ BUG_ON(nr <= ja->nr); -+ -+ bu = 
kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
-+ ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
-+ new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL);
-+ new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
-+ if (!bu || !ob || !new_buckets || !new_bucket_seq) {
-+ ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets;
-+ goto err_free;
-+ }
-+
-+ for (nr_got = 0; nr_got < nr_want; nr_got++) {
-+ if (new_fs) {
-+ bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
-+ if (bu[nr_got] < 0) {
-+ ret = -BCH_ERR_ENOSPC_bucket_alloc;
-+ break;
-+ }
-+ } else {
-+ ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal, cl);
-+ ret = PTR_ERR_OR_ZERO(ob[nr_got]);
-+ if (ret)
-+ break;
-+
-+ ret = bch2_trans_run(c,
-+ bch2_trans_mark_metadata_bucket(trans, ca,
-+ ob[nr_got]->bucket, BCH_DATA_journal,
-+ ca->mi.bucket_size));
-+ if (ret) {
-+ bch2_open_bucket_put(c, ob[nr_got]);
-+ bch_err_msg(c, ret, "marking new journal buckets");
-+ break;
-+ }
-+
-+ bu[nr_got] = ob[nr_got]->bucket;
-+ }
-+ }
-+
-+ if (!nr_got)
-+ goto err_free;
-+
-+ /* Don't return an error if we successfully allocated some buckets: */
-+ ret = 0;
-+
-+ if (c) {
-+ bch2_journal_flush_all_pins(&c->journal);
-+ bch2_journal_block(&c->journal);
-+ mutex_lock(&c->sb_lock);
-+ }
-+
-+ memcpy(new_buckets, ja->buckets, ja->nr * sizeof(u64));
-+ memcpy(new_bucket_seq, ja->bucket_seq, ja->nr * sizeof(u64));
-+
-+ BUG_ON(ja->discard_idx > ja->nr);
-+
-+ pos = ja->discard_idx ?: ja->nr;
-+
-+ memmove(new_buckets + pos + nr_got,
-+ new_buckets + pos,
-+ sizeof(new_buckets[0]) * (ja->nr - pos));
-+ memmove(new_bucket_seq + pos + nr_got,
-+ new_bucket_seq + pos,
-+ sizeof(new_bucket_seq[0]) * (ja->nr - pos));
-+
-+ for (i = 0; i < nr_got; i++) {
-+ new_buckets[pos + i] = bu[i];
-+ new_bucket_seq[pos + i] = 0;
-+ }
-+
-+ nr = ja->nr + nr_got;
-+
-+ ret = bch2_journal_buckets_to_sb(c, ca, new_buckets, nr);
-+ if (ret)
-+ goto err_unblock;
-+
-+ if (!new_fs)
-+ bch2_write_super(c);
-+
-+ /* Commit: */
-+ if (c)
-+ spin_lock(&c->journal.lock);
-+
-+ swap(new_buckets, ja->buckets);
-+ swap(new_bucket_seq, ja->bucket_seq);
-+ ja->nr = nr;
-+
-+ if (pos <= ja->discard_idx)
-+ ja->discard_idx = (ja->discard_idx + nr_got) % ja->nr;
-+ if (pos <= ja->dirty_idx_ondisk)
-+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + nr_got) % ja->nr;
-+ if (pos <= ja->dirty_idx)
-+ ja->dirty_idx = (ja->dirty_idx + nr_got) % ja->nr;
-+ if (pos <= ja->cur_idx)
-+ ja->cur_idx = (ja->cur_idx + nr_got) % ja->nr;
-+
-+ if (c)
-+ spin_unlock(&c->journal.lock);
-+err_unblock:
-+ if (c) {
-+ bch2_journal_unblock(&c->journal);
-+ mutex_unlock(&c->sb_lock);
-+ }
-+
-+ if (ret && !new_fs)
-+ for (i = 0; i < nr_got; i++)
-+ bch2_trans_run(c,
-+ bch2_trans_mark_metadata_bucket(trans, ca,
-+ bu[i], BCH_DATA_free, 0));
-+err_free:
-+ if (!new_fs)
-+ for (i = 0; i < nr_got; i++)
-+ bch2_open_bucket_put(c, ob[i]);
-+
-+ kfree(new_bucket_seq);
-+ kfree(new_buckets);
-+ kfree(ob);
-+ kfree(bu);
-+ return ret;
-+}
-+
-+/*
-+ * Allocate more journal space at runtime - not currently making use of it, but
-+ * the code works:
-+ */
-+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
-+ unsigned nr)
-+{
-+ struct journal_device *ja = &ca->journal;
-+ struct closure cl;
-+ int ret = 0;
-+
-+ closure_init_stack(&cl);
-+
-+ down_write(&c->state_lock);
-+
-+ /* don't handle reducing nr of buckets yet: */
-+ if (nr < ja->nr)
-+ goto unlock;
-+
-+ while (ja->nr < nr) {
-+ struct disk_reservation disk_res = { 0, 0, 0 };
-+
-+ /*
-+ * note: journal buckets aren't really counted as _sectors_ used
yet, so -+ * we don't need the disk reservation to avoid the BUG_ON() in buckets.c -+ * when space used goes up without a reservation - but we do need the -+ * reservation to ensure we'll actually be able to allocate: -+ * -+ * XXX: that's not right, disk reservations only ensure a -+ * filesystem-wide allocation will succeed, this is a device -+ * specific allocation - we can hang here: -+ */ -+ -+ ret = bch2_disk_reservation_get(c, &disk_res, -+ bucket_to_sector(ca, nr - ja->nr), 1, 0); -+ if (ret) -+ break; -+ -+ ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl); -+ -+ bch2_disk_reservation_put(c, &disk_res); -+ -+ closure_sync(&cl); -+ -+ if (ret && ret != -BCH_ERR_bucket_alloc_blocked) -+ break; -+ } -+ -+ if (ret) -+ bch_err_fn(c, ret); -+unlock: -+ up_write(&c->state_lock); -+ return ret; -+} -+ -+int bch2_dev_journal_alloc(struct bch_dev *ca) -+{ -+ unsigned nr; -+ int ret; -+ -+ if (dynamic_fault("bcachefs:add:journal_alloc")) { -+ ret = -BCH_ERR_ENOMEM_set_nr_journal_buckets; -+ goto err; -+ } -+ -+ /* 1/128th of the device by default: */ -+ nr = ca->mi.nbuckets >> 7; -+ -+ /* -+ * clamp journal size to 8192 buckets or 8GB (in sectors), whichever -+ * is smaller: -+ */ -+ nr = clamp_t(unsigned, nr, -+ BCH_JOURNAL_BUCKETS_MIN, -+ min(1 << 13, -+ (1 << 24) / ca->mi.bucket_size)); -+ -+ ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); -+err: -+ if (ret) -+ bch_err_fn(ca, ret); -+ return ret; -+} -+ -+int bch2_fs_journal_alloc(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ for_each_online_member(ca, c, i) { -+ if (ca->journal.nr) -+ continue; -+ -+ int ret = bch2_dev_journal_alloc(ca); -+ if (ret) { -+ percpu_ref_put(&ca->io_ref); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+/* startup/shutdown: */ -+ -+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) -+{ -+ bool ret = false; -+ u64 seq; -+ -+ spin_lock(&j->lock); -+ for (seq = journal_last_unwritten_seq(j); -+ seq <= journal_cur_seq(j) && !ret; -+ seq++) { -+ struct journal_buf *buf = journal_seq_to_buf(j, seq); -+ -+ if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx)) -+ ret = true; -+ } -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) -+{ -+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx)); -+} -+ -+void bch2_fs_journal_stop(struct journal *j) -+{ -+ bch2_journal_reclaim_stop(j); -+ bch2_journal_flush_all_pins(j); -+ -+ wait_event(j->wait, journal_entry_close(j)); -+ -+ /* -+ * Always write a new journal entry, to make sure the clock hands are up -+ * to date (and match the superblock) -+ */ -+ bch2_journal_meta(j); -+ -+ journal_quiesce(j); -+ -+ BUG_ON(!bch2_journal_error(j) && -+ test_bit(JOURNAL_REPLAY_DONE, &j->flags) && -+ j->last_empty_seq != journal_cur_seq(j)); -+ -+ cancel_delayed_work_sync(&j->write_work); -+} -+ -+int bch2_fs_journal_start(struct journal *j, u64 cur_seq) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ struct journal_replay *i, **_i; -+ struct genradix_iter iter; -+ bool had_entries = false; -+ unsigned ptr; -+ u64 last_seq = cur_seq, nr, seq; -+ -+ genradix_for_each_reverse(&c->journal_entries, iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ last_seq = le64_to_cpu(i->j.last_seq); -+ break; -+ } -+ -+ nr = cur_seq - last_seq; -+ -+ if (nr + 1 > j->pin.size) { -+ free_fifo(&j->pin); -+ init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); -+ if 
(!j->pin.data) { -+ bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); -+ return -BCH_ERR_ENOMEM_journal_pin_fifo; -+ } -+ } -+ -+ j->replay_journal_seq = last_seq; -+ j->replay_journal_seq_end = cur_seq; -+ j->last_seq_ondisk = last_seq; -+ j->flushed_seq_ondisk = cur_seq - 1; -+ j->seq_ondisk = cur_seq - 1; -+ j->pin.front = last_seq; -+ j->pin.back = cur_seq; -+ atomic64_set(&j->seq, cur_seq - 1); -+ -+ fifo_for_each_entry_ptr(p, &j->pin, seq) -+ journal_pin_list_init(p, 1); -+ -+ genradix_for_each(&c->journal_entries, iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ seq = le64_to_cpu(i->j.seq); -+ BUG_ON(seq >= cur_seq); -+ -+ if (seq < last_seq) -+ continue; -+ -+ if (journal_entry_empty(&i->j)) -+ j->last_empty_seq = le64_to_cpu(i->j.seq); -+ -+ p = journal_seq_pin(j, seq); -+ -+ p->devs.nr = 0; -+ for (ptr = 0; ptr < i->nr_ptrs; ptr++) -+ bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev); -+ -+ had_entries = true; -+ } -+ -+ if (!had_entries) -+ j->last_empty_seq = cur_seq; -+ -+ spin_lock(&j->lock); -+ -+ set_bit(JOURNAL_STARTED, &j->flags); -+ j->last_flush_write = jiffies; -+ -+ j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j); -+ j->reservations.unwritten_idx++; -+ -+ c->last_bucket_seq_cleanup = journal_cur_seq(j); -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ return bch2_journal_reclaim_start(j); -+} -+ -+/* init/exit: */ -+ -+void bch2_dev_journal_exit(struct bch_dev *ca) -+{ -+ kfree(ca->journal.bio); -+ kfree(ca->journal.buckets); -+ kfree(ca->journal.bucket_seq); -+ -+ ca->journal.bio = NULL; -+ ca->journal.buckets = NULL; -+ ca->journal.bucket_seq = NULL; -+} -+ -+int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) -+{ -+ struct journal_device *ja = &ca->journal; -+ struct bch_sb_field_journal *journal_buckets = -+ bch2_sb_field_get(sb, journal); -+ struct bch_sb_field_journal_v2 *journal_buckets_v2 = -+ bch2_sb_field_get(sb, journal_v2); -+ unsigned i, nr_bvecs; -+ -+ ja->nr = 0; -+ -+ if (journal_buckets_v2) { -+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); -+ -+ for (i = 0; i < nr; i++) -+ ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr); -+ } else if (journal_buckets) { -+ ja->nr = bch2_nr_journal_buckets(journal_buckets); -+ } -+ -+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->bucket_seq) -+ return -BCH_ERR_ENOMEM_dev_journal_init; -+ -+ nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); -+ -+ ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); -+ if (!ca->journal.bio) -+ return -BCH_ERR_ENOMEM_dev_journal_init; -+ -+ bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0); -+ -+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL); -+ if (!ja->buckets) -+ return -BCH_ERR_ENOMEM_dev_journal_init; -+ -+ if (journal_buckets_v2) { -+ unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2); -+ unsigned j, dst = 0; -+ -+ for (i = 0; i < nr; i++) -+ for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++) -+ ja->buckets[dst++] = -+ le64_to_cpu(journal_buckets_v2->d[i].start) + j; -+ } else if (journal_buckets) { -+ for (i = 0; i < ja->nr; i++) -+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]); -+ } -+ -+ return 0; -+} -+ -+void bch2_fs_journal_exit(struct journal *j) -+{ -+ unsigned i; -+ -+ darray_exit(&j->early_journal_entries); -+ -+ for (i = 0; i < ARRAY_SIZE(j->buf); i++) -+ kvpfree(j->buf[i].data, j->buf[i].buf_size); -+ free_fifo(&j->pin); -+} -+ 
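-+/*
-+ * One-time setup of the in-memory journal: locks, waitqueues, the pin fifo
-+ * and the initial journal buffers. The expected lifecycle, as used by the
-+ * fs init/teardown paths, is:
-+ *
-+ *	bch2_fs_journal_init()	allocate pin fifo + journal buffers
-+ *	bch2_fs_journal_start()	resume from the sequence recovery found
-+ *	bch2_fs_journal_stop()	flush pins and quiesce
-+ *	bch2_fs_journal_exit()	free buffers and the pin fifo
-+ */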
-+int bch2_fs_journal_init(struct journal *j) -+{ -+ static struct lock_class_key res_key; -+ unsigned i; -+ -+ spin_lock_init(&j->lock); -+ spin_lock_init(&j->err_lock); -+ init_waitqueue_head(&j->wait); -+ INIT_DELAYED_WORK(&j->write_work, journal_write_work); -+ init_waitqueue_head(&j->reclaim_wait); -+ init_waitqueue_head(&j->pin_flush_wait); -+ mutex_init(&j->reclaim_lock); -+ mutex_init(&j->discard_lock); -+ -+ lockdep_init_map(&j->res_map, "journal res", &res_key, 0); -+ -+ atomic64_set(&j->reservations.counter, -+ ((union journal_res_state) -+ { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); -+ -+ if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) -+ return -BCH_ERR_ENOMEM_journal_pin_fifo; -+ -+ for (i = 0; i < ARRAY_SIZE(j->buf); i++) { -+ j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN; -+ j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL); -+ if (!j->buf[i].data) -+ return -BCH_ERR_ENOMEM_journal_buf; -+ } -+ -+ j->pin.front = j->pin.back = 1; -+ return 0; -+} -+ -+/* debug: */ -+ -+void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ union journal_res_state s; -+ struct bch_dev *ca; -+ unsigned long now = jiffies; -+ u64 seq; -+ unsigned i; -+ -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 24); -+ out->atomic++; -+ -+ rcu_read_lock(); -+ s = READ_ONCE(j->reservations); -+ -+ prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size); -+ prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); -+ prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); -+ prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); -+ prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk); -+ prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk); -+ prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining); -+ prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]); -+ prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved); -+ prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes); -+ prt_printf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes); -+ prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim); -+ prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim); -+ prt_printf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked); -+ prt_printf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now) -+ ? 
jiffies_to_msecs(j->next_reclaim - jiffies) : 0); -+ prt_printf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors); -+ prt_printf(out, "current entry error:\t%s\n", bch2_journal_errors[j->cur_entry_error]); -+ prt_printf(out, "current entry:\t\t"); -+ -+ switch (s.cur_entry_offset) { -+ case JOURNAL_ENTRY_ERROR_VAL: -+ prt_printf(out, "error"); -+ break; -+ case JOURNAL_ENTRY_CLOSED_VAL: -+ prt_printf(out, "closed"); -+ break; -+ default: -+ prt_printf(out, "%u/%u", s.cur_entry_offset, j->cur_entry_u64s); -+ break; -+ } -+ -+ prt_newline(out); -+ -+ for (seq = journal_cur_seq(j); -+ seq >= journal_last_unwritten_seq(j); -+ --seq) { -+ i = seq & JOURNAL_BUF_MASK; -+ -+ prt_printf(out, "unwritten entry:"); -+ prt_tab(out); -+ prt_printf(out, "%llu", seq); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ prt_printf(out, "refcount:"); -+ prt_tab(out); -+ prt_printf(out, "%u", journal_state_count(s, i)); -+ prt_newline(out); -+ -+ prt_printf(out, "sectors:"); -+ prt_tab(out); -+ prt_printf(out, "%u", j->buf[i].sectors); -+ prt_newline(out); -+ -+ prt_printf(out, "expires"); -+ prt_tab(out); -+ prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies); -+ prt_newline(out); -+ -+ printbuf_indent_sub(out, 2); -+ } -+ -+ prt_printf(out, -+ "replay done:\t\t%i\n", -+ test_bit(JOURNAL_REPLAY_DONE, &j->flags)); -+ -+ prt_printf(out, "space:\n"); -+ prt_printf(out, "\tdiscarded\t%u:%u\n", -+ j->space[journal_space_discarded].next_entry, -+ j->space[journal_space_discarded].total); -+ prt_printf(out, "\tclean ondisk\t%u:%u\n", -+ j->space[journal_space_clean_ondisk].next_entry, -+ j->space[journal_space_clean_ondisk].total); -+ prt_printf(out, "\tclean\t\t%u:%u\n", -+ j->space[journal_space_clean].next_entry, -+ j->space[journal_space_clean].total); -+ prt_printf(out, "\ttotal\t\t%u:%u\n", -+ j->space[journal_space_total].next_entry, -+ j->space[journal_space_total].total); -+ -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d)) -+ continue; -+ -+ if (!ja->nr) -+ continue; -+ -+ prt_printf(out, "dev %u:\n", i); -+ prt_printf(out, "\tnr\t\t%u\n", ja->nr); -+ prt_printf(out, "\tbucket size\t%u\n", ca->mi.bucket_size); -+ prt_printf(out, "\tavailable\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free); -+ prt_printf(out, "\tdiscard_idx\t%u\n", ja->discard_idx); -+ prt_printf(out, "\tdirty_ondisk\t%u (seq %llu)\n", ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]); -+ prt_printf(out, "\tdirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]); -+ prt_printf(out, "\tcur_idx\t\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]); -+ } -+ -+ rcu_read_unlock(); -+ -+ --out->atomic; -+} -+ -+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) -+{ -+ spin_lock(&j->lock); -+ __bch2_journal_debug_to_text(out, j); -+ spin_unlock(&j->lock); -+} -+ -+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *pin; -+ unsigned i; -+ -+ spin_lock(&j->lock); -+ *seq = max(*seq, j->pin.front); -+ -+ if (*seq >= j->pin.back) { -+ spin_unlock(&j->lock); -+ return true; -+ } -+ -+ out->atomic++; -+ -+ pin_list = journal_seq_pin(j, *seq); -+ -+ prt_printf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count)); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ 
/* list each outstanding pin, and its flush callback, for this seq: */
-+ for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
-+ list_for_each_entry(pin, &pin_list->list[i], list) {
-+ prt_printf(out, "\t%px %ps", pin, pin->flush);
-+ prt_newline(out);
-+ }
-+
-+ if (!list_empty(&pin_list->flushed)) {
-+ prt_printf(out, "flushed:");
-+ prt_newline(out);
-+ }
-+
-+ list_for_each_entry(pin, &pin_list->flushed, list) {
-+ prt_printf(out, "\t%px %ps", pin, pin->flush);
-+ prt_newline(out);
-+ }
-+
-+ printbuf_indent_sub(out, 2);
-+
-+ --out->atomic;
-+ spin_unlock(&j->lock);
-+
-+ return false;
-+}
-+
-+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
-+{
-+ u64 seq = 0;
-+
-+ while (!bch2_journal_seq_pins_to_text(out, j, &seq))
-+ seq++;
-+}
-diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
-new file mode 100644
-index 000000000000..011711e99c8d
---- /dev/null
-+++ b/fs/bcachefs/journal.h
-@@ -0,0 +1,549 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_JOURNAL_H
-+#define _BCACHEFS_JOURNAL_H
-+
-+/*
-+ * THE JOURNAL:
-+ *
-+ * The primary purpose of the journal is to log updates (insertions) to the
-+ * b-tree, to avoid having to do synchronous updates to the b-tree on disk.
-+ *
-+ * Without the journal, the b-tree is always internally consistent on
-+ * disk - and in fact, in the earliest incarnations bcache didn't have a journal
-+ * but did handle unclean shutdowns by doing all index updates synchronously
-+ * (with coalescing).
-+ *
-+ * Updates to interior nodes still happen synchronously and without the journal
-+ * (for simplicity) - this may change eventually but updates to interior nodes
-+ * are rare enough it's not a huge priority.
-+ *
-+ * This means the journal is relatively separate from the b-tree; it consists of
-+ * just a list of keys and journal replay consists of just redoing those
-+ * insertions in the same order that they appear in the journal.
-+ *
-+ * PERSISTENCE:
-+ *
-+ * For synchronous updates (where we're waiting on the index update to hit
-+ * disk), the journal entry will be written out immediately (or as soon as
-+ * possible, if the write for the previous journal entry was still in flight).
-+ *
-+ * Synchronous updates are specified by passing a closure (@flush_cl) to
-+ * bch2_btree_insert() or bch_btree_insert_node(), which then passes that parameter
-+ * down to the journalling code. That closure will wait on the journal write to
-+ * complete (via closure_wait()).
-+ *
-+ * If the index update wasn't synchronous, the journal entry will be
-+ * written out after 10 ms have elapsed, by default (the delay_ms field
-+ * in struct journal).
-+ *
-+ * JOURNAL ENTRIES:
-+ *
-+ * A journal entry is variable size (struct jset), it's got a fixed length
-+ * header and then a variable number of struct jset_entry entries.
-+ *
-+ * Journal entries are identified by monotonically increasing 64 bit sequence
-+ * numbers - jset->seq; other places in the code refer to this sequence number.
-+ *
-+ * A jset_entry entry contains one or more bkeys (which is what gets inserted
-+ * into the b-tree). We need a container to indicate which b-tree the key is
-+ * for; also, the roots of the various b-trees are stored in jset_entry entries
-+ * (one for each b-tree) - this lets us add new b-tree types without changing
-+ * the on disk format.
-+ *
-+ * We also keep some things in the journal header that are logically part of the
-+ * superblock - all the things that are frequently updated. This is for future
-+ * bcache on raw flash support; the superblock (which will become another
-+ * journal) can't be moved or wear leveled, so it contains just enough
-+ * information to find the main journal, and the superblock only has to be
-+ * rewritten when we want to move/wear level the main journal.
-+ *
-+ * JOURNAL LAYOUT ON DISK:
-+ *
-+ * The journal is written to a ringbuffer of buckets (which is kept in the
-+ * superblock); the individual buckets are not necessarily contiguous on disk
-+ * which means that journal entries are not allowed to span buckets, but also
-+ * that we can resize the journal at runtime if desired (unimplemented).
-+ *
-+ * The journal buckets exist in the same pool as all the other buckets that are
-+ * managed by the allocator and garbage collection - garbage collection marks
-+ * the journal buckets as metadata buckets.
-+ *
-+ * OPEN/DIRTY JOURNAL ENTRIES:
-+ *
-+ * Open/dirty journal entries are journal entries that contain b-tree updates
-+ * that have not yet been written out to the b-tree on disk. We have to track
-+ * which journal entries are dirty, and we also have to avoid wrapping around
-+ * the journal and overwriting old but still dirty journal entries with new
-+ * journal entries.
-+ *
-+ * On disk, this is represented with the "last_seq" field of struct jset;
-+ * last_seq is the first sequence number that journal replay has to replay.
-+ *
-+ * To avoid overwriting dirty journal entries on disk, we keep a mapping (in
-+ * journal_device->seq) of, for each journal bucket, the highest sequence number
-+ * of any journal entry it contains. Then, by comparing that against last_seq we
-+ * can determine whether that journal bucket contains dirty journal entries or
-+ * not.
-+ *
-+ * To track which journal entries are dirty, we maintain a fifo of refcounts
-+ * (where each entry corresponds to a specific sequence number) - when a ref
-+ * goes to 0, that journal entry is no longer dirty.
-+ *
-+ * Journalling of index updates is done at the same time as the b-tree itself is
-+ * being modified (see btree_insert_key()); when we add the key to the journal
-+ * the pending b-tree write takes a ref on the journal entry the key was added
-+ * to. If a pending b-tree write would need to take refs on multiple dirty
-+ * journal entries, it only keeps the ref on the oldest one (since a newer
-+ * journal entry will still be replayed if an older entry was dirty).
-+ *
-+ * JOURNAL FILLING UP:
-+ *
-+ * There are two ways the journal could fill up; either we could run out of
-+ * space to write to, or we could have too many open journal entries and run out
-+ * of room in the fifo of refcounts. Since those refcounts are decremented
-+ * without any locking we can't safely resize that fifo, so we handle both
-+ * cases the same way.
-+ *
-+ * If the journal fills up, we start flushing dirty btree nodes until we can
-+ * allocate space for a journal write again - preferentially flushing btree
-+ * nodes that are pinning the oldest journal entries first.
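-+ *
-+ * (That flushing is driven by the reclaim machinery in journal_reclaim.c;
-+ * journal_set_watermark() below also raises the journal's watermark as the
-+ * pin fifo fills up, so reservations below the watermark are refused first.)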
-+ */
-+
-+#include <linux/hash.h>
-+
-+#include "journal_types.h"
-+
-+struct bch_fs;
-+
-+static inline void journal_wake(struct journal *j)
-+{
-+ wake_up(&j->wait);
-+ closure_wake_up(&j->async_wait);
-+ closure_wake_up(&j->preres_wait);
-+}
-+
-+static inline struct journal_buf *journal_cur_buf(struct journal *j)
-+{
-+ return j->buf + j->reservations.idx;
-+}
-+
-+/* Sequence number of oldest dirty journal entry */
-+
-+static inline u64 journal_last_seq(struct journal *j)
-+{
-+ return j->pin.front;
-+}
-+
-+static inline u64 journal_cur_seq(struct journal *j)
-+{
-+ EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq));
-+
-+ return j->pin.back - 1;
-+}
-+
-+static inline u64 journal_last_unwritten_seq(struct journal *j)
-+{
-+ return j->seq_ondisk + 1;
-+}
-+
-+static inline int journal_state_count(union journal_res_state s, int idx)
-+{
-+ switch (idx) {
-+ case 0: return s.buf0_count;
-+ case 1: return s.buf1_count;
-+ case 2: return s.buf2_count;
-+ case 3: return s.buf3_count;
-+ }
-+ BUG();
-+}
-+
-+static inline void journal_state_inc(union journal_res_state *s)
-+{
-+ s->buf0_count += s->idx == 0;
-+ s->buf1_count += s->idx == 1;
-+ s->buf2_count += s->idx == 2;
-+ s->buf3_count += s->idx == 3;
-+}
-+
-+/*
-+ * Amount of space that will be taken up by some keys in the journal (i.e.
-+ * including the jset header)
-+ */
-+static inline unsigned jset_u64s(unsigned u64s)
-+{
-+ return u64s + sizeof(struct jset_entry) / sizeof(u64);
-+}
-+
-+static inline int journal_entry_overhead(struct journal *j)
-+{
-+ return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
-+}
-+
-+static inline struct jset_entry *
-+bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
-+{
-+ struct jset *jset = buf->data;
-+ struct jset_entry *entry = vstruct_idx(jset, le32_to_cpu(jset->u64s));
-+
-+ memset(entry, 0, sizeof(*entry));
-+ entry->u64s = cpu_to_le16(u64s);
-+
-+ le32_add_cpu(&jset->u64s, jset_u64s(u64s));
-+
-+ return entry;
-+}
-+
-+static inline struct jset_entry *
-+journal_res_entry(struct journal *j, struct journal_res *res)
-+{
-+ return vstruct_idx(j->buf[res->idx].data, res->offset);
-+}
-+
-+static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
-+ enum btree_id id, unsigned level,
-+ unsigned u64s)
-+{
-+ entry->u64s = cpu_to_le16(u64s);
-+ entry->btree_id = id;
-+ entry->level = level;
-+ entry->type = type;
-+ entry->pad[0] = 0;
-+ entry->pad[1] = 0;
-+ entry->pad[2] = 0;
-+ return jset_u64s(u64s);
-+}
-+
-+static inline unsigned journal_entry_set(struct jset_entry *entry, unsigned type,
-+ enum btree_id id, unsigned level,
-+ const void *data, unsigned u64s)
-+{
-+ unsigned ret = journal_entry_init(entry, type, id, level, u64s);
-+
-+ memcpy_u64s_small(entry->_data, data, u64s);
-+ return ret;
-+}
-+
-+static inline struct jset_entry *
-+bch2_journal_add_entry(struct journal *j, struct journal_res *res,
-+ unsigned type, enum btree_id id,
-+ unsigned level, unsigned u64s)
-+{
-+ struct jset_entry *entry = journal_res_entry(j, res);
-+ unsigned actual = journal_entry_init(entry, type, id, level, u64s);
-+
-+ EBUG_ON(!res->ref);
-+ EBUG_ON(actual > res->u64s);
-+
-+ res->offset += actual;
-+ res->u64s -= actual;
-+ return entry;
-+}
-+
-+static inline bool journal_entry_empty(struct jset *j)
-+{
-+ struct jset_entry *i;
-+
-+ if (j->seq != j->last_seq)
-+ return false;
-+
-+ vstruct_for_each(j, i)
-+ if (i->type == BCH_JSET_ENTRY_btree_keys && i->u64s)
-+ return false;
-+ return true;
-+}
-+
-+/*
-+ * Drop reference on a buffer index and
return true if the count has hit zero. -+ */ -+static inline union journal_res_state journal_state_buf_put(struct journal *j, unsigned idx) -+{ -+ union journal_res_state s; -+ -+ s.v = atomic64_sub_return(((union journal_res_state) { -+ .buf0_count = idx == 0, -+ .buf1_count = idx == 1, -+ .buf2_count = idx == 2, -+ .buf3_count = idx == 3, -+ }).v, &j->reservations.counter); -+ return s; -+} -+ -+void bch2_journal_buf_put_final(struct journal *, u64, bool); -+ -+static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq) -+{ -+ union journal_res_state s; -+ -+ s = journal_state_buf_put(j, idx); -+ if (!journal_state_count(s, idx)) -+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx); -+} -+ -+static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq) -+{ -+ union journal_res_state s; -+ -+ s = journal_state_buf_put(j, idx); -+ if (!journal_state_count(s, idx)) { -+ spin_lock(&j->lock); -+ bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx); -+ spin_unlock(&j->lock); -+ } -+} -+ -+/* -+ * This function releases the journal write structure so other threads can -+ * then proceed to add their keys as well. -+ */ -+static inline void bch2_journal_res_put(struct journal *j, -+ struct journal_res *res) -+{ -+ if (!res->ref) -+ return; -+ -+ lock_release(&j->res_map, _THIS_IP_); -+ -+ while (res->u64s) -+ bch2_journal_add_entry(j, res, -+ BCH_JSET_ENTRY_btree_keys, -+ 0, 0, 0); -+ -+ bch2_journal_buf_put(j, res->idx, res->seq); -+ -+ res->ref = 0; -+} -+ -+int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, -+ unsigned); -+ -+/* First bits for BCH_WATERMARK: */ -+enum journal_res_flags { -+ __JOURNAL_RES_GET_NONBLOCK = BCH_WATERMARK_BITS, -+ __JOURNAL_RES_GET_CHECK, -+}; -+ -+#define JOURNAL_RES_GET_NONBLOCK (1 << __JOURNAL_RES_GET_NONBLOCK) -+#define JOURNAL_RES_GET_CHECK (1 << __JOURNAL_RES_GET_CHECK) -+ -+static inline int journal_res_get_fast(struct journal *j, -+ struct journal_res *res, -+ unsigned flags) -+{ -+ union journal_res_state old, new; -+ u64 v = atomic64_read(&j->reservations.counter); -+ -+ do { -+ old.v = new.v = v; -+ -+ /* -+ * Check if there is still room in the current journal -+ * entry: -+ */ -+ if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s) -+ return 0; -+ -+ EBUG_ON(!journal_state_count(new, new.idx)); -+ -+ if ((flags & BCH_WATERMARK_MASK) < j->watermark) -+ return 0; -+ -+ new.cur_entry_offset += res->u64s; -+ journal_state_inc(&new); -+ -+ /* -+ * If the refcount would overflow, we have to wait: -+ * XXX - tracepoint this: -+ */ -+ if (!journal_state_count(new, new.idx)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_CHECK) -+ return 1; -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ res->ref = true; -+ res->idx = old.idx; -+ res->offset = old.cur_entry_offset; -+ res->seq = le64_to_cpu(j->buf[old.idx].data->seq); -+ return 1; -+} -+ -+static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, -+ unsigned u64s, unsigned flags) -+{ -+ int ret; -+ -+ EBUG_ON(res->ref); -+ EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ -+ res->u64s = u64s; -+ -+ if (journal_res_get_fast(j, res, flags)) -+ goto out; -+ -+ ret = bch2_journal_res_get_slowpath(j, res, flags); -+ if (ret) -+ return ret; -+out: -+ if (!(flags & JOURNAL_RES_GET_CHECK)) { -+ lock_acquire_shared(&j->res_map, 0, -+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0, -+ NULL, _THIS_IP_); -+ EBUG_ON(!res->ref); -+ } -+ return 0; -+} -+ -+/* journal_preres: 
*/ -+ -+static inline void journal_set_watermark(struct journal *j) -+{ -+ union journal_preres_state s = READ_ONCE(j->prereserved); -+ unsigned watermark = BCH_WATERMARK_stripe; -+ -+ if (fifo_free(&j->pin) < j->pin.size / 4) -+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc); -+ if (fifo_free(&j->pin) < j->pin.size / 8) -+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); -+ -+ if (s.reserved > s.remaining) -+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc); -+ if (!s.remaining) -+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim); -+ -+ if (watermark == j->watermark) -+ return; -+ -+ swap(watermark, j->watermark); -+ if (watermark > j->watermark) -+ journal_wake(j); -+} -+ -+static inline void bch2_journal_preres_put(struct journal *j, -+ struct journal_preres *res) -+{ -+ union journal_preres_state s = { .reserved = res->u64s }; -+ -+ if (!res->u64s) -+ return; -+ -+ s.v = atomic64_sub_return(s.v, &j->prereserved.counter); -+ res->u64s = 0; -+ -+ if (unlikely(s.waiting)) { -+ clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)), -+ (unsigned long *) &j->prereserved.v); -+ closure_wake_up(&j->preres_wait); -+ } -+ -+ if (s.reserved <= s.remaining && j->watermark) -+ journal_set_watermark(j); -+} -+ -+int __bch2_journal_preres_get(struct journal *, -+ struct journal_preres *, unsigned, unsigned); -+ -+static inline int bch2_journal_preres_get_fast(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags, -+ bool set_waiting) -+{ -+ int d = new_u64s - res->u64s; -+ union journal_preres_state old, new; -+ u64 v = atomic64_read(&j->prereserved.counter); -+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; -+ int ret; -+ -+ do { -+ old.v = new.v = v; -+ ret = 0; -+ -+ if (watermark == BCH_WATERMARK_reclaim || -+ new.reserved + d < new.remaining) { -+ new.reserved += d; -+ ret = 1; -+ } else if (set_waiting && !new.waiting) -+ new.waiting = true; -+ else -+ return 0; -+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter, -+ old.v, new.v)) != old.v); -+ -+ if (ret) -+ res->u64s += d; -+ return ret; -+} -+ -+static inline int bch2_journal_preres_get(struct journal *j, -+ struct journal_preres *res, -+ unsigned new_u64s, -+ unsigned flags) -+{ -+ if (new_u64s <= res->u64s) -+ return 0; -+ -+ if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false)) -+ return 0; -+ -+ if (flags & JOURNAL_RES_GET_NONBLOCK) -+ return -BCH_ERR_journal_preres_get_blocked; -+ -+ return __bch2_journal_preres_get(j, res, new_u64s, flags); -+} -+ -+/* journal_entry_res: */ -+ -+void bch2_journal_entry_res_resize(struct journal *, -+ struct journal_entry_res *, -+ unsigned); -+ -+int bch2_journal_flush_seq_async(struct journal *, u64, struct closure *); -+void bch2_journal_flush_async(struct journal *, struct closure *); -+ -+int bch2_journal_flush_seq(struct journal *, u64); -+int bch2_journal_flush(struct journal *); -+bool bch2_journal_noflush_seq(struct journal *, u64); -+int bch2_journal_meta(struct journal *); -+ -+void bch2_journal_halt(struct journal *); -+ -+static inline int bch2_journal_error(struct journal *j) -+{ -+ return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL -+ ? 
-EIO : 0; -+} -+ -+struct bch_dev; -+ -+static inline void bch2_journal_set_replay_done(struct journal *j) -+{ -+ BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags)); -+ set_bit(JOURNAL_REPLAY_DONE, &j->flags); -+} -+ -+void bch2_journal_unblock(struct journal *); -+void bch2_journal_block(struct journal *); -+ -+void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); -+void bch2_journal_debug_to_text(struct printbuf *, struct journal *); -+void bch2_journal_pins_to_text(struct printbuf *, struct journal *); -+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); -+ -+int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, -+ unsigned nr); -+int bch2_dev_journal_alloc(struct bch_dev *); -+int bch2_fs_journal_alloc(struct bch_fs *); -+ -+void bch2_dev_journal_stop(struct journal *, struct bch_dev *); -+ -+void bch2_fs_journal_stop(struct journal *); -+int bch2_fs_journal_start(struct journal *, u64); -+ -+void bch2_dev_journal_exit(struct bch_dev *); -+int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); -+void bch2_fs_journal_exit(struct journal *); -+int bch2_fs_journal_init(struct journal *); -+ -+#endif /* _BCACHEFS_JOURNAL_H */ -diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c -new file mode 100644 -index 000000000000..f4bc2cdbfdd7 ---- /dev/null -+++ b/fs/bcachefs/journal_io.c -@@ -0,0 +1,1947 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_io.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "checksum.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "replicas.h" -+#include "sb-clean.h" -+#include "trace.h" -+ -+static struct nonce journal_nonce(const struct jset *jset) -+{ -+ return (struct nonce) {{ -+ [0] = 0, -+ [1] = ((__le32 *) &jset->seq)[0], -+ [2] = ((__le32 *) &jset->seq)[1], -+ [3] = BCH_NONCE_JOURNAL, -+ }}; -+} -+ -+static bool jset_csum_good(struct bch_fs *c, struct jset *j) -+{ -+ return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) && -+ !bch2_crc_cmp(j->csum, -+ csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j)); -+} -+ -+static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq) -+{ -+ return (seq - c->journal_entries_base_seq) & (~0U >> 1); -+} -+ -+static void __journal_replay_free(struct bch_fs *c, -+ struct journal_replay *i) -+{ -+ struct journal_replay **p = -+ genradix_ptr(&c->journal_entries, -+ journal_entry_radix_idx(c, le64_to_cpu(i->j.seq))); -+ -+ BUG_ON(*p != i); -+ *p = NULL; -+ kvpfree(i, offsetof(struct journal_replay, j) + -+ vstruct_bytes(&i->j)); -+} -+ -+static void journal_replay_free(struct bch_fs *c, struct journal_replay *i) -+{ -+ i->ignore = true; -+ -+ if (!c->opts.read_entire_journal) -+ __journal_replay_free(c, i); -+} -+ -+struct journal_list { -+ struct closure cl; -+ u64 last_seq; -+ struct mutex lock; -+ int ret; -+}; -+ -+#define JOURNAL_ENTRY_ADD_OK 0 -+#define JOURNAL_ENTRY_ADD_OUT_OF_RANGE 5 -+ -+/* -+ * Given a journal entry we just read, add it to the list of journal entries to -+ * be replayed: -+ */ -+static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, -+ struct journal_ptr entry_ptr, -+ struct journal_list *jlist, struct jset *j) -+{ -+ struct genradix_iter iter; -+ struct journal_replay **_i, *i, *dup; -+ struct journal_ptr *ptr; -+ size_t bytes = vstruct_bytes(j); -+ u64 
last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
-+ int ret = JOURNAL_ENTRY_ADD_OK;
-+
-+ /* Is this entry older than the range we need? */
-+ if (!c->opts.read_entire_journal &&
-+ le64_to_cpu(j->seq) < jlist->last_seq)
-+ return JOURNAL_ENTRY_ADD_OUT_OF_RANGE;
-+
-+ /*
-+ * genradixes are indexed by a ulong, not a u64, so we can't index them
-+ * by sequence number directly: Assume instead that they will all fall
-+ * within the range of +-2 billion of the first one we find.
-+ */
-+ if (!c->journal_entries_base_seq)
-+ c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX);
-+
-+ /* Drop entries we don't need anymore */
-+ if (last_seq > jlist->last_seq && !c->opts.read_entire_journal) {
-+ genradix_for_each_from(&c->journal_entries, iter, _i,
-+ journal_entry_radix_idx(c, jlist->last_seq)) {
-+ i = *_i;
-+
-+ if (!i || i->ignore)
-+ continue;
-+
-+ if (le64_to_cpu(i->j.seq) >= last_seq)
-+ break;
-+ journal_replay_free(c, i);
-+ }
-+ }
-+
-+ jlist->last_seq = max(jlist->last_seq, last_seq);
-+
-+ _i = genradix_ptr_alloc(&c->journal_entries,
-+ journal_entry_radix_idx(c, le64_to_cpu(j->seq)),
-+ GFP_KERNEL);
-+ if (!_i)
-+ return -BCH_ERR_ENOMEM_journal_entry_add;
-+
-+ /*
-+ * Duplicate journal entries? If so we want the one that didn't have a
-+ * checksum error:
-+ */
-+ dup = *_i;
-+ if (dup) {
-+ if (bytes == vstruct_bytes(&dup->j) &&
-+ !memcmp(j, &dup->j, bytes)) {
-+ i = dup;
-+ goto found;
-+ }
-+
-+ if (!entry_ptr.csum_good) {
-+ i = dup;
-+ goto found;
-+ }
-+
-+ if (!dup->csum_good)
-+ goto replace;
-+
-+ fsck_err(c, journal_entry_replicas_data_mismatch,
-+ "found duplicate but non-identical journal entries (seq %llu)",
-+ le64_to_cpu(j->seq));
-+ i = dup;
-+ goto found;
-+ }
-+replace:
-+ i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
-+ if (!i)
-+ return -BCH_ERR_ENOMEM_journal_entry_add;
-+
-+ i->nr_ptrs = 0;
-+ i->csum_good = entry_ptr.csum_good;
-+ i->ignore = false;
-+ unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
-+ i->ptrs[i->nr_ptrs++] = entry_ptr;
-+
-+ if (dup) {
-+ if (dup->nr_ptrs >= ARRAY_SIZE(dup->ptrs)) {
-+ bch_err(c, "found too many copies of journal entry %llu",
-+ le64_to_cpu(i->j.seq));
-+ dup->nr_ptrs = ARRAY_SIZE(dup->ptrs) - 1;
-+ }
-+
-+ /* The first ptr should represent the jset we kept: */
-+ memcpy(i->ptrs + i->nr_ptrs,
-+ dup->ptrs,
-+ sizeof(dup->ptrs[0]) * dup->nr_ptrs);
-+ i->nr_ptrs += dup->nr_ptrs;
-+ __journal_replay_free(c, dup);
-+ }
-+
-+ *_i = i;
-+ return 0;
-+found:
-+ for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
-+ if (ptr->dev == ca->dev_idx) {
-+ bch_err(c, "duplicate journal entry %llu on same device",
-+ le64_to_cpu(i->j.seq));
-+ goto out;
-+ }
-+ }
-+
-+ if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
-+ bch_err(c, "found too many copies of journal entry %llu",
-+ le64_to_cpu(i->j.seq));
-+ goto out;
-+ }
-+
-+ i->ptrs[i->nr_ptrs++] = entry_ptr;
-+out:
-+fsck_err:
-+ return ret;
-+}
-+
-+/* this fills in a range with empty jset_entries: */
-+static void journal_entry_null_range(void *start, void *end)
-+{
-+ struct jset_entry *entry;
-+
-+ for (entry = start; entry != end; entry = vstruct_next(entry))
-+ memset(entry, 0, sizeof(*entry));
-+}
-+
-+#define JOURNAL_ENTRY_REREAD 5
-+#define JOURNAL_ENTRY_NONE 6
-+#define JOURNAL_ENTRY_BAD 7
-+
-+static void journal_entry_err_msg(struct printbuf *out,
-+ u32 version,
-+ struct jset *jset,
-+ struct jset_entry *entry)
-+{
-+ prt_str(out, "invalid journal entry, version=");
-+ bch2_version_to_text(out,
version); -+ -+ if (entry) { -+ prt_str(out, " type="); -+ prt_str(out, bch2_jset_entry_types[entry->type]); -+ } -+ -+ if (!jset) { -+ prt_printf(out, " in superblock"); -+ } else { -+ -+ prt_printf(out, " seq=%llu", le64_to_cpu(jset->seq)); -+ -+ if (entry) -+ prt_printf(out, " offset=%zi/%u", -+ (u64 *) entry - jset->_data, -+ le32_to_cpu(jset->u64s)); -+ } -+ -+ prt_str(out, ": "); -+} -+ -+#define journal_entry_err(c, version, jset, entry, _err, msg, ...) \ -+({ \ -+ struct printbuf _buf = PRINTBUF; \ -+ \ -+ journal_entry_err_msg(&_buf, version, jset, entry); \ -+ prt_printf(&_buf, msg, ##__VA_ARGS__); \ -+ \ -+ switch (flags & BKEY_INVALID_WRITE) { \ -+ case READ: \ -+ mustfix_fsck_err(c, _err, "%s", _buf.buf); \ -+ break; \ -+ case WRITE: \ -+ bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \ -+ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\ -+ if (bch2_fs_inconsistent(c)) { \ -+ ret = -BCH_ERR_fsck_errors_not_fixed; \ -+ goto fsck_err; \ -+ } \ -+ break; \ -+ } \ -+ \ -+ printbuf_exit(&_buf); \ -+ true; \ -+}) -+ -+#define journal_entry_err_on(cond, ...) \ -+ ((cond) ? journal_entry_err(__VA_ARGS__) : false) -+ -+#define FSCK_DELETED_KEY 5 -+ -+static int journal_validate_key(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned level, enum btree_id btree_id, -+ struct bkey_i *k, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ int write = flags & BKEY_INVALID_WRITE; -+ void *next = vstruct_next(entry); -+ struct printbuf buf = PRINTBUF; -+ int ret = 0; -+ -+ if (journal_entry_err_on(!k->k.u64s, -+ c, version, jset, entry, -+ journal_entry_bkey_u64s_0, -+ "k->u64s 0")) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return FSCK_DELETED_KEY; -+ } -+ -+ if (journal_entry_err_on((void *) bkey_next(k) > -+ (void *) vstruct_next(entry), -+ c, version, jset, entry, -+ journal_entry_bkey_past_end, -+ "extends past end of journal entry")) { -+ entry->u64s = cpu_to_le16((u64 *) k - entry->_data); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return FSCK_DELETED_KEY; -+ } -+ -+ if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, -+ c, version, jset, entry, -+ journal_entry_bkey_bad_format, -+ "bad format %u", k->k.format)) { -+ le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ return FSCK_DELETED_KEY; -+ } -+ -+ if (!write) -+ bch2_bkey_compat(level, btree_id, version, big_endian, -+ write, NULL, bkey_to_packed(k)); -+ -+ if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), -+ __btree_node_type(level, btree_id), write, &buf)) { -+ printbuf_reset(&buf); -+ journal_entry_err_msg(&buf, version, jset, entry); -+ prt_newline(&buf); -+ printbuf_indent_add(&buf, 2); -+ -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); -+ prt_newline(&buf); -+ bch2_bkey_invalid(c, bkey_i_to_s_c(k), -+ __btree_node_type(level, btree_id), write, &buf); -+ -+ mustfix_fsck_err(c, journal_entry_bkey_invalid, -+ "%s", buf.buf); -+ -+ le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); -+ memmove(k, bkey_next(k), next - (void *) bkey_next(k)); -+ journal_entry_null_range(vstruct_next(entry), next); -+ -+ printbuf_exit(&buf); -+ return FSCK_DELETED_KEY; -+ } -+ -+ if (write) -+ bch2_bkey_compat(level, btree_id, version, big_endian, -+ write, NULL, bkey_to_packed(k)); -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+static int 
journal_entry_btree_keys_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct bkey_i *k = entry->start; -+ -+ while (k != vstruct_last(entry)) { -+ int ret = journal_validate_key(c, jset, entry, -+ entry->level, -+ entry->btree_id, -+ k, version, big_endian, -+ flags|BKEY_INVALID_JOURNAL); -+ if (ret == FSCK_DELETED_KEY) -+ continue; -+ -+ k = bkey_next(k); -+ } -+ -+ return 0; -+} -+ -+static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct bkey_i *k; -+ bool first = true; -+ -+ jset_entry_for_each_key(entry, k) { -+ if (!first) { -+ prt_newline(out); -+ prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); -+ } -+ prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); -+ bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); -+ first = false; -+ } -+} -+ -+static int journal_entry_btree_root_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct bkey_i *k = entry->start; -+ int ret = 0; -+ -+ if (journal_entry_err_on(!entry->u64s || -+ le16_to_cpu(entry->u64s) != k->k.u64s, -+ c, version, jset, entry, -+ journal_entry_btree_root_bad_size, -+ "invalid btree root journal entry: wrong number of keys")) { -+ void *next = vstruct_next(entry); -+ /* -+ * we don't want to null out this jset_entry, -+ * just the contents, so that later we can tell -+ * we were _supposed_ to have a btree root -+ */ -+ entry->u64s = 0; -+ journal_entry_null_range(vstruct_next(entry), next); -+ return 0; -+ } -+ -+ return journal_validate_key(c, jset, entry, 1, entry->btree_id, k, -+ version, big_endian, flags); -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_btree_root_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ journal_entry_btree_keys_to_text(out, c, entry); -+} -+ -+static int journal_entry_prio_ptrs_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ /* obsolete, don't care: */ -+ return 0; -+} -+ -+static void journal_entry_prio_ptrs_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+} -+ -+static int journal_entry_blacklist_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, -+ c, version, jset, entry, -+ journal_entry_blacklist_bad_size, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ } -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_blacklist_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_blacklist *bl = -+ container_of(entry, struct jset_entry_blacklist, entry); -+ -+ prt_printf(out, "seq=%llu", le64_to_cpu(bl->seq)); -+} -+ -+static int journal_entry_blacklist_v2_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct jset_entry_blacklist_v2 *bl_entry; -+ int ret = 0; -+ -+ if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, -+ c, version, jset, entry, -+ 
journal_entry_blacklist_v2_bad_size, -+ "invalid journal seq blacklist entry: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ goto out; -+ } -+ -+ bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > -+ le64_to_cpu(bl_entry->end), -+ c, version, jset, entry, -+ journal_entry_blacklist_v2_start_past_end, -+ "invalid journal seq blacklist entry: start > end")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ } -+out: -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_blacklist_v2_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_blacklist_v2 *bl = -+ container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ prt_printf(out, "start=%llu end=%llu", -+ le64_to_cpu(bl->start), -+ le64_to_cpu(bl->end)); -+} -+ -+static int journal_entry_usage_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u), -+ c, version, jset, entry, -+ journal_entry_usage_bad_size, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ prt_printf(out, "type=%s v=%llu", -+ bch2_fs_usage_types[u->entry.btree_id], -+ le64_to_cpu(u->v)); -+} -+ -+static int journal_entry_data_usage_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < sizeof(*u) || -+ bytes < sizeof(*u) + u->r.nr_devs, -+ c, version, jset, entry, -+ journal_entry_data_usage_bad_size, -+ "invalid journal entry usage: bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_data_usage_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ -+ bch2_replicas_entry_to_text(out, &u->r); -+ prt_printf(out, "=%llu", le64_to_cpu(u->v)); -+} -+ -+static int journal_entry_clock_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct jset_entry_clock *clock = -+ container_of(entry, struct jset_entry_clock, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes != sizeof(*clock), -+ c, version, jset, entry, -+ journal_entry_clock_bad_size, -+ "bad size")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+ if (journal_entry_err_on(clock->rw > 1, -+ c, version, jset, entry, -+ journal_entry_clock_bad_rw, -+ 
"bad rw")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_clock *clock = -+ container_of(entry, struct jset_entry_clock, entry); -+ -+ prt_printf(out, "%s=%llu", clock->rw ? "write" : "read", le64_to_cpu(clock->time)); -+} -+ -+static int journal_entry_dev_usage_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ struct jset_entry_dev_usage *u = -+ container_of(entry, struct jset_entry_dev_usage, entry); -+ unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); -+ unsigned expected = sizeof(*u); -+ unsigned dev; -+ int ret = 0; -+ -+ if (journal_entry_err_on(bytes < expected, -+ c, version, jset, entry, -+ journal_entry_dev_usage_bad_size, -+ "bad size (%u < %u)", -+ bytes, expected)) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+ dev = le32_to_cpu(u->dev); -+ -+ if (journal_entry_err_on(!bch2_dev_exists2(c, dev), -+ c, version, jset, entry, -+ journal_entry_dev_usage_bad_dev, -+ "bad dev")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+ if (journal_entry_err_on(u->pad, -+ c, version, jset, entry, -+ journal_entry_dev_usage_bad_pad, -+ "bad pad")) { -+ journal_entry_null_range(entry, vstruct_next(entry)); -+ return ret; -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_dev_usage *u = -+ container_of(entry, struct jset_entry_dev_usage, entry); -+ unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); -+ -+ prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); -+ -+ for (i = 0; i < nr_types; i++) { -+ if (i < BCH_DATA_NR) -+ prt_printf(out, " %s", bch2_data_types[i]); -+ else -+ prt_printf(out, " (unknown data type %u)", i); -+ prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu", -+ le64_to_cpu(u->d[i].buckets), -+ le64_to_cpu(u->d[i].sectors), -+ le64_to_cpu(u->d[i].fragmented)); -+ } -+ -+ prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec)); -+} -+ -+static int journal_entry_log_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ return 0; -+} -+ -+static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); -+ unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); -+ -+ prt_printf(out, "%.*s", bytes, l->d); -+} -+ -+static int journal_entry_overwrite_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ return journal_entry_btree_keys_validate(c, jset, entry, -+ version, big_endian, READ); -+} -+ -+static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ journal_entry_btree_keys_to_text(out, c, entry); -+} -+ -+struct jset_entry_ops { -+ int (*validate)(struct bch_fs *, struct jset *, -+ struct jset_entry *, unsigned, int, -+ enum bkey_invalid_flags); -+ void (*to_text)(struct printbuf *, struct bch_fs *, struct jset_entry *); -+}; -+ 
-+static const struct jset_entry_ops bch2_jset_entry_ops[] = { -+#define x(f, nr) \ -+ [BCH_JSET_ENTRY_##f] = (struct jset_entry_ops) { \ -+ .validate = journal_entry_##f##_validate, \ -+ .to_text = journal_entry_##f##_to_text, \ -+ }, -+ BCH_JSET_ENTRY_TYPES() -+#undef x -+}; -+ -+int bch2_journal_entry_validate(struct bch_fs *c, -+ struct jset *jset, -+ struct jset_entry *entry, -+ unsigned version, int big_endian, -+ enum bkey_invalid_flags flags) -+{ -+ return entry->type < BCH_JSET_ENTRY_NR -+ ? bch2_jset_entry_ops[entry->type].validate(c, jset, entry, -+ version, big_endian, flags) -+ : 0; -+} -+ -+void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ if (entry->type < BCH_JSET_ENTRY_NR) { -+ prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); -+ bch2_jset_entry_ops[entry->type].to_text(out, c, entry); -+ } else { -+ prt_printf(out, "(unknown type %u)", entry->type); -+ } -+} -+ -+static int jset_validate_entries(struct bch_fs *c, struct jset *jset, -+ enum bkey_invalid_flags flags) -+{ -+ struct jset_entry *entry; -+ unsigned version = le32_to_cpu(jset->version); -+ int ret = 0; -+ -+ vstruct_for_each(jset, entry) { -+ if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), -+ c, version, jset, entry, -+ journal_entry_past_jset_end, -+ "journal entry extends past end of jset")) { -+ jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); -+ break; -+ } -+ -+ ret = bch2_journal_entry_validate(c, jset, entry, -+ version, JSET_BIG_ENDIAN(jset), flags); -+ if (ret) -+ break; -+ } -+fsck_err: -+ return ret; -+} -+ -+static int jset_validate(struct bch_fs *c, -+ struct bch_dev *ca, -+ struct jset *jset, u64 sector, -+ enum bkey_invalid_flags flags) -+{ -+ unsigned version; -+ int ret = 0; -+ -+ if (le64_to_cpu(jset->magic) != jset_magic(c)) -+ return JOURNAL_ENTRY_NONE; -+ -+ version = le32_to_cpu(jset->version); -+ if (journal_entry_err_on(!bch2_version_compatible(version), -+ c, version, jset, NULL, -+ jset_unsupported_version, -+ "%s sector %llu seq %llu: incompatible journal entry version %u.%u", -+ ca ? ca->name : c->name, -+ sector, le64_to_cpu(jset->seq), -+ BCH_VERSION_MAJOR(version), -+ BCH_VERSION_MINOR(version))) { -+ /* don't try to continue: */ -+ return -EINVAL; -+ } -+ -+ if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), -+ c, version, jset, NULL, -+ jset_unknown_csum, -+ "%s sector %llu seq %llu: journal entry with unknown csum type %llu", -+ ca ? 
ca->name : c->name, -+ sector, le64_to_cpu(jset->seq), -+ JSET_CSUM_TYPE(jset))) -+ ret = JOURNAL_ENTRY_BAD; -+ -+ /* last_seq is ignored when JSET_NO_FLUSH is true */ -+ if (journal_entry_err_on(!JSET_NO_FLUSH(jset) && -+ le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), -+ c, version, jset, NULL, -+ jset_last_seq_newer_than_seq, -+ "invalid journal entry: last_seq > seq (%llu > %llu)", -+ le64_to_cpu(jset->last_seq), -+ le64_to_cpu(jset->seq))) { -+ jset->last_seq = jset->seq; -+ return JOURNAL_ENTRY_BAD; -+ } -+ -+ ret = jset_validate_entries(c, jset, flags); -+fsck_err: -+ return ret; -+} -+ -+static int jset_validate_early(struct bch_fs *c, -+ struct bch_dev *ca, -+ struct jset *jset, u64 sector, -+ unsigned bucket_sectors_left, -+ unsigned sectors_read) -+{ -+ size_t bytes = vstruct_bytes(jset); -+ unsigned version; -+ enum bkey_invalid_flags flags = BKEY_INVALID_JOURNAL; -+ int ret = 0; -+ -+ if (le64_to_cpu(jset->magic) != jset_magic(c)) -+ return JOURNAL_ENTRY_NONE; -+ -+ version = le32_to_cpu(jset->version); -+ if (journal_entry_err_on(!bch2_version_compatible(version), -+ c, version, jset, NULL, -+ jset_unsupported_version, -+ "%s sector %llu seq %llu: unknown journal entry version %u.%u", -+ ca ? ca->name : c->name, -+ sector, le64_to_cpu(jset->seq), -+ BCH_VERSION_MAJOR(version), -+ BCH_VERSION_MINOR(version))) { -+ /* don't try to continue: */ -+ return -EINVAL; -+ } -+ -+ if (bytes > (sectors_read << 9) && -+ sectors_read < bucket_sectors_left) -+ return JOURNAL_ENTRY_REREAD; -+ -+ if (journal_entry_err_on(bytes > bucket_sectors_left << 9, -+ c, version, jset, NULL, -+ jset_past_bucket_end, -+ "%s sector %llu seq %llu: journal entry too big (%zu bytes)", -+ ca ? ca->name : c->name, -+ sector, le64_to_cpu(jset->seq), bytes)) -+ le32_add_cpu(&jset->u64s, -+ -((bytes - (bucket_sectors_left << 9)) / 8)); -+fsck_err: -+ return ret; -+} -+ -+struct journal_read_buf { -+ void *data; -+ size_t size; -+}; -+ -+static int journal_read_buf_realloc(struct journal_read_buf *b, -+ size_t new_size) -+{ -+ void *n; -+ -+ /* the bios are sized for this many pages, max: */ -+ if (new_size > JOURNAL_ENTRY_SIZE_MAX) -+ return -BCH_ERR_ENOMEM_journal_read_buf_realloc; -+ -+ new_size = roundup_pow_of_two(new_size); -+ n = kvpmalloc(new_size, GFP_KERNEL); -+ if (!n) -+ return -BCH_ERR_ENOMEM_journal_read_buf_realloc; -+ -+ kvpfree(b->data, b->size); -+ b->data = n; -+ b->size = new_size; -+ return 0; -+} -+ -+static int journal_read_bucket(struct bch_dev *ca, -+ struct journal_read_buf *buf, -+ struct journal_list *jlist, -+ unsigned bucket) -+{ -+ struct bch_fs *c = ca->fs; -+ struct journal_device *ja = &ca->journal; -+ struct jset *j = NULL; -+ unsigned sectors, sectors_read = 0; -+ u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), -+ end = offset + ca->mi.bucket_size; -+ bool saw_bad = false, csum_good; -+ int ret = 0; -+ -+ pr_debug("reading %u", bucket); -+ -+ while (offset < end) { -+ if (!sectors_read) { -+ struct bio *bio; -+ unsigned nr_bvecs; -+reread: -+ sectors_read = min_t(unsigned, -+ end - offset, buf->size >> 9); -+ nr_bvecs = buf_pages(buf->data, sectors_read << 9); -+ -+ bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); -+ bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ); -+ -+ bio->bi_iter.bi_sector = offset; -+ bch2_bio_map(bio, buf->data, sectors_read << 9); -+ -+ ret = submit_bio_wait(bio); -+ kfree(bio); -+ -+ if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read, -+ "journal read error: sector %llu", -+ offset) || -+ 
bch2_meta_read_fault("journal")) { -+ /* -+ * We don't error out of the recovery process -+ * here, since the relevant journal entry may be -+ * found on a different device, and missing or -+ * no journal entries will be handled later -+ */ -+ return 0; -+ } -+ -+ j = buf->data; -+ } -+ -+ ret = jset_validate_early(c, ca, j, offset, -+ end - offset, sectors_read); -+ switch (ret) { -+ case 0: -+ sectors = vstruct_sectors(j, c->block_bits); -+ break; -+ case JOURNAL_ENTRY_REREAD: -+ if (vstruct_bytes(j) > buf->size) { -+ ret = journal_read_buf_realloc(buf, -+ vstruct_bytes(j)); -+ if (ret) -+ return ret; -+ } -+ goto reread; -+ case JOURNAL_ENTRY_NONE: -+ if (!saw_bad) -+ return 0; -+ /* -+ * On checksum error we don't really trust the size -+ * field of the journal entry we read, so try reading -+ * again at next block boundary: -+ */ -+ sectors = block_sectors(c); -+ goto next_block; -+ default: -+ return ret; -+ } -+ -+ /* -+ * This happens sometimes if we don't have discards on - -+ * when we've partially overwritten a bucket with new -+ * journal entries. We don't need the rest of the -+ * bucket: -+ */ -+ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) -+ return 0; -+ -+ ja->bucket_seq[bucket] = le64_to_cpu(j->seq); -+ -+ csum_good = jset_csum_good(c, j); -+ if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, -+ "journal checksum error")) -+ saw_bad = true; -+ -+ ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), -+ j->encrypted_start, -+ vstruct_end(j) - (void *) j->encrypted_start); -+ bch2_fs_fatal_err_on(ret, c, -+ "error decrypting journal entry: %i", ret); -+ -+ mutex_lock(&jlist->lock); -+ ret = journal_entry_add(c, ca, (struct journal_ptr) { -+ .csum_good = csum_good, -+ .dev = ca->dev_idx, -+ .bucket = bucket, -+ .bucket_offset = offset - -+ bucket_to_sector(ca, ja->buckets[bucket]), -+ .sector = offset, -+ }, jlist, j); -+ mutex_unlock(&jlist->lock); -+ -+ switch (ret) { -+ case JOURNAL_ENTRY_ADD_OK: -+ break; -+ case JOURNAL_ENTRY_ADD_OUT_OF_RANGE: -+ break; -+ default: -+ return ret; -+ } -+next_block: -+ pr_debug("next"); -+ offset += sectors; -+ sectors_read -= sectors; -+ j = ((void *) j) + (sectors << 9); -+ } -+ -+ return 0; -+} -+ -+static void bch2_journal_read_device(struct closure *cl) -+{ -+ struct journal_device *ja = -+ container_of(cl, struct journal_device, read); -+ struct bch_dev *ca = container_of(ja, struct bch_dev, journal); -+ struct bch_fs *c = ca->fs; -+ struct journal_list *jlist = -+ container_of(cl->parent, struct journal_list, cl); -+ struct journal_replay *r, **_r; -+ struct genradix_iter iter; -+ struct journal_read_buf buf = { NULL, 0 }; -+ unsigned i; -+ int ret = 0; -+ -+ if (!ja->nr) -+ goto out; -+ -+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE); -+ if (ret) -+ goto err; -+ -+ pr_debug("%u journal buckets", ja->nr); -+ -+ for (i = 0; i < ja->nr; i++) { -+ ret = journal_read_bucket(ca, &buf, jlist, i); -+ if (ret) -+ goto err; -+ } -+ -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ mutex_lock(&jlist->lock); -+ genradix_for_each_reverse(&c->journal_entries, iter, _r) { -+ r = *_r; -+ -+ if (!r) -+ continue; -+ -+ for (i = 0; i < r->nr_ptrs; i++) { -+ if (r->ptrs[i].dev == ca->dev_idx) { -+ unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) + -+ vstruct_sectors(&r->j, c->block_bits); -+ -+ ja->cur_idx = r->ptrs[i].bucket; -+ ja->sectors_free = ca->mi.bucket_size - wrote; -+ goto found; -+ } -+ } -+ } -+found: -+ mutex_unlock(&jlist->lock); -+ -+ if (ja->bucket_seq[ja->cur_idx] && -+ ja->sectors_free == 
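/*
 * Illustrative sketch, not from the patch: journal_read_buf_realloc()
 * above caps requests at JOURNAL_ENTRY_SIZE_MAX, rounds up to a power of
 * two, and replaces the old buffer without copying - on
 * JOURNAL_ENTRY_REREAD the caller simply re-reads into the larger
 * buffer.  The same grow-only shape with generic kernel helpers
 * (kvmalloc/kvfree standing in for bcachefs' kvpmalloc/kvpfree):
 */
static int example_read_buf_realloc(void **data, size_t *size,
				    size_t new_size, size_t max_size)
{
	void *n;

	if (new_size > max_size)
		return -ENOMEM;

	new_size = roundup_pow_of_two(new_size);
	n = kvmalloc(new_size, GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	kvfree(*data);	/* old contents are re-read, not preserved */
	*data = n;
	*size = new_size;
	return 0;
}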
ca->mi.bucket_size) { -+ bch_err(c, "ja->sectors_free == ca->mi.bucket_size"); -+ bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr); -+ for (i = 0; i < 3; i++) { -+ unsigned idx = (ja->cur_idx + ja->nr - 1 + i) % ja->nr; -+ -+ bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]); -+ } -+ ja->sectors_free = 0; -+ } -+ -+ /* -+ * Set dirty_idx to indicate the entire journal is full and needs to be -+ * reclaimed - journal reclaim will immediately reclaim whatever isn't -+ * pinned when it first runs: -+ */ -+ ja->discard_idx = ja->dirty_idx_ondisk = -+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr; -+out: -+ bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); -+ kvpfree(buf.data, buf.size); -+ percpu_ref_put(&ca->io_ref); -+ closure_return(cl); -+ return; -+err: -+ mutex_lock(&jlist->lock); -+ jlist->ret = ret; -+ mutex_unlock(&jlist->lock); -+ goto out; -+} -+ -+void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, -+ struct journal_replay *j) -+{ -+ unsigned i; -+ -+ for (i = 0; i < j->nr_ptrs; i++) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev); -+ u64 offset; -+ -+ div64_u64_rem(j->ptrs[i].sector, ca->mi.bucket_size, &offset); -+ -+ if (i) -+ prt_printf(out, " "); -+ prt_printf(out, "%u:%u:%u (sector %llu)", -+ j->ptrs[i].dev, -+ j->ptrs[i].bucket, -+ j->ptrs[i].bucket_offset, -+ j->ptrs[i].sector); -+ } -+} -+ -+int bch2_journal_read(struct bch_fs *c, -+ u64 *last_seq, -+ u64 *blacklist_seq, -+ u64 *start_seq) -+{ -+ struct journal_list jlist; -+ struct journal_replay *i, **_i, *prev = NULL; -+ struct genradix_iter radix_iter; -+ struct bch_dev *ca; -+ unsigned iter; -+ struct printbuf buf = PRINTBUF; -+ bool degraded = false, last_write_torn = false; -+ u64 seq; -+ int ret = 0; -+ -+ closure_init_stack(&jlist.cl); -+ mutex_init(&jlist.lock); -+ jlist.last_seq = 0; -+ jlist.ret = 0; -+ -+ for_each_member_device(ca, c, iter) { -+ if (!c->opts.fsck && -+ !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal))) -+ continue; -+ -+ if ((ca->mi.state == BCH_MEMBER_STATE_rw || -+ ca->mi.state == BCH_MEMBER_STATE_ro) && -+ percpu_ref_tryget(&ca->io_ref)) -+ closure_call(&ca->journal.read, -+ bch2_journal_read_device, -+ system_unbound_wq, -+ &jlist.cl); -+ else -+ degraded = true; -+ } -+ -+ closure_sync(&jlist.cl); -+ -+ if (jlist.ret) -+ return jlist.ret; -+ -+ *last_seq = 0; -+ *start_seq = 0; -+ *blacklist_seq = 0; -+ -+ /* -+ * Find most recent flush entry, and ignore newer non flush entries - -+ * those entries will be blacklisted: -+ */ -+ genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) { -+ enum bkey_invalid_flags flags = BKEY_INVALID_JOURNAL; -+ -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ if (!*start_seq) -+ *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1; -+ -+ if (JSET_NO_FLUSH(&i->j)) { -+ i->ignore = true; -+ continue; -+ } -+ -+ if (!last_write_torn && !i->csum_good) { -+ last_write_torn = true; -+ i->ignore = true; -+ continue; -+ } -+ -+ if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), -+ c, le32_to_cpu(i->j.version), &i->j, NULL, -+ jset_last_seq_newer_than_seq, -+ "invalid journal entry: last_seq > seq (%llu > %llu)", -+ le64_to_cpu(i->j.last_seq), -+ le64_to_cpu(i->j.seq))) -+ i->j.last_seq = i->j.seq; -+ -+ *last_seq = le64_to_cpu(i->j.last_seq); -+ *blacklist_seq = le64_to_cpu(i->j.seq) + 1; -+ break; -+ } -+ -+ if (!*start_seq) { -+ bch_info(c, "journal read done, but no entries found"); -+ return 0; -+ } -+ -+ if (!*last_seq) { -+ fsck_err(c, 
dirty_but_no_journal_entries_post_drop_nonflushes, -+ "journal read done, but no entries found after dropping non-flushes"); -+ return 0; -+ } -+ -+ bch_info(c, "journal read done, replaying entries %llu-%llu", -+ *last_seq, *blacklist_seq - 1); -+ -+ if (*start_seq != *blacklist_seq) -+ bch_info(c, "dropped unflushed entries %llu-%llu", -+ *blacklist_seq, *start_seq - 1); -+ -+ /* Drop blacklisted entries and entries older than last_seq: */ -+ genradix_for_each(&c->journal_entries, radix_iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ seq = le64_to_cpu(i->j.seq); -+ if (seq < *last_seq) { -+ journal_replay_free(c, i); -+ continue; -+ } -+ -+ if (bch2_journal_seq_is_blacklisted(c, seq, true)) { -+ fsck_err_on(!JSET_NO_FLUSH(&i->j), c, -+ jset_seq_blacklisted, -+ "found blacklisted journal entry %llu", seq); -+ i->ignore = true; -+ } -+ } -+ -+ /* Check for missing entries: */ -+ seq = *last_seq; -+ genradix_for_each(&c->journal_entries, radix_iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ BUG_ON(seq > le64_to_cpu(i->j.seq)); -+ -+ while (seq < le64_to_cpu(i->j.seq)) { -+ u64 missing_start, missing_end; -+ struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; -+ -+ while (seq < le64_to_cpu(i->j.seq) && -+ bch2_journal_seq_is_blacklisted(c, seq, false)) -+ seq++; -+ -+ if (seq == le64_to_cpu(i->j.seq)) -+ break; -+ -+ missing_start = seq; -+ -+ while (seq < le64_to_cpu(i->j.seq) && -+ !bch2_journal_seq_is_blacklisted(c, seq, false)) -+ seq++; -+ -+ if (prev) { -+ bch2_journal_ptrs_to_text(&buf1, c, prev); -+ prt_printf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits)); -+ } else -+ prt_printf(&buf1, "(none)"); -+ bch2_journal_ptrs_to_text(&buf2, c, i); -+ -+ missing_end = seq - 1; -+ fsck_err(c, journal_entries_missing, -+ "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" -+ " prev at %s\n" -+ " next at %s", -+ missing_start, missing_end, -+ *last_seq, *blacklist_seq - 1, -+ buf1.buf, buf2.buf); -+ -+ printbuf_exit(&buf1); -+ printbuf_exit(&buf2); -+ } -+ -+ prev = i; -+ seq++; -+ } -+ -+ genradix_for_each(&c->journal_entries, radix_iter, _i) { -+ struct bch_replicas_padded replicas = { -+ .e.data_type = BCH_DATA_journal, -+ .e.nr_required = 1, -+ }; -+ unsigned ptr; -+ -+ i = *_i; -+ if (!i || i->ignore) -+ continue; -+ -+ for (ptr = 0; ptr < i->nr_ptrs; ptr++) { -+ ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); -+ -+ if (!i->ptrs[ptr].csum_good) -+ bch_err_dev_offset(ca, i->ptrs[ptr].sector, -+ "invalid journal checksum, seq %llu%s", -+ le64_to_cpu(i->j.seq), -+ i->csum_good ? 
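/*
 * Illustrative sketch, not from the patch: the missing-entry scan in
 * bch2_journal_read() above walks seq upward toward each entry's
 * sequence number, first skipping blacklisted seqs, then recording any
 * remaining hole as [missing_start, missing_end].  Its skeleton, with a
 * hypothetical blacklist predicate:
 */
static void example_find_gap(u64 *seq, u64 entry_seq,
			     bool (*blacklisted)(u64),
			     u64 *missing_start, u64 *missing_end)
{
	while (*seq < entry_seq && blacklisted(*seq))
		(*seq)++;
	*missing_start = *seq;
	while (*seq < entry_seq && !blacklisted(*seq))
		(*seq)++;
	*missing_end = *seq - 1;	/* empty range if end < start */
}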
" (had good copy on another device)" : ""); -+ } -+ -+ ret = jset_validate(c, -+ bch_dev_bkey_exists(c, i->ptrs[0].dev), -+ &i->j, -+ i->ptrs[0].sector, -+ READ); -+ if (ret) -+ goto err; -+ -+ for (ptr = 0; ptr < i->nr_ptrs; ptr++) -+ replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev; -+ -+ bch2_replicas_entry_sort(&replicas.e); -+ -+ printbuf_reset(&buf); -+ bch2_replicas_entry_to_text(&buf, &replicas.e); -+ -+ if (!degraded && -+ !bch2_replicas_marked(c, &replicas.e) && -+ (le64_to_cpu(i->j.seq) == *last_seq || -+ fsck_err(c, journal_entry_replicas_not_marked, -+ "superblock not marked as containing replicas for journal entry %llu\n %s", -+ le64_to_cpu(i->j.seq), buf.buf))) { -+ ret = bch2_mark_replicas(c, &replicas.e); -+ if (ret) -+ goto err; -+ } -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+/* journal write: */ -+ -+static void __journal_write_alloc(struct journal *j, -+ struct journal_buf *w, -+ struct dev_alloc_list *devs_sorted, -+ unsigned sectors, -+ unsigned *replicas, -+ unsigned replicas_want) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ if (*replicas >= replicas_want) -+ return; -+ -+ for (i = 0; i < devs_sorted->nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ /* -+ * Check that we can use this device, and aren't already using -+ * it: -+ */ -+ if (!ca->mi.durability || -+ ca->mi.state != BCH_MEMBER_STATE_rw || -+ !ja->nr || -+ bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) || -+ sectors > ja->sectors_free) -+ continue; -+ -+ bch2_dev_stripe_increment(ca, &j->wp.stripe); -+ -+ bch2_bkey_append_ptr(&w->key, -+ (struct bch_extent_ptr) { -+ .offset = bucket_to_sector(ca, -+ ja->buckets[ja->cur_idx]) + -+ ca->mi.bucket_size - -+ ja->sectors_free, -+ .dev = ca->dev_idx, -+ }); -+ -+ ja->sectors_free -= sectors; -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ -+ *replicas += ca->mi.durability; -+ -+ if (*replicas >= replicas_want) -+ break; -+ } -+} -+ -+/** -+ * journal_write_alloc - decide where to write next journal entry -+ * -+ * @j: journal object -+ * @w: journal buf (entry to be written) -+ * -+ * Returns: 0 on success, or -EROFS on failure -+ */ -+static int journal_write_alloc(struct journal *j, struct journal_buf *w) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_devs_mask devs; -+ struct journal_device *ja; -+ struct bch_dev *ca; -+ struct dev_alloc_list devs_sorted; -+ unsigned sectors = vstruct_sectors(w->data, c->block_bits); -+ unsigned target = c->opts.metadata_target ?: -+ c->opts.foreground_target; -+ unsigned i, replicas = 0, replicas_want = -+ READ_ONCE(c->opts.metadata_replicas); -+ -+ rcu_read_lock(); -+retry: -+ devs = target_rw_devs(c, BCH_DATA_journal, target); -+ -+ devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+ -+ if (replicas >= replicas_want) -+ goto done; -+ -+ for (i = 0; i < devs_sorted.nr; i++) { -+ ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); -+ if (!ca) -+ continue; -+ -+ ja = &ca->journal; -+ -+ if (sectors > ja->sectors_free && -+ sectors <= ca->mi.bucket_size && -+ bch2_journal_dev_buckets_available(j, ja, -+ journal_space_discarded)) { -+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr; -+ ja->sectors_free = ca->mi.bucket_size; -+ -+ /* -+ * ja->bucket_seq[ja->cur_idx] must 
always have -+ * something sensible: -+ */ -+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); -+ } -+ } -+ -+ __journal_write_alloc(j, w, &devs_sorted, -+ sectors, &replicas, replicas_want); -+ -+ if (replicas < replicas_want && target) { -+ /* Retry from all devices: */ -+ target = 0; -+ goto retry; -+ } -+done: -+ rcu_read_unlock(); -+ -+ BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); -+ -+ return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; -+} -+ -+static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -+{ -+ /* we aren't holding j->lock: */ -+ unsigned new_size = READ_ONCE(j->buf_size_want); -+ void *new_buf; -+ -+ if (buf->buf_size >= new_size) -+ return; -+ -+ new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN); -+ if (!new_buf) -+ return; -+ -+ memcpy(new_buf, buf->data, buf->buf_size); -+ -+ spin_lock(&j->lock); -+ swap(buf->data, new_buf); -+ swap(buf->buf_size, new_size); -+ spin_unlock(&j->lock); -+ -+ kvpfree(new_buf, new_size); -+} -+ -+static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j) -+{ -+ return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK); -+} -+ -+static void journal_write_done(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_buf *w = journal_last_unwritten_buf(j); -+ struct bch_replicas_padded replicas; -+ union journal_res_state old, new; -+ u64 v, seq; -+ int err = 0; -+ -+ bch2_time_stats_update(!JSET_NO_FLUSH(w->data) -+ ? j->flush_write_time -+ : j->noflush_write_time, j->write_start_time); -+ -+ if (!w->devs_written.nr) { -+ bch_err(c, "unable to write journal to sufficient devices"); -+ err = -EIO; -+ } else { -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -+ w->devs_written); -+ if (bch2_mark_replicas(c, &replicas.e)) -+ err = -EIO; -+ } -+ -+ if (err) -+ bch2_fatal_error(c); -+ -+ spin_lock(&j->lock); -+ seq = le64_to_cpu(w->data->seq); -+ -+ if (seq >= j->pin.front) -+ journal_seq_pin(j, seq)->devs = w->devs_written; -+ -+ if (!err) { -+ if (!JSET_NO_FLUSH(w->data)) { -+ j->flushed_seq_ondisk = seq; -+ j->last_seq_ondisk = w->last_seq; -+ -+ bch2_do_discards(c); -+ closure_wake_up(&c->freelist_wait); -+ -+ bch2_reset_alloc_cursors(c); -+ } -+ } else if (!j->err_seq || seq < j->err_seq) -+ j->err_seq = seq; -+ -+ j->seq_ondisk = seq; -+ -+ /* -+ * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard -+ * more buckets: -+ * -+ * Must come before signaling write completion, for -+ * bch2_fs_journal_stop(): -+ */ -+ if (j->watermark != BCH_WATERMARK_stripe) -+ journal_reclaim_kick(&c->journal); -+ -+ /* also must come before signalling write completion: */ -+ closure_debug_destroy(cl); -+ -+ v = atomic64_read(&j->reservations.counter); -+ do { -+ old.v = new.v = v; -+ BUG_ON(journal_state_count(new, new.unwritten_idx)); -+ -+ new.unwritten_idx++; -+ } while ((v = atomic64_cmpxchg(&j->reservations.counter, -+ old.v, new.v)) != old.v); -+ -+ bch2_journal_space_available(j); -+ -+ closure_wake_up(&w->wait); -+ journal_wake(j); -+ -+ if (!journal_state_count(new, new.unwritten_idx) && -+ journal_last_unwritten_seq(j) <= journal_cur_seq(j)) { -+ spin_unlock(&j->lock); -+ closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL); -+ } else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) && -+ new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) { -+ struct journal_buf *buf = journal_cur_buf(j); -+ long delta = 
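/*
 * Illustrative sketch, not from the patch: journal_write_done() above
 * advances reservations.unwritten_idx with the classic lockless
 * read-modify-write loop over atomic64_cmpxchg() (assumes
 * <linux/atomic.h>).  The bare shape of that idiom:
 */
static inline void example_bump_packed_counter(atomic64_t *counter)
{
	u64 v = atomic64_read(counter), old;

	do {
		old = v;
		/* decode the packed state from 'old', modify, re-encode;
		 * a plain increment stands in for that here: */
	} while ((v = atomic64_cmpxchg(counter, old, old + 1)) != old);
}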
buf->expires - jiffies; -+ -+ /* -+ * We don't close a journal entry to write it while there's -+ * previous entries still in flight - the current journal entry -+ * might want to be written now: -+ */ -+ -+ spin_unlock(&j->lock); -+ mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta)); -+ } else { -+ spin_unlock(&j->lock); -+ } -+} -+ -+static void journal_write_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ struct journal *j = &ca->fs->journal; -+ struct journal_buf *w = journal_last_unwritten_buf(j); -+ unsigned long flags; -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, -+ "error writing journal entry %llu: %s", -+ le64_to_cpu(w->data->seq), -+ bch2_blk_status_to_str(bio->bi_status)) || -+ bch2_meta_write_fault("journal")) { -+ spin_lock_irqsave(&j->err_lock, flags); -+ bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx); -+ spin_unlock_irqrestore(&j->err_lock, flags); -+ } -+ -+ closure_put(&j->io); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+static void do_journal_write(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_buf *w = journal_last_unwritten_buf(j); -+ struct bch_extent_ptr *ptr; -+ struct bio *bio; -+ unsigned sectors = vstruct_sectors(w->data, c->block_bits); -+ -+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { -+ ca = bch_dev_bkey_exists(c, ptr->dev); -+ if (!percpu_ref_tryget(&ca->io_ref)) { -+ /* XXX: fix this */ -+ bch_err(c, "missing device for journal write\n"); -+ continue; -+ } -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal], -+ sectors); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); -+ bio->bi_iter.bi_sector = ptr->offset; -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ -+ BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector); -+ ca->prev_journal_sector = bio->bi_iter.bi_sector; -+ -+ if (!JSET_NO_FLUSH(w->data)) -+ bio->bi_opf |= REQ_FUA; -+ if (!JSET_NO_FLUSH(w->data) && !w->separate_flush) -+ bio->bi_opf |= REQ_PREFLUSH; -+ -+ bch2_bio_map(bio, w->data, sectors << 9); -+ -+ trace_and_count(c, journal_write, bio); -+ closure_bio_submit(bio, cl); -+ -+ ca->journal.bucket_seq[ca->journal.cur_idx] = -+ le64_to_cpu(w->data->seq); -+ } -+ -+ continue_at(cl, journal_write_done, c->io_complete_wq); -+} -+ -+static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct jset_entry *start, *end, *i, *next, *prev = NULL; -+ struct jset *jset = w->data; -+ unsigned sectors, bytes, u64s; -+ bool validate_before_checksum = false; -+ unsigned long btree_roots_have = 0; -+ int ret; -+ -+ /* -+ * Simple compaction, dropping empty jset_entries (from journal -+ * reservations that weren't fully used) and merging jset_entries that -+ * can be. 
-+ * -+ * If we wanted to be really fancy here, we could sort all the keys in -+ * the jset and drop keys that were overwritten - probably not worth it: -+ */ -+ vstruct_for_each_safe(jset, i, next) { -+ unsigned u64s = le16_to_cpu(i->u64s); -+ -+ /* Empty entry: */ -+ if (!u64s) -+ continue; -+ -+ /* -+ * New btree roots are set by journalling them; when the journal -+ * entry gets written we have to propagate them to -+ * c->btree_roots -+ * -+ * But, every journal entry we write has to contain all the -+ * btree roots (at least for now); so after we copy btree roots -+ * to c->btree_roots we have to get any missing btree roots and -+ * add them to this journal entry: -+ */ -+ if (i->type == BCH_JSET_ENTRY_btree_root) { -+ bch2_journal_entry_to_btree_root(c, i); -+ __set_bit(i->btree_id, &btree_roots_have); -+ } -+ -+ /* Can we merge with previous entry? */ -+ if (prev && -+ i->btree_id == prev->btree_id && -+ i->level == prev->level && -+ i->type == prev->type && -+ i->type == BCH_JSET_ENTRY_btree_keys && -+ le16_to_cpu(prev->u64s) + u64s <= U16_MAX) { -+ memmove_u64s_down(vstruct_next(prev), -+ i->_data, -+ u64s); -+ le16_add_cpu(&prev->u64s, u64s); -+ continue; -+ } -+ -+ /* Couldn't merge, move i into new position (after prev): */ -+ prev = prev ? vstruct_next(prev) : jset->start; -+ if (i != prev) -+ memmove_u64s_down(prev, i, jset_u64s(u64s)); -+ } -+ -+ prev = prev ? vstruct_next(prev) : jset->start; -+ jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -+ -+ start = end = vstruct_last(jset); -+ -+ end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); -+ -+ bch2_journal_super_entries_add_common(c, &end, -+ le64_to_cpu(jset->seq)); -+ u64s = (u64 *) end - (u64 *) start; -+ BUG_ON(u64s > j->entry_u64s_reserved); -+ -+ le32_add_cpu(&jset->u64s, u64s); -+ -+ sectors = vstruct_sectors(jset, c->block_bits); -+ bytes = vstruct_bytes(jset); -+ -+ if (sectors > w->sectors) { -+ bch2_fs_fatal_error(c, "aieeee! 
journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
-+				    vstruct_bytes(jset), w->sectors << 9,
-+				    u64s, w->u64s_reserved, j->entry_u64s_reserved);
-+		return -EINVAL;
-+	}
-+
-+	jset->magic		= cpu_to_le64(jset_magic(c));
-+	jset->version		= cpu_to_le32(c->sb.version);
-+
-+	SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
-+	SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
-+
-+	if (!JSET_NO_FLUSH(jset) && journal_entry_empty(jset))
-+		j->last_empty_seq = le64_to_cpu(jset->seq);
-+
-+	if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
-+		validate_before_checksum = true;
-+
-+	if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
-+		validate_before_checksum = true;
-+
-+	if (validate_before_checksum &&
-+	    (ret = jset_validate(c, NULL, jset, 0, WRITE)))
-+		return ret;
-+
-+	ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
-+		    jset->encrypted_start,
-+		    vstruct_end(jset) - (void *) jset->encrypted_start);
-+	if (bch2_fs_fatal_err_on(ret, c,
-+			"error encrypting journal entry: %i", ret))
-+		return ret;
-+
-+	jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
-+				  journal_nonce(jset), jset);
-+
-+	if (!validate_before_checksum &&
-+	    (ret = jset_validate(c, NULL, jset, 0, WRITE)))
-+		return ret;
-+
-+	memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
-+	return 0;
-+}
-+
-+static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w)
-+{
-+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+	int error = bch2_journal_error(j);
-+
-+	/*
-+	 * If the journal is in an error state - we did an emergency shutdown -
-+	 * we prefer to continue doing journal writes. We just mark them as
-+	 * noflush so they'll never be used, but they'll still be visible to the
-+	 * list_journal tool - this helps in debugging.
-+	 *
-+	 * There's a caveat: the first journal write after marking the
-+	 * superblock dirty must always be a flush write, because on startup
-+	 * from a clean shutdown we didn't necessarily read the journal and the
-+	 * new journal write might overwrite whatever was in the journal
-+	 * previously - we can't leave the journal without any flush writes in
-+	 * it.
-+	 *
-+	 * So if we're in an error state, and we're still starting up, we don't
-+	 * write anything at all.
-+ */ -+ if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags)) -+ return -EIO; -+ -+ if (error || -+ w->noflush || -+ (!w->must_flush && -+ (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && -+ test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) { -+ w->noflush = true; -+ SET_JSET_NO_FLUSH(w->data, true); -+ w->data->last_seq = 0; -+ w->last_seq = 0; -+ -+ j->nr_noflush_writes++; -+ } else { -+ j->last_flush_write = jiffies; -+ j->nr_flush_writes++; -+ clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); -+ } -+ -+ return 0; -+} -+ -+void bch2_journal_write(struct closure *cl) -+{ -+ struct journal *j = container_of(cl, struct journal, io); -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ struct journal_buf *w = journal_last_unwritten_buf(j); -+ struct bch_replicas_padded replicas; -+ struct bio *bio; -+ struct printbuf journal_debug_buf = PRINTBUF; -+ unsigned i, nr_rw_members = 0; -+ int ret; -+ -+ BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); -+ -+ j->write_start_time = local_clock(); -+ -+ spin_lock(&j->lock); -+ ret = bch2_journal_write_pick_flush(j, w); -+ spin_unlock(&j->lock); -+ if (ret) -+ goto err; -+ -+ journal_buf_realloc(j, w); -+ -+ ret = bch2_journal_write_prep(j, w); -+ if (ret) -+ goto err; -+ -+ while (1) { -+ spin_lock(&j->lock); -+ ret = journal_write_alloc(j, w); -+ if (!ret || !j->can_discard) -+ break; -+ -+ spin_unlock(&j->lock); -+ bch2_journal_do_discards(j); -+ } -+ -+ if (ret) { -+ __bch2_journal_debug_to_text(&journal_debug_buf, j); -+ spin_unlock(&j->lock); -+ bch_err(c, "Unable to allocate journal write:\n%s", -+ journal_debug_buf.buf); -+ printbuf_exit(&journal_debug_buf); -+ goto err; -+ } -+ -+ /* -+ * write is allocated, no longer need to account for it in -+ * bch2_journal_space_available(): -+ */ -+ w->sectors = 0; -+ -+ /* -+ * journal entry has been compacted and allocated, recalculate space -+ * available: -+ */ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ -+ w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); -+ -+ if (c->opts.nochanges) -+ goto no_io; -+ -+ for_each_rw_member(ca, c, i) -+ nr_rw_members++; -+ -+ if (nr_rw_members > 1) -+ w->separate_flush = true; -+ -+ /* -+ * Mark journal replicas before we submit the write to guarantee -+ * recovery will find the journal entries after a crash. 
-+ */ -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -+ w->devs_written); -+ ret = bch2_mark_replicas(c, &replicas.e); -+ if (ret) -+ goto err; -+ -+ if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { -+ for_each_rw_member(ca, c, i) { -+ percpu_ref_get(&ca->io_ref); -+ -+ bio = ca->journal.bio; -+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH); -+ bio->bi_end_io = journal_write_endio; -+ bio->bi_private = ca; -+ closure_bio_submit(bio, cl); -+ } -+ } -+ -+ continue_at(cl, do_journal_write, c->io_complete_wq); -+ return; -+no_io: -+ continue_at(cl, journal_write_done, c->io_complete_wq); -+ return; -+err: -+ bch2_fatal_error(c); -+ continue_at(cl, journal_write_done, c->io_complete_wq); -+} -diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h -new file mode 100644 -index 000000000000..a88d097b13f1 ---- /dev/null -+++ b/fs/bcachefs/journal_io.h -@@ -0,0 +1,65 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_IO_H -+#define _BCACHEFS_JOURNAL_IO_H -+ -+/* -+ * Only used for holding the journal entries we read in btree_journal_read() -+ * during cache_registration -+ */ -+struct journal_replay { -+ struct journal_ptr { -+ bool csum_good; -+ u8 dev; -+ u32 bucket; -+ u32 bucket_offset; -+ u64 sector; -+ } ptrs[BCH_REPLICAS_MAX]; -+ unsigned nr_ptrs; -+ -+ bool csum_good; -+ bool ignore; -+ /* must be last: */ -+ struct jset j; -+}; -+ -+static inline struct jset_entry *__jset_entry_type_next(struct jset *jset, -+ struct jset_entry *entry, unsigned type) -+{ -+ while (entry < vstruct_last(jset)) { -+ if (entry->type == type) -+ return entry; -+ -+ entry = vstruct_next(entry); -+ } -+ -+ return NULL; -+} -+ -+#define for_each_jset_entry_type(entry, jset, type) \ -+ for (entry = (jset)->start; \ -+ (entry = __jset_entry_type_next(jset, entry, type)); \ -+ entry = vstruct_next(entry)) -+ -+#define jset_entry_for_each_key(_e, _k) \ -+ for (_k = (_e)->start; \ -+ _k < vstruct_last(_e); \ -+ _k = bkey_next(_k)) -+ -+#define for_each_jset_key(k, entry, jset) \ -+ for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)\ -+ jset_entry_for_each_key(entry, k) -+ -+int bch2_journal_entry_validate(struct bch_fs *, struct jset *, -+ struct jset_entry *, unsigned, int, -+ enum bkey_invalid_flags); -+void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, -+ struct jset_entry *); -+ -+void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, -+ struct journal_replay *); -+ -+int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *); -+ -+void bch2_journal_write(struct closure *); -+ -+#endif /* _BCACHEFS_JOURNAL_IO_H */ -diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c -new file mode 100644 -index 000000000000..9a584aaaa2eb ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.c -@@ -0,0 +1,876 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "errcode.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "replicas.h" -+#include "sb-members.h" -+#include "trace.h" -+ -+#include -+#include -+ -+/* Free space calculations: */ -+ -+static unsigned journal_space_from(struct journal_device *ja, -+ enum journal_space_from from) -+{ -+ switch (from) { -+ case journal_space_discarded: -+ return ja->discard_idx; -+ case journal_space_clean_ondisk: -+ return ja->dirty_idx_ondisk; -+ case journal_space_clean: -+ return ja->dirty_idx; -+ default: -+ 
BUG();
-+	}
-+}
-+
-+unsigned bch2_journal_dev_buckets_available(struct journal *j,
-+					    struct journal_device *ja,
-+					    enum journal_space_from from)
-+{
-+	unsigned available = (journal_space_from(ja, from) -
-+			      ja->cur_idx - 1 + ja->nr) % ja->nr;
-+
-+	/*
-+	 * Don't use the last bucket unless writing the new last_seq
-+	 * will make another bucket available:
-+	 */
-+	if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
-+		--available;
-+
-+	return available;
-+}
-+
-+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
-+{
-+	union journal_preres_state old, new;
-+	u64 v = atomic64_read(&j->prereserved.counter);
-+
-+	do {
-+		old.v = new.v = v;
-+		new.remaining = u64s_remaining;
-+	} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-+				       old.v, new.v)) != old.v);
-+}
-+
-+static struct journal_space
-+journal_dev_space_available(struct journal *j, struct bch_dev *ca,
-+			    enum journal_space_from from)
-+{
-+	struct journal_device *ja = &ca->journal;
-+	unsigned sectors, buckets, unwritten;
-+	u64 seq;
-+
-+	if (from == journal_space_total)
-+		return (struct journal_space) {
-+			.next_entry	= ca->mi.bucket_size,
-+			.total		= ca->mi.bucket_size * ja->nr,
-+		};
-+
-+	buckets = bch2_journal_dev_buckets_available(j, ja, from);
-+	sectors = ja->sectors_free;
-+
-+	/*
-+	 * Note that we don't allocate the space for a journal entry
-+	 * until we write it out - thus, account for it here:
-+	 */
-+	for (seq = journal_last_unwritten_seq(j);
-+	     seq <= journal_cur_seq(j);
-+	     seq++) {
-+		unwritten = j->buf[seq & JOURNAL_BUF_MASK].sectors;
-+
-+		if (!unwritten)
-+			continue;
-+
-+		/* entry won't fit on this device, skip: */
-+		if (unwritten > ca->mi.bucket_size)
-+			continue;
-+
-+		if (unwritten >= sectors) {
-+			if (!buckets) {
-+				sectors = 0;
-+				break;
-+			}
-+
-+			buckets--;
-+			sectors = ca->mi.bucket_size;
-+		}
-+
-+		sectors -= unwritten;
-+	}
-+
-+	if (sectors < ca->mi.bucket_size && buckets) {
-+		buckets--;
-+		sectors = ca->mi.bucket_size;
-+	}
-+
-+	return (struct journal_space) {
-+		.next_entry	= sectors,
-+		.total		= sectors + buckets * ca->mi.bucket_size,
-+	};
-+}
-+
-+static struct journal_space __journal_space_available(struct journal *j, unsigned nr_devs_want,
-+			    enum journal_space_from from)
-+{
-+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+	struct bch_dev *ca;
-+	unsigned i, pos, nr_devs = 0;
-+	struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX];
-+
-+	BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
-+
-+	rcu_read_lock();
-+	for_each_member_device_rcu(ca, c, i,
-+				   &c->rw_devs[BCH_DATA_journal]) {
-+		if (!ca->journal.nr)
-+			continue;
-+
-+		space = journal_dev_space_available(j, ca, from);
-+		if (!space.next_entry)
-+			continue;
-+
-+		for (pos = 0; pos < nr_devs; pos++)
-+			if (space.total > dev_space[pos].total)
-+				break;
-+
-+		array_insert_item(dev_space, nr_devs, pos, space);
-+	}
-+	rcu_read_unlock();
-+
-+	if (nr_devs < nr_devs_want)
-+		return (struct journal_space) { 0, 0 };
-+
-+	/*
-+	 * We sorted largest to smallest, and we want the smallest out of the
-+	 * @nr_devs_want largest devices:
-+	 */
-+	return dev_space[nr_devs_want - 1];
-+}
-+
-+void bch2_journal_space_available(struct journal *j)
-+{
-+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-+	struct bch_dev *ca;
-+	unsigned clean, clean_ondisk, total;
-+	s64 u64s_remaining = 0;
-+	unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
-+				      j->buf[1].buf_size >> 9);
-+	unsigned i, nr_online = 0, nr_devs_want;
-+	bool can_discard = false;
-+	int ret = 0;
-+
-+	
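/*
 * Illustrative sketch, not from the patch: __journal_space_available()
 * above keeps dev_space[] sorted largest-first via array_insert_item()
 * and returns dev_space[nr_devs_want - 1] - the smallest of the
 * nr_devs_want largest devices, since a replicated journal entry is
 * limited by the least roomy device it must land on.  The selection in
 * miniature:
 */
static unsigned example_kth_largest(const unsigned *vals, unsigned nr,
				    unsigned k)
{
	unsigned sorted[8] = {}, i, j, pos;

	nr = nr < 8 ? nr : 8;	/* capped for brevity */

	for (i = 0; i < nr; i++) {
		for (pos = 0; pos < i; pos++)
			if (vals[i] > sorted[pos])
				break;
		for (j = i; j > pos; j--)	/* shift smaller values down */
			sorted[j] = sorted[j - 1];
		sorted[pos] = vals[i];
	}

	return k && k <= nr ? sorted[k - 1] : 0;
}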
lockdep_assert_held(&j->lock); -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, -+ &c->rw_devs[BCH_DATA_journal]) { -+ struct journal_device *ja = &ca->journal; -+ -+ if (!ja->nr) -+ continue; -+ -+ while (ja->dirty_idx != ja->cur_idx && -+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j)) -+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr; -+ -+ while (ja->dirty_idx_ondisk != ja->dirty_idx && -+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk) -+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; -+ -+ if (ja->discard_idx != ja->dirty_idx_ondisk) -+ can_discard = true; -+ -+ max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); -+ nr_online++; -+ } -+ rcu_read_unlock(); -+ -+ j->can_discard = can_discard; -+ -+ if (nr_online < c->opts.metadata_replicas_required) { -+ ret = JOURNAL_ERR_insufficient_devices; -+ goto out; -+ } -+ -+ nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas); -+ -+ for (i = 0; i < journal_space_nr; i++) -+ j->space[i] = __journal_space_available(j, nr_devs_want, i); -+ -+ clean_ondisk = j->space[journal_space_clean_ondisk].total; -+ clean = j->space[journal_space_clean].total; -+ total = j->space[journal_space_total].total; -+ -+ if (!j->space[journal_space_discarded].next_entry) -+ ret = JOURNAL_ERR_journal_full; -+ -+ if ((j->space[journal_space_clean_ondisk].next_entry < -+ j->space[journal_space_clean_ondisk].total) && -+ (clean - clean_ondisk <= total / 8) && -+ (clean_ondisk * 2 > clean)) -+ set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); -+ else -+ clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); -+ -+ u64s_remaining = (u64) clean << 6; -+ u64s_remaining -= (u64) total << 3; -+ u64s_remaining = max(0LL, u64s_remaining); -+ u64s_remaining /= 4; -+ u64s_remaining = min_t(u64, u64s_remaining, U32_MAX); -+out: -+ j->cur_entry_sectors = !ret ? 
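/*
 * Worked example, not from the patch: the prereservation budget computed
 * above is u64s_remaining = max(0, clean*64 - total*8) / 4, clamped to
 * U32_MAX.  E.g. with total = 1024 sectors of journal and clean = 512
 * sectors free: 512*64 - 1024*8 = 32768 - 8192 = 24576, /4 = 6144 u64s
 * of headroom; once clean space falls to total/8 the budget reaches
 * zero and reclaim must make progress before new prereservations.
 */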
j->space[journal_space_discarded].next_entry : 0; -+ j->cur_entry_error = ret; -+ journal_set_remaining(j, u64s_remaining); -+ journal_set_watermark(j); -+ -+ if (!ret) -+ journal_wake(j); -+} -+ -+/* Discards - last part of journal reclaim: */ -+ -+static bool should_discard_bucket(struct journal *j, struct journal_device *ja) -+{ -+ bool ret; -+ -+ spin_lock(&j->lock); -+ ret = ja->discard_idx != ja->dirty_idx_ondisk; -+ spin_unlock(&j->lock); -+ -+ return ret; -+} -+ -+/* -+ * Advance ja->discard_idx as long as it points to buckets that are no longer -+ * dirty, issuing discards if necessary: -+ */ -+void bch2_journal_do_discards(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ unsigned iter; -+ -+ mutex_lock(&j->discard_lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ -+ while (should_discard_bucket(j, ja)) { -+ if (!c->opts.nochanges && -+ ca->mi.discard && -+ bdev_max_discard_sectors(ca->disk_sb.bdev)) -+ blkdev_issue_discard(ca->disk_sb.bdev, -+ bucket_to_sector(ca, -+ ja->buckets[ja->discard_idx]), -+ ca->mi.bucket_size, GFP_NOFS); -+ -+ spin_lock(&j->lock); -+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr; -+ -+ bch2_journal_space_available(j); -+ spin_unlock(&j->lock); -+ } -+ } -+ -+ mutex_unlock(&j->discard_lock); -+} -+ -+/* -+ * Journal entry pinning - machinery for holding a reference on a given journal -+ * entry, holding it open to ensure it gets replayed during recovery: -+ */ -+ -+void bch2_journal_reclaim_fast(struct journal *j) -+{ -+ bool popped = false; -+ -+ lockdep_assert_held(&j->lock); -+ -+ /* -+ * Unpin journal entries whose reference counts reached zero, meaning -+ * all btree nodes got written out -+ */ -+ while (!fifo_empty(&j->pin) && -+ !atomic_read(&fifo_peek_front(&j->pin).count)) { -+ j->pin.front++; -+ popped = true; -+ } -+ -+ if (popped) -+ bch2_journal_space_available(j); -+} -+ -+bool __bch2_journal_pin_put(struct journal *j, u64 seq) -+{ -+ struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq); -+ -+ return atomic_dec_and_test(&pin_list->count); -+} -+ -+void bch2_journal_pin_put(struct journal *j, u64 seq) -+{ -+ if (__bch2_journal_pin_put(j, seq)) { -+ spin_lock(&j->lock); -+ bch2_journal_reclaim_fast(j); -+ spin_unlock(&j->lock); -+ } -+} -+ -+static inline bool __journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ struct journal_entry_pin_list *pin_list; -+ -+ if (!journal_pin_active(pin)) -+ return false; -+ -+ if (j->flush_in_progress == pin) -+ j->flush_in_progress_dropped = true; -+ -+ pin_list = journal_seq_pin(j, pin->seq); -+ pin->seq = 0; -+ list_del_init(&pin->list); -+ -+ /* -+ * Unpinning a journal entry may make journal_next_bucket() succeed, if -+ * writing a new last_seq will now make another bucket available: -+ */ -+ return atomic_dec_and_test(&pin_list->count) && -+ pin_list == &fifo_peek_front(&j->pin); -+} -+ -+void bch2_journal_pin_drop(struct journal *j, -+ struct journal_entry_pin *pin) -+{ -+ spin_lock(&j->lock); -+ if (__journal_pin_drop(j, pin)) -+ bch2_journal_reclaim_fast(j); -+ spin_unlock(&j->lock); -+} -+ -+static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) -+{ -+ if (fn == bch2_btree_node_flush0 || -+ fn == bch2_btree_node_flush1) -+ return JOURNAL_PIN_btree; -+ else if (fn == bch2_btree_key_cache_journal_flush) -+ return JOURNAL_PIN_key_cache; -+ else -+ return JOURNAL_PIN_other; -+} -+ -+void bch2_journal_pin_set(struct journal *j, u64 seq, -+ 
struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ struct journal_entry_pin_list *pin_list; -+ bool reclaim; -+ -+ spin_lock(&j->lock); -+ -+ if (seq < journal_last_seq(j)) { -+ /* -+ * bch2_journal_pin_copy() raced with bch2_journal_pin_drop() on -+ * the src pin - with the pin dropped, the entry to pin might no -+ * longer to exist, but that means there's no longer anything to -+ * copy and we can bail out here: -+ */ -+ spin_unlock(&j->lock); -+ return; -+ } -+ -+ pin_list = journal_seq_pin(j, seq); -+ -+ reclaim = __journal_pin_drop(j, pin); -+ -+ atomic_inc(&pin_list->count); -+ pin->seq = seq; -+ pin->flush = flush_fn; -+ -+ if (flush_fn) -+ list_add(&pin->list, &pin_list->list[journal_pin_type(flush_fn)]); -+ else -+ list_add(&pin->list, &pin_list->flushed); -+ -+ if (reclaim) -+ bch2_journal_reclaim_fast(j); -+ spin_unlock(&j->lock); -+ -+ /* -+ * If the journal is currently full, we might want to call flush_fn -+ * immediately: -+ */ -+ journal_wake(j); -+} -+ -+/** -+ * bch2_journal_pin_flush: ensure journal pin callback is no longer running -+ * @j: journal object -+ * @pin: pin to flush -+ */ -+void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin) -+{ -+ BUG_ON(journal_pin_active(pin)); -+ -+ wait_event(j->pin_flush_wait, j->flush_in_progress != pin); -+} -+ -+/* -+ * Journal reclaim: flush references to open journal entries to reclaim space in -+ * the journal -+ * -+ * May be done by the journal code in the background as needed to free up space -+ * for more journal entries, or as part of doing a clean shutdown, or to migrate -+ * data off of a specific device: -+ */ -+ -+static struct journal_entry_pin * -+journal_get_next_pin(struct journal *j, -+ u64 seq_to_flush, -+ unsigned allowed_below_seq, -+ unsigned allowed_above_seq, -+ u64 *seq) -+{ -+ struct journal_entry_pin_list *pin_list; -+ struct journal_entry_pin *ret = NULL; -+ unsigned i; -+ -+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) { -+ if (*seq > seq_to_flush && !allowed_above_seq) -+ break; -+ -+ for (i = 0; i < JOURNAL_PIN_NR; i++) -+ if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) || -+ ((1U << i) & allowed_above_seq)) { -+ ret = list_first_entry_or_null(&pin_list->list[i], -+ struct journal_entry_pin, list); -+ if (ret) -+ return ret; -+ } -+ } -+ -+ return NULL; -+} -+ -+/* returns true if we did work */ -+static size_t journal_flush_pins(struct journal *j, -+ u64 seq_to_flush, -+ unsigned allowed_below_seq, -+ unsigned allowed_above_seq, -+ unsigned min_any, -+ unsigned min_key_cache) -+{ -+ struct journal_entry_pin *pin; -+ size_t nr_flushed = 0; -+ journal_pin_flush_fn flush_fn; -+ u64 seq; -+ int err; -+ -+ lockdep_assert_held(&j->reclaim_lock); -+ -+ while (1) { -+ unsigned allowed_above = allowed_above_seq; -+ unsigned allowed_below = allowed_below_seq; -+ -+ if (min_any) { -+ allowed_above |= ~0; -+ allowed_below |= ~0; -+ } -+ -+ if (min_key_cache) { -+ allowed_above |= 1U << JOURNAL_PIN_key_cache; -+ allowed_below |= 1U << JOURNAL_PIN_key_cache; -+ } -+ -+ cond_resched(); -+ -+ j->last_flushed = jiffies; -+ -+ spin_lock(&j->lock); -+ pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq); -+ if (pin) { -+ BUG_ON(j->flush_in_progress); -+ j->flush_in_progress = pin; -+ j->flush_in_progress_dropped = false; -+ flush_fn = pin->flush; -+ } -+ spin_unlock(&j->lock); -+ -+ if (!pin) -+ break; -+ -+ if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush) -+ min_key_cache--; -+ -+ if (min_any) -+ min_any--; 
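/*
 * Illustrative sketch, not from the patch: journal_get_next_pin() above
 * treats allowed_below_seq/allowed_above_seq as bitmasks over the
 * JOURNAL_PIN_* categories, so callers can e.g. flush only key-cache
 * pins past seq_to_flush while flushing every category below it.  The
 * test, isolated (enum values hypothetical):
 */
enum example_pin_type {
	EX_PIN_btree,
	EX_PIN_key_cache,
	EX_PIN_other,
	EX_PIN_NR,
};

static bool example_pin_allowed(unsigned type, u64 seq, u64 seq_to_flush,
				unsigned allowed_below, unsigned allowed_above)
{
	return (((1U << type) & allowed_below) && seq <= seq_to_flush) ||
	       ((1U << type) & allowed_above);
}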
-+ -+ err = flush_fn(j, pin, seq); -+ -+ spin_lock(&j->lock); -+ /* Pin might have been dropped or rearmed: */ -+ if (likely(!err && !j->flush_in_progress_dropped)) -+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed); -+ j->flush_in_progress = NULL; -+ j->flush_in_progress_dropped = false; -+ spin_unlock(&j->lock); -+ -+ wake_up(&j->pin_flush_wait); -+ -+ if (err) -+ break; -+ -+ nr_flushed++; -+ } -+ -+ return nr_flushed; -+} -+ -+static u64 journal_seq_to_flush(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct bch_dev *ca; -+ u64 seq_to_flush = 0; -+ unsigned iter; -+ -+ spin_lock(&j->lock); -+ -+ for_each_rw_member(ca, c, iter) { -+ struct journal_device *ja = &ca->journal; -+ unsigned nr_buckets, bucket_to_flush; -+ -+ if (!ja->nr) -+ continue; -+ -+ /* Try to keep the journal at most half full: */ -+ nr_buckets = ja->nr / 2; -+ -+ /* And include pre-reservations: */ -+ nr_buckets += DIV_ROUND_UP(j->prereserved.reserved, -+ (ca->mi.bucket_size << 6) - -+ journal_entry_overhead(j)); -+ -+ nr_buckets = min(nr_buckets, ja->nr); -+ -+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr; -+ seq_to_flush = max(seq_to_flush, -+ ja->bucket_seq[bucket_to_flush]); -+ } -+ -+ /* Also flush if the pin fifo is more than half full */ -+ seq_to_flush = max_t(s64, seq_to_flush, -+ (s64) journal_cur_seq(j) - -+ (j->pin.size >> 1)); -+ spin_unlock(&j->lock); -+ -+ return seq_to_flush; -+} -+ -+/** -+ * __bch2_journal_reclaim - free up journal buckets -+ * @j: journal object -+ * @direct: direct or background reclaim? -+ * @kicked: requested to run since we last ran? -+ * Returns: 0 on success, or -EIO if the journal has been shutdown -+ * -+ * Background journal reclaim writes out btree nodes. It should be run -+ * early enough so that we never completely run out of journal buckets. -+ * -+ * High watermarks for triggering background reclaim: -+ * - FIFO has fewer than 512 entries left -+ * - fewer than 25% journal buckets free -+ * -+ * Background reclaim runs until low watermarks are reached: -+ * - FIFO has more than 1024 entries left -+ * - more than 50% journal buckets free -+ * -+ * As long as a reclaim can complete in the time it takes to fill up -+ * 512 journal entries or 25% of all journal buckets, then -+ * journal_next_bucket() should not stall. 
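To make the "keep the journal at most half full" rule in journal_seq_to_flush() concrete, here is a standalone toy calculation for a single device. The bucket layout and sequence numbers are invented, and the real code additionally folds in pre-reservations and the pin FIFO fill level:

```c
#include <stdio.h>

int main(void)
{
	/* hypothetical device with 8 journal buckets; bucket_seq[i] is the
	 * newest journal sequence number written into bucket i */
	unsigned long long bucket_seq[8] = { 40, 41, 42, 43, 36, 37, 38, 39 };
	unsigned nr = 8, cur_idx = 3;	/* currently writing bucket 3 */

	/* flushing everything up to the sequence stored in the bucket half
	 * way ahead of cur_idx (i.e. the oldest half of the ring) lets that
	 * half of the buckets be discarded and reused */
	unsigned nr_buckets = nr / 2;
	unsigned bucket_to_flush = (cur_idx + nr_buckets) % nr;
	unsigned long long seq_to_flush = bucket_seq[bucket_to_flush];

	printf("flush pins up to seq %llu to reclaim buckets %u..%u\n",
	       seq_to_flush, (cur_idx + 1) % nr, bucket_to_flush);
	return 0;
}
```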
-+ */ -+static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ u64 seq_to_flush; -+ size_t min_nr, min_key_cache, nr_flushed; -+ unsigned flags; -+ int ret = 0; -+ -+ /* -+ * We can't invoke memory reclaim while holding the reclaim_lock - -+ * journal reclaim is required to make progress for memory reclaim -+ * (cleaning the caches), so we can't get stuck in memory reclaim while -+ * we're holding the reclaim lock: -+ */ -+ lockdep_assert_held(&j->reclaim_lock); -+ flags = memalloc_noreclaim_save(); -+ -+ do { -+ if (kthread && kthread_should_stop()) -+ break; -+ -+ if (bch2_journal_error(j)) { -+ ret = -EIO; -+ break; -+ } -+ -+ bch2_journal_do_discards(j); -+ -+ seq_to_flush = journal_seq_to_flush(j); -+ min_nr = 0; -+ -+ /* -+ * If it's been longer than j->reclaim_delay_ms since we last flushed, -+ * make sure to flush at least one journal pin: -+ */ -+ if (time_after(jiffies, j->last_flushed + -+ msecs_to_jiffies(c->opts.journal_reclaim_delay))) -+ min_nr = 1; -+ -+ if (j->prereserved.reserved * 4 > j->prereserved.remaining) -+ min_nr = 1; -+ -+ if (fifo_free(&j->pin) <= 32) -+ min_nr = 1; -+ -+ if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used) -+ min_nr = 1; -+ -+ min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128); -+ -+ trace_and_count(c, journal_reclaim_start, c, -+ direct, kicked, -+ min_nr, min_key_cache, -+ j->prereserved.reserved, -+ j->prereserved.remaining, -+ atomic_read(&c->btree_cache.dirty), -+ c->btree_cache.used, -+ atomic_long_read(&c->btree_key_cache.nr_dirty), -+ atomic_long_read(&c->btree_key_cache.nr_keys)); -+ -+ nr_flushed = journal_flush_pins(j, seq_to_flush, -+ ~0, 0, -+ min_nr, min_key_cache); -+ -+ if (direct) -+ j->nr_direct_reclaim += nr_flushed; -+ else -+ j->nr_background_reclaim += nr_flushed; -+ trace_and_count(c, journal_reclaim_finish, c, nr_flushed); -+ -+ if (nr_flushed) -+ wake_up(&j->reclaim_wait); -+ } while ((min_nr || min_key_cache) && nr_flushed && !direct); -+ -+ memalloc_noreclaim_restore(flags); -+ -+ return ret; -+} -+ -+int bch2_journal_reclaim(struct journal *j) -+{ -+ return __bch2_journal_reclaim(j, true, true); -+} -+ -+static int bch2_journal_reclaim_thread(void *arg) -+{ -+ struct journal *j = arg; -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ unsigned long delay, now; -+ bool journal_empty; -+ int ret = 0; -+ -+ set_freezable(); -+ -+ j->last_flushed = jiffies; -+ -+ while (!ret && !kthread_should_stop()) { -+ bool kicked = j->reclaim_kicked; -+ -+ j->reclaim_kicked = false; -+ -+ mutex_lock(&j->reclaim_lock); -+ ret = __bch2_journal_reclaim(j, false, kicked); -+ mutex_unlock(&j->reclaim_lock); -+ -+ now = jiffies; -+ delay = msecs_to_jiffies(c->opts.journal_reclaim_delay); -+ j->next_reclaim = j->last_flushed + delay; -+ -+ if (!time_in_range(j->next_reclaim, now, now + delay)) -+ j->next_reclaim = now + delay; -+ -+ while (1) { -+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); -+ if (kthread_should_stop()) -+ break; -+ if (j->reclaim_kicked) -+ break; -+ -+ spin_lock(&j->lock); -+ journal_empty = fifo_empty(&j->pin); -+ spin_unlock(&j->lock); -+ -+ if (journal_empty) -+ schedule(); -+ else if (time_after(j->next_reclaim, jiffies)) -+ schedule_timeout(j->next_reclaim - jiffies); -+ else -+ break; -+ } -+ __set_current_state(TASK_RUNNING); -+ } -+ -+ return 0; -+} -+ -+void bch2_journal_reclaim_stop(struct journal *j) -+{ -+ 
struct task_struct *p = j->reclaim_thread; -+ -+ j->reclaim_thread = NULL; -+ -+ if (p) { -+ kthread_stop(p); -+ put_task_struct(p); -+ } -+} -+ -+int bch2_journal_reclaim_start(struct journal *j) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct task_struct *p; -+ int ret; -+ -+ if (j->reclaim_thread) -+ return 0; -+ -+ p = kthread_create(bch2_journal_reclaim_thread, j, -+ "bch-reclaim/%s", c->name); -+ ret = PTR_ERR_OR_ZERO(p); -+ if (ret) { -+ bch_err_msg(c, ret, "creating journal reclaim thread"); -+ return ret; -+ } -+ -+ get_task_struct(p); -+ j->reclaim_thread = p; -+ wake_up_process(p); -+ return 0; -+} -+ -+static int journal_flush_done(struct journal *j, u64 seq_to_flush, -+ bool *did_work) -+{ -+ int ret; -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&j->reclaim_lock); -+ -+ if (journal_flush_pins(j, seq_to_flush, -+ (1U << JOURNAL_PIN_key_cache)| -+ (1U << JOURNAL_PIN_other), 0, 0, 0) || -+ journal_flush_pins(j, seq_to_flush, -+ (1U << JOURNAL_PIN_btree), 0, 0, 0)) -+ *did_work = true; -+ -+ spin_lock(&j->lock); -+ /* -+ * If journal replay hasn't completed, the unreplayed journal entries -+ * hold refs on their corresponding sequence numbers -+ */ -+ ret = !test_bit(JOURNAL_REPLAY_DONE, &j->flags) || -+ journal_last_seq(j) > seq_to_flush || -+ !fifo_used(&j->pin); -+ -+ spin_unlock(&j->lock); -+ mutex_unlock(&j->reclaim_lock); -+ -+ return ret; -+} -+ -+bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) -+{ -+ bool did_work = false; -+ -+ if (!test_bit(JOURNAL_STARTED, &j->flags)) -+ return false; -+ -+ closure_wait_event(&j->async_wait, -+ journal_flush_done(j, seq_to_flush, &did_work)); -+ -+ return did_work; -+} -+ -+int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) -+{ -+ struct bch_fs *c = container_of(j, struct bch_fs, journal); -+ struct journal_entry_pin_list *p; -+ u64 iter, seq = 0; -+ int ret = 0; -+ -+ spin_lock(&j->lock); -+ fifo_for_each_entry_ptr(p, &j->pin, iter) -+ if (dev_idx >= 0 -+ ? bch2_dev_list_has_dev(p->devs, dev_idx) -+ : p->devs.nr < c->opts.metadata_replicas) -+ seq = iter; -+ spin_unlock(&j->lock); -+ -+ bch2_journal_flush_pins(j, seq); -+ -+ ret = bch2_journal_error(j); -+ if (ret) -+ return ret; -+ -+ mutex_lock(&c->replicas_gc_lock); -+ bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); -+ -+ /* -+ * Now that we've populated replicas_gc, write to the journal to mark -+ * active journal devices. This handles the case where the journal might -+ * be empty. Otherwise we could clear all journal replicas and -+ * temporarily put the fs into an unrecoverable state. Journal recovery -+ * expects to find devices marked for journal data on unclean mount. 
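bch2_journal_flush_device_pins() above works by scanning the pin FIFO for the newest sequence number whose entry still references the device being evacuated, then flushing everything up to it. A toy userspace version of just that scan; the struct layout and all values are invented for the example:

```c
#include <stdio.h>

struct pin_entry {
	int devs[4];
	int nr_devs;
};

static int entry_has_dev(const struct pin_entry *e, int dev)
{
	for (int i = 0; i < e->nr_devs; i++)
		if (e->devs[i] == dev)
			return 1;
	return 0;
}

int main(void)
{
	/* pinned journal entries, oldest first, starting at sequence 10 */
	struct pin_entry pins[] = {
		{ { 0, 1 }, 2 },	/* seq 10 */
		{ { 1, 2 }, 2 },	/* seq 11 */
		{ { 0, 2 }, 2 },	/* seq 12 */
	};
	unsigned long long first_seq = 10, seq = 0;

	for (unsigned i = 0; i < 3; i++)
		if (entry_has_dev(&pins[i], 1))
			seq = first_seq + i;	/* keep the newest match */

	printf("flush pins up to seq %llu before removing device 1\n", seq);
	return 0;
}
```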
-+ */ -+ ret = bch2_journal_meta(&c->journal); -+ if (ret) -+ goto err; -+ -+ seq = 0; -+ spin_lock(&j->lock); -+ while (!ret) { -+ struct bch_replicas_padded replicas; -+ -+ seq = max(seq, journal_last_seq(j)); -+ if (seq >= j->pin.back) -+ break; -+ bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -+ journal_seq_pin(j, seq)->devs); -+ seq++; -+ -+ spin_unlock(&j->lock); -+ ret = bch2_mark_replicas(c, &replicas.e); -+ spin_lock(&j->lock); -+ } -+ spin_unlock(&j->lock); -+err: -+ ret = bch2_replicas_gc_end(c, ret); -+ mutex_unlock(&c->replicas_gc_lock); -+ -+ return ret; -+} -diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h -new file mode 100644 -index 000000000000..494d1a6eddb0 ---- /dev/null -+++ b/fs/bcachefs/journal_reclaim.h -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_RECLAIM_H -+#define _BCACHEFS_JOURNAL_RECLAIM_H -+ -+#define JOURNAL_PIN (32 * 1024) -+ -+static inline void journal_reclaim_kick(struct journal *j) -+{ -+ struct task_struct *p = READ_ONCE(j->reclaim_thread); -+ -+ j->reclaim_kicked = true; -+ if (p) -+ wake_up_process(p); -+} -+ -+unsigned bch2_journal_dev_buckets_available(struct journal *, -+ struct journal_device *, -+ enum journal_space_from); -+void bch2_journal_space_available(struct journal *); -+ -+static inline bool journal_pin_active(struct journal_entry_pin *pin) -+{ -+ return pin->seq != 0; -+} -+ -+static inline struct journal_entry_pin_list * -+journal_seq_pin(struct journal *j, u64 seq) -+{ -+ EBUG_ON(seq < j->pin.front || seq >= j->pin.back); -+ -+ return &j->pin.data[seq & j->pin.mask]; -+} -+ -+void bch2_journal_reclaim_fast(struct journal *); -+bool __bch2_journal_pin_put(struct journal *, u64); -+void bch2_journal_pin_put(struct journal *, u64); -+void bch2_journal_pin_drop(struct journal *, struct journal_entry_pin *); -+ -+void bch2_journal_pin_set(struct journal *, u64, struct journal_entry_pin *, -+ journal_pin_flush_fn); -+ -+static inline void bch2_journal_pin_add(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (unlikely(!journal_pin_active(pin) || pin->seq > seq)) -+ bch2_journal_pin_set(j, seq, pin, flush_fn); -+} -+ -+static inline void bch2_journal_pin_copy(struct journal *j, -+ struct journal_entry_pin *dst, -+ struct journal_entry_pin *src, -+ journal_pin_flush_fn flush_fn) -+{ -+ /* Guard against racing with journal_pin_drop(src): */ -+ u64 seq = READ_ONCE(src->seq); -+ -+ if (seq) -+ bch2_journal_pin_add(j, seq, dst, flush_fn); -+} -+ -+static inline void bch2_journal_pin_update(struct journal *j, u64 seq, -+ struct journal_entry_pin *pin, -+ journal_pin_flush_fn flush_fn) -+{ -+ if (unlikely(!journal_pin_active(pin) || pin->seq < seq)) -+ bch2_journal_pin_set(j, seq, pin, flush_fn); -+} -+ -+void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *); -+ -+void bch2_journal_do_discards(struct journal *); -+int bch2_journal_reclaim(struct journal *); -+ -+void bch2_journal_reclaim_stop(struct journal *); -+int bch2_journal_reclaim_start(struct journal *); -+ -+bool bch2_journal_flush_pins(struct journal *, u64); -+ -+static inline bool bch2_journal_flush_all_pins(struct journal *j) -+{ -+ return bch2_journal_flush_pins(j, U64_MAX); -+} -+ -+int bch2_journal_flush_device_pins(struct journal *, int); -+ -+#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ -diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c -new file mode 100644 -index 000000000000..ae4fb8c3a2bc ---- /dev/null -+++ 
b/fs/bcachefs/journal_sb.c -@@ -0,0 +1,219 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "journal_sb.h" -+#include "darray.h" -+ -+#include <linux/sort.h> -+ -+/* BCH_SB_FIELD_journal: */ -+ -+static int u64_cmp(const void *_l, const void *_r) -+{ -+ const u64 *l = _l; -+ const u64 *r = _r; -+ -+ return cmp_int(*l, *r); -+} -+ -+static int bch2_sb_journal_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_journal *journal = field_to_type(f, journal); -+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); -+ int ret = -BCH_ERR_invalid_sb_journal; -+ unsigned nr; -+ unsigned i; -+ u64 *b; -+ -+ nr = bch2_nr_journal_buckets(journal); -+ if (!nr) -+ return 0; -+ -+ b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL); -+ if (!b) -+ return -BCH_ERR_ENOMEM_sb_journal_validate; -+ -+ for (i = 0; i < nr; i++) -+ b[i] = le64_to_cpu(journal->buckets[i]); -+ -+ sort(b, nr, sizeof(u64), u64_cmp, NULL); -+ -+ if (!b[0]) { -+ prt_printf(err, "journal bucket at sector 0"); -+ goto err; -+ } -+ -+ if (b[0] < le16_to_cpu(m.first_bucket)) { -+ prt_printf(err, "journal bucket %llu before first bucket %u", -+ b[0], le16_to_cpu(m.first_bucket)); -+ goto err; -+ } -+ -+ if (b[nr - 1] >= le64_to_cpu(m.nbuckets)) { -+ prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)", -+ b[nr - 1], le64_to_cpu(m.nbuckets)); -+ goto err; -+ } -+ -+ for (i = 0; i + 1 < nr; i++) -+ if (b[i] == b[i + 1]) { -+ prt_printf(err, "duplicate journal buckets %llu", b[i]); -+ goto err; -+ } -+ -+ ret = 0; -+err: -+ kfree(b); -+ return ret; -+} -+ -+static void bch2_sb_journal_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal *journal = field_to_type(f, journal); -+ unsigned i, nr = bch2_nr_journal_buckets(journal); -+ -+ prt_printf(out, "Buckets: "); -+ for (i = 0; i < nr; i++) -+ prt_printf(out, " %llu", le64_to_cpu(journal->buckets[i])); -+ prt_newline(out); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_journal = { -+ .validate = bch2_sb_journal_validate, -+ .to_text = bch2_sb_journal_to_text, -+}; -+ -+struct u64_range { -+ u64 start; -+ u64 end; -+}; -+ -+static int u64_range_cmp(const void *_l, const void *_r) -+{ -+ const struct u64_range *l = _l; -+ const struct u64_range *r = _r; -+ -+ return cmp_int(l->start, r->start); -+} -+ -+static int bch2_sb_journal_v2_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); -+ struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); -+ int ret = -BCH_ERR_invalid_sb_journal; -+ unsigned nr; -+ unsigned i; -+ struct u64_range *b; -+ -+ nr = bch2_sb_field_journal_v2_nr_entries(journal); -+ if (!nr) -+ return 0; -+ -+ b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL); -+ if (!b) -+ return -BCH_ERR_ENOMEM_sb_journal_v2_validate; -+ -+ for (i = 0; i < nr; i++) { -+ b[i].start = le64_to_cpu(journal->d[i].start); -+ b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr); -+ } -+ -+ sort(b, nr, sizeof(*b), u64_range_cmp, NULL); -+ -+ if (!b[0].start) { -+ prt_printf(err, "journal bucket at sector 0"); -+ goto err; -+ } -+ -+ if (b[0].start < le16_to_cpu(m.first_bucket)) { -+ prt_printf(err, "journal bucket %llu before first bucket %u", -+ b[0].start, le16_to_cpu(m.first_bucket)); -+ goto err; -+ } -+ -+ if (b[nr - 1].end > le64_to_cpu(m.nbuckets)) { -+ prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)", -+ b[nr -
1].end - 1, le64_to_cpu(m.nbuckets)); -+ goto err; -+ } -+ -+ for (i = 0; i + 1 < nr; i++) { -+ if (b[i].end > b[i + 1].start) { -+ prt_printf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu", -+ b[i].start, b[i].end, b[i + 1].start, b[i + 1].end); -+ goto err; -+ } -+ } -+ -+ ret = 0; -+err: -+ kfree(b); -+ return ret; -+} -+ -+static void bch2_sb_journal_v2_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); -+ unsigned i, nr = bch2_sb_field_journal_v2_nr_entries(journal); -+ -+ prt_printf(out, "Buckets: "); -+ for (i = 0; i < nr; i++) -+ prt_printf(out, " %llu-%llu", -+ le64_to_cpu(journal->d[i].start), -+ le64_to_cpu(journal->d[i].start) + le64_to_cpu(journal->d[i].nr)); -+ prt_newline(out); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = { -+ .validate = bch2_sb_journal_v2_validate, -+ .to_text = bch2_sb_journal_v2_to_text, -+}; -+ -+int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca, -+ u64 *buckets, unsigned nr) -+{ -+ struct bch_sb_field_journal_v2 *j; -+ unsigned i, dst = 0, nr_compacted = 1; -+ -+ if (c) -+ lockdep_assert_held(&c->sb_lock); -+ -+ if (!nr) { -+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal); -+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal_v2); -+ return 0; -+ } -+ -+ for (i = 0; i + 1 < nr; i++) -+ if (buckets[i] + 1 != buckets[i + 1]) -+ nr_compacted++; -+ -+ j = bch2_sb_field_resize(&ca->disk_sb, journal_v2, -+ (sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64)); -+ if (!j) -+ return -BCH_ERR_ENOSPC_sb_journal; -+ -+ bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal); -+ -+ j->d[dst].start = cpu_to_le64(buckets[0]); -+ j->d[dst].nr = cpu_to_le64(1); -+ -+ for (i = 1; i < nr; i++) { -+ if (buckets[i] == buckets[i - 1] + 1) { -+ le64_add_cpu(&j->d[dst].nr, 1); -+ } else { -+ dst++; -+ j->d[dst].start = cpu_to_le64(buckets[i]); -+ j->d[dst].nr = cpu_to_le64(1); -+ } -+ } -+ -+ BUG_ON(dst + 1 != nr_compacted); -+ return 0; -+} -diff --git a/fs/bcachefs/journal_sb.h b/fs/bcachefs/journal_sb.h -new file mode 100644 -index 000000000000..ba40a7e8d90a ---- /dev/null -+++ b/fs/bcachefs/journal_sb.h -@@ -0,0 +1,24 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#include "super-io.h" -+#include "vstructs.h" -+ -+static inline unsigned bch2_nr_journal_buckets(struct bch_sb_field_journal *j) -+{ -+ return j -+ ? 
(__le64 *) vstruct_end(&j->field) - j->buckets -+ : 0; -+} -+ -+static inline unsigned bch2_sb_field_journal_v2_nr_entries(struct bch_sb_field_journal_v2 *j) -+{ -+ if (!j) -+ return 0; -+ -+ return (struct bch_sb_field_journal_v2_entry *) vstruct_end(&j->field) - &j->d[0]; -+} -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_journal; -+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_v2; -+ -+int bch2_journal_buckets_to_sb(struct bch_fs *, struct bch_dev *, u64 *, unsigned); -diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c -new file mode 100644 -index 000000000000..f9d9aa95bf3a ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.c -@@ -0,0 +1,320 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_iter.h" -+#include "eytzinger.h" -+#include "journal_seq_blacklist.h" -+#include "super-io.h" -+ -+/* -+ * journal_seq_blacklist machinery: -+ * -+ * To guarantee order of btree updates after a crash, we need to detect when a -+ * btree node entry (bset) is newer than the newest journal entry that was -+ * successfully written, and ignore it - effectively ignoring any btree updates -+ * that didn't make it into the journal. -+ * -+ * If we didn't do this, we might have two btree nodes, a and b, both with -+ * updates that weren't written to the journal yet: if b was updated after a, -+ * but b was flushed and not a - oops; on recovery we'll find that the updates -+ * to b happened, but not the updates to a that happened before it. -+ * -+ * Ignoring bsets that are newer than the newest journal entry is always safe, -+ * because everything they contain will also have been journalled - and must -+ * still be present in the journal on disk until a journal entry has been -+ * written _after_ that bset was written. -+ * -+ * To accomplish this, bsets record the newest journal sequence number they -+ * contain updates for; then, on startup, the btree code queries the journal -+ * code to ask "Is this sequence number newer than the newest journal entry? If -+ * so, ignore it." -+ * -+ * When this happens, we must blacklist that journal sequence number: the -+ * journal must not write any entries with that sequence number, and it must -+ * record that it was blacklisted so that a) on recovery we don't think we have -+ * missing journal entries and b) so that the btree code continues to ignore -+ * that bset, until that btree node is rewritten. 
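The add path for blacklist entries (bch2_journal_seq_blacklist_add(), just below) extends any contiguous or overlapping entry in place and then merges neighbours. Here is a compact userspace model of that merge behaviour; the fixed-size array and the O(n^2) cleanup pass are simplifications standing in for blacklist_entry_try_merge(), not the kernel code:

```c
#include <stdio.h>

struct bl_entry { unsigned long long start, end; };

static struct bl_entry bl[16];
static unsigned nr;

/* contiguous or overlapping, as in bl_entry_contig_or_overlaps() */
static int touches(const struct bl_entry *e, unsigned long long s,
		   unsigned long long e2)
{
	return !(e2 < e->start || e->end < s);
}

/* single cleanup pass standing in for blacklist_entry_try_merge() */
static void merge_all(void)
{
	for (unsigned i = 0; i + 1 < nr; )
		if (bl[i].end >= bl[i + 1].start) {
			if (bl[i + 1].end > bl[i].end)
				bl[i].end = bl[i + 1].end;
			for (unsigned j = i + 1; j + 1 < nr; j++)
				bl[j] = bl[j + 1];
			nr--;
		} else {
			i++;
		}
}

static void blacklist_add(unsigned long long start, unsigned long long end)
{
	for (unsigned i = 0; i < nr; i++)
		if (touches(&bl[i], start, end)) {
			if (start < bl[i].start)
				bl[i].start = start;
			if (end > bl[i].end)
				bl[i].end = end;
			merge_all();
			return;
		}
	bl[nr++] = (struct bl_entry) { start, end };	/* assumes sorted appends */
}

int main(void)
{
	blacklist_add(10, 20);
	blacklist_add(30, 40);
	blacklist_add(18, 32);	/* bridges the two existing ranges */

	for (unsigned i = 0; i < nr; i++)
		printf("%llu-%llu\n", bl[i].start, bl[i].end);	/* 10-40 */
	return 0;
}
```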
-+ */ -+ -+static unsigned sb_blacklist_u64s(unsigned nr) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ -+ return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64); -+} -+ -+static struct bch_sb_field_journal_seq_blacklist * -+blacklist_entry_try_merge(struct bch_fs *c, -+ struct bch_sb_field_journal_seq_blacklist *bl, -+ unsigned i) -+{ -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ if (le64_to_cpu(bl->start[i].end) >= -+ le64_to_cpu(bl->start[i + 1].start)) { -+ bl->start[i].end = bl->start[i + 1].end; -+ --nr; -+ memmove(&bl->start[i], -+ &bl->start[i + 1], -+ sizeof(bl->start[0]) * (nr - i)); -+ -+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, -+ sb_blacklist_u64s(nr)); -+ BUG_ON(!bl); -+ } -+ -+ return bl; -+} -+ -+static bool bl_entry_contig_or_overlaps(struct journal_seq_blacklist_entry *e, -+ u64 start, u64 end) -+{ -+ return !(end < le64_to_cpu(e->start) || le64_to_cpu(e->end) < start); -+} -+ -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ unsigned i, nr; -+ int ret = 0; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); -+ nr = blacklist_nr_entries(bl); -+ -+ for (i = 0; i < nr; i++) { -+ struct journal_seq_blacklist_entry *e = -+ bl->start + i; -+ -+ if (bl_entry_contig_or_overlaps(e, start, end)) { -+ e->start = cpu_to_le64(min(start, le64_to_cpu(e->start))); -+ e->end = cpu_to_le64(max(end, le64_to_cpu(e->end))); -+ -+ if (i + 1 < nr) -+ bl = blacklist_entry_try_merge(c, -+ bl, i); -+ if (i) -+ bl = blacklist_entry_try_merge(c, -+ bl, i - 1); -+ goto out_write_sb; -+ } -+ } -+ -+ bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, -+ sb_blacklist_u64s(nr + 1)); -+ if (!bl) { -+ ret = -BCH_ERR_ENOSPC_sb_journal_seq_blacklist; -+ goto out; -+ } -+ -+ bl->start[nr].start = cpu_to_le64(start); -+ bl->start[nr].end = cpu_to_le64(end); -+out_write_sb: -+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); -+ -+ ret = bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ return ret ?: bch2_blacklist_table_initialize(c); -+} -+ -+static int journal_seq_blacklist_table_cmp(const void *_l, -+ const void *_r, size_t size) -+{ -+ const struct journal_seq_blacklist_table_entry *l = _l; -+ const struct journal_seq_blacklist_table_entry *r = _r; -+ -+ return cmp_int(l->start, r->start); -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq, -+ bool dirty) -+{ -+ struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table; -+ struct journal_seq_blacklist_table_entry search = { .start = seq }; -+ int idx; -+ -+ if (!t) -+ return false; -+ -+ idx = eytzinger0_find_le(t->entries, t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ &search); -+ if (idx < 0) -+ return false; -+ -+ BUG_ON(t->entries[idx].start > seq); -+ -+ if (seq >= t->entries[idx].end) -+ return false; -+ -+ if (dirty) -+ t->entries[idx].dirty = true; -+ return true; -+} -+ -+int bch2_blacklist_table_initialize(struct bch_fs *c) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); -+ struct journal_seq_blacklist_table *t; -+ unsigned i, nr = blacklist_nr_entries(bl); -+ -+ if (!bl) -+ return 0; -+ -+ t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr, -+ GFP_KERNEL); -+ if (!t) -+ return -BCH_ERR_ENOMEM_blacklist_table_init; -+ -+ t->nr = nr; -+ -+ for (i = 0; i < nr; i++) { -+ t->entries[i].start = 
le64_to_cpu(bl->start[i].start); -+ t->entries[i].end = le64_to_cpu(bl->start[i].end); -+ } -+ -+ eytzinger0_sort(t->entries, -+ t->nr, -+ sizeof(t->entries[0]), -+ journal_seq_blacklist_table_cmp, -+ NULL); -+ -+ kfree(c->journal_seq_blacklist_table); -+ c->journal_seq_blacklist_table = t; -+ return 0; -+} -+ -+static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ unsigned i, nr = blacklist_nr_entries(bl); -+ -+ for (i = 0; i < nr; i++) { -+ struct journal_seq_blacklist_entry *e = bl->start + i; -+ -+ if (le64_to_cpu(e->start) >= -+ le64_to_cpu(e->end)) { -+ prt_printf(err, "entry %u start >= end (%llu >= %llu)", -+ i, le64_to_cpu(e->start), le64_to_cpu(e->end)); -+ return -BCH_ERR_invalid_sb_journal_seq_blacklist; -+ } -+ -+ if (i + 1 < nr && -+ le64_to_cpu(e[0].end) > -+ le64_to_cpu(e[1].start)) { -+ prt_printf(err, "entry %u out of order with next entry (%llu > %llu)", -+ i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start)); -+ return -BCH_ERR_invalid_sb_journal_seq_blacklist; -+ } -+ } -+ -+ return 0; -+} -+ -+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_journal_seq_blacklist *bl = -+ field_to_type(f, journal_seq_blacklist); -+ struct journal_seq_blacklist_entry *i; -+ unsigned nr = blacklist_nr_entries(bl); -+ -+ for (i = bl->start; i < bl->start + nr; i++) { -+ if (i != bl->start) -+ prt_printf(out, " "); -+ -+ prt_printf(out, "%llu-%llu", -+ le64_to_cpu(i->start), -+ le64_to_cpu(i->end)); -+ } -+ prt_newline(out); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = { -+ .validate = bch2_sb_journal_seq_blacklist_validate, -+ .to_text = bch2_sb_journal_seq_blacklist_to_text -+}; -+ -+void bch2_blacklist_entries_gc(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, -+ journal_seq_blacklist_gc_work); -+ struct journal_seq_blacklist_table *t; -+ struct bch_sb_field_journal_seq_blacklist *bl; -+ struct journal_seq_blacklist_entry *src, *dst; -+ struct btree_trans *trans = bch2_trans_get(c); -+ unsigned i, nr, new_nr; -+ int ret; -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_iter iter; -+ struct btree *b; -+ -+ bch2_trans_node_iter_init(trans, &iter, i, POS_MIN, -+ 0, 0, BTREE_ITER_PREFETCH); -+retry: -+ bch2_trans_begin(trans); -+ -+ b = bch2_btree_iter_peek_node(&iter); -+ -+ while (!(ret = PTR_ERR_OR_ZERO(b)) && -+ b && -+ !test_bit(BCH_FS_STOPPING, &c->flags)) -+ b = bch2_btree_iter_next_node(&iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_iter_exit(trans, &iter); -+ } -+ -+ bch2_trans_put(trans); -+ if (ret) -+ return; -+ -+ mutex_lock(&c->sb_lock); -+ bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); -+ if (!bl) -+ goto out; -+ -+ nr = blacklist_nr_entries(bl); -+ dst = bl->start; -+ -+ t = c->journal_seq_blacklist_table; -+ BUG_ON(nr != t->nr); -+ -+ for (src = bl->start, i = eytzinger0_first(t->nr); -+ src < bl->start + nr; -+ src++, i = eytzinger0_next(i, nr)) { -+ BUG_ON(t->entries[i].start != le64_to_cpu(src->start)); -+ BUG_ON(t->entries[i].end != le64_to_cpu(src->end)); -+ -+ if (t->entries[i].dirty) -+ *dst++ = *src; -+ } -+ -+ new_nr = dst - bl->start; -+ -+ bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr); -+ -+ if (new_nr != nr) { -+ bl = 
bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, -+ new_nr ? sb_blacklist_u64s(new_nr) : 0); -+ BUG_ON(new_nr && !bl); -+ -+ if (!new_nr) -+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_journal_seq_blacklist_v3)); -+ -+ bch2_write_super(c); -+ } -+out: -+ mutex_unlock(&c->sb_lock); -+} -diff --git a/fs/bcachefs/journal_seq_blacklist.h b/fs/bcachefs/journal_seq_blacklist.h -new file mode 100644 -index 000000000000..afb886ec8e25 ---- /dev/null -+++ b/fs/bcachefs/journal_seq_blacklist.h -@@ -0,0 +1,22 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H -+ -+static inline unsigned -+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl) -+{ -+ return bl -+ ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) / -+ sizeof(struct journal_seq_blacklist_entry)) -+ : 0; -+} -+ -+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool); -+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64); -+int bch2_blacklist_table_initialize(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist; -+ -+void bch2_blacklist_entries_gc(struct work_struct *); -+ -+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */ -diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h -new file mode 100644 -index 000000000000..42504e16acb6 ---- /dev/null -+++ b/fs/bcachefs/journal_types.h -@@ -0,0 +1,345 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_JOURNAL_TYPES_H -+#define _BCACHEFS_JOURNAL_TYPES_H -+ -+#include <linux/cache.h> -+#include <linux/workqueue.h> -+ -+#include "alloc_types.h" -+#include "super_types.h" -+#include "fifo.h" -+ -+#define JOURNAL_BUF_BITS 2 -+#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS) -+#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1) -+ -+/* -+ * We put JOURNAL_BUF_NR of these in struct journal; we use them for writes to -+ * the journal that are being staged or in flight. -+ */ -+struct journal_buf { -+ struct jset *data; -+ -+ __BKEY_PADDED(key, BCH_REPLICAS_MAX); -+ struct bch_devs_list devs_written; -+ -+ struct closure_waitlist wait; -+ u64 last_seq; /* copy of data->last_seq */ -+ long expires; -+ u64 flush_time; -+ -+ unsigned buf_size; /* size in bytes of @data */ -+ unsigned sectors; /* maximum size for current entry */ -+ unsigned disk_sectors; /* maximum size entry could have been, if -+ buf_size was bigger */ -+ unsigned u64s_reserved; -+ bool noflush; /* write has already been kicked off, and was noflush */ -+ bool must_flush; /* something wants a flush */ -+ bool separate_flush; -+}; -+ -+/* -+ * Something that makes a journal entry dirty - i.e.
a btree node that has to be -+ * flushed: -+ */ -+ -+enum journal_pin_type { -+ JOURNAL_PIN_btree, -+ JOURNAL_PIN_key_cache, -+ JOURNAL_PIN_other, -+ JOURNAL_PIN_NR, -+}; -+ -+struct journal_entry_pin_list { -+ struct list_head list[JOURNAL_PIN_NR]; -+ struct list_head flushed; -+ atomic_t count; -+ struct bch_devs_list devs; -+}; -+ -+struct journal; -+struct journal_entry_pin; -+typedef int (*journal_pin_flush_fn)(struct journal *j, -+ struct journal_entry_pin *, u64); -+ -+struct journal_entry_pin { -+ struct list_head list; -+ journal_pin_flush_fn flush; -+ u64 seq; -+}; -+ -+struct journal_res { -+ bool ref; -+ u8 idx; -+ u16 u64s; -+ u32 offset; -+ u64 seq; -+}; -+ -+/* -+ * For reserving space in the journal prior to getting a reservation on a -+ * particular journal entry: -+ */ -+struct journal_preres { -+ unsigned u64s; -+}; -+ -+union journal_res_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u64 cur_entry_offset:20, -+ idx:2, -+ unwritten_idx:2, -+ buf0_count:10, -+ buf1_count:10, -+ buf2_count:10, -+ buf3_count:10; -+ }; -+}; -+ -+union journal_preres_state { -+ struct { -+ atomic64_t counter; -+ }; -+ -+ struct { -+ u64 v; -+ }; -+ -+ struct { -+ u64 waiting:1, -+ reserved:31, -+ remaining:32; -+ }; -+}; -+ -+/* bytes: */ -+#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */ -+#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */ -+ -+/* -+ * We stash some journal state as sentinel values in cur_entry_offset: -+ * note - cur_entry_offset is in units of u64s -+ */ -+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1) -+ -+#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1) -+#define JOURNAL_ENTRY_ERROR_VAL (JOURNAL_ENTRY_OFFSET_MAX) -+ -+struct journal_space { -+ /* Units of 512 byte sectors: */ -+ unsigned next_entry; /* How big the next journal entry can be */ -+ unsigned total; -+}; -+ -+enum journal_space_from { -+ journal_space_discarded, -+ journal_space_clean_ondisk, -+ journal_space_clean, -+ journal_space_total, -+ journal_space_nr, -+}; -+ -+enum journal_flags { -+ JOURNAL_REPLAY_DONE, -+ JOURNAL_STARTED, -+ JOURNAL_MAY_SKIP_FLUSH, -+ JOURNAL_NEED_FLUSH_WRITE, -+}; -+ -+/* Reasons we may fail to get a journal reservation: */ -+#define JOURNAL_ERRORS() \ -+ x(ok) \ -+ x(blocked) \ -+ x(max_in_flight) \ -+ x(journal_full) \ -+ x(journal_pin_full) \ -+ x(journal_stuck) \ -+ x(insufficient_devices) -+ -+enum journal_errors { -+#define x(n) JOURNAL_ERR_##n, -+ JOURNAL_ERRORS() -+#undef x -+}; -+ -+typedef DARRAY(u64) darray_u64; -+ -+/* Embedded in struct bch_fs */ -+struct journal { -+ /* Fastpath stuff up front: */ -+ struct { -+ -+ union journal_res_state reservations; -+ enum bch_watermark watermark; -+ -+ union journal_preres_state prereserved; -+ -+ } __aligned(SMP_CACHE_BYTES); -+ -+ unsigned long flags; -+ -+ /* Max size of current journal entry */ -+ unsigned cur_entry_u64s; -+ unsigned cur_entry_sectors; -+ -+ /* Reserved space in journal entry to be used just prior to write */ -+ unsigned entry_u64s_reserved; -+ -+ -+ /* -+ * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if -+ * insufficient devices: -+ */ -+ enum journal_errors cur_entry_error; -+ -+ unsigned buf_size_want; -+ /* -+ * We may queue up some things to be journalled (log messages) before -+ * the journal has actually started - stash them here: -+ */ -+ darray_u64 early_journal_entries; -+ -+ /* -+ * Two journal entries -- one is currently open for new entries, the -+ * other is possibly being written out.
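The packed union journal_res_state above exists so the reservation fast path can claim space in the open journal entry with a single atomic compare-and-swap rather than a lock. A userspace sketch of that pattern, cut down to just the offset field (the real union also packs the buffer index and per-buffer reference counts):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* low 20 bits = offset of the next reservation in the open entry,
 * mirroring cur_entry_offset in union journal_res_state */
#define OFFSET_BITS 20
#define OFFSET_MASK ((1u << OFFSET_BITS) - 1)

static _Atomic uint64_t state;

/* claim u64s worth of space with one CAS, the way the journal
 * reservation fast path avoids taking j->lock */
static int res_get(unsigned u64s, unsigned *offset)
{
	uint64_t old = atomic_load(&state), new;

	do {
		unsigned cur = old & OFFSET_MASK;

		if (cur + u64s > OFFSET_MASK)
			return -1;	/* entry full: caller opens a new one */
		*offset = cur;
		new = (old & ~(uint64_t) OFFSET_MASK) | (cur + u64s);
	} while (!atomic_compare_exchange_weak(&state, &old, new));

	return 0;
}

int main(void)
{
	unsigned off;

	res_get(6, &off);
	printf("reserved at %u\n", off);	/* 0 */
	res_get(4, &off);
	printf("reserved at %u\n", off);	/* 6 */
	return 0;
}
```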
-+ */ -+ struct journal_buf buf[JOURNAL_BUF_NR]; -+ -+ spinlock_t lock; -+ -+ /* if nonzero, we may not open a new journal entry: */ -+ unsigned blocked; -+ -+ /* Used when waiting because the journal was full */ -+ wait_queue_head_t wait; -+ struct closure_waitlist async_wait; -+ struct closure_waitlist preres_wait; -+ -+ struct closure io; -+ struct delayed_work write_work; -+ -+ /* Sequence number of most recent journal entry (last entry in @pin) */ -+ atomic64_t seq; -+ -+ /* seq, last_seq from the most recent journal entry successfully written */ -+ u64 seq_ondisk; -+ u64 flushed_seq_ondisk; -+ u64 last_seq_ondisk; -+ u64 err_seq; -+ u64 last_empty_seq; -+ -+ /* -+ * FIFO of journal entries whose btree updates have not yet been -+ * written out. -+ * -+ * Each entry is a reference count. The position in the FIFO is the -+ * entry's sequence number relative to @seq. -+ * -+ * The journal entry itself holds a reference count, put when the -+ * journal entry is written out. Each btree node modified by the journal -+ * entry also holds a reference count, put when the btree node is -+ * written. -+ * -+ * When a reference count reaches zero, the journal entry is no longer -+ * needed. When all journal entries in the oldest journal bucket are no -+ * longer needed, the bucket can be discarded and reused. -+ */ -+ struct { -+ u64 front, back, size, mask; -+ struct journal_entry_pin_list *data; -+ } pin; -+ -+ struct journal_space space[journal_space_nr]; -+ -+ u64 replay_journal_seq; -+ u64 replay_journal_seq_end; -+ -+ struct write_point wp; -+ spinlock_t err_lock; -+ -+ struct mutex reclaim_lock; -+ /* -+ * Used for waiting until journal reclaim has freed up space in the -+ * journal: -+ */ -+ wait_queue_head_t reclaim_wait; -+ struct task_struct *reclaim_thread; -+ bool reclaim_kicked; -+ unsigned long next_reclaim; -+ u64 nr_direct_reclaim; -+ u64 nr_background_reclaim; -+ -+ unsigned long last_flushed; -+ struct journal_entry_pin *flush_in_progress; -+ bool flush_in_progress_dropped; -+ wait_queue_head_t pin_flush_wait; -+ -+ /* protects advancing ja->discard_idx: */ -+ struct mutex discard_lock; -+ bool can_discard; -+ -+ unsigned long last_flush_write; -+ -+ u64 res_get_blocked_start; -+ u64 write_start_time; -+ -+ u64 nr_flush_writes; -+ u64 nr_noflush_writes; -+ -+ struct bch2_time_stats *flush_write_time; -+ struct bch2_time_stats *noflush_write_time; -+ struct bch2_time_stats *blocked_time; -+ struct bch2_time_stats *flush_seq_time; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map res_map; -+#endif -+} __aligned(SMP_CACHE_BYTES); -+ -+/* -+ * Embedded in struct bch_dev. First three fields refer to the array of journal -+ * buckets, in bch_sb. -+ */ -+struct journal_device { -+ /* -+ * For each journal bucket, contains the max sequence number of the -+ * journal writes it contains - so we know when a bucket can be reused. 
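The bucket_seq array described just above is what makes reuse decisions cheap: a journal bucket can be discarded and rewritten once the newest sequence number it contains has made it to disk. A tiny illustrative check, with invented values (the real code tracks this via the discard_idx/dirty_idx cursors below):

```c
#include <stdio.h>

int main(void)
{
	/* bucket_seq[i]: newest journal seq written into bucket i */
	unsigned long long bucket_seq[4] = { 97, 98, 101, 104 };
	unsigned long long last_seq_ondisk = 100;

	for (int i = 0; i < 4; i++)
		printf("bucket %d (max seq %llu): %s\n", i, bucket_seq[i],
		       bucket_seq[i] <= last_seq_ondisk
		       ? "reusable" : "still dirty");
	return 0;
}
```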
-+ */ -+ u64 *bucket_seq; -+ -+ unsigned sectors_free; -+ -+ /* -+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx: -+ */ -+ unsigned discard_idx; /* Next bucket to discard */ -+ unsigned dirty_idx_ondisk; -+ unsigned dirty_idx; -+ unsigned cur_idx; /* Journal bucket we're currently writing to */ -+ unsigned nr; -+ -+ u64 *buckets; -+ -+ /* Bio for journal reads/writes to this device */ -+ struct bio *bio; -+ -+ /* for bch_journal_read_device */ -+ struct closure read; -+}; -+ -+/* -+ * journal_entry_res - reserve space in every journal entry: -+ */ -+struct journal_entry_res { -+ unsigned u64s; -+}; -+ -+#endif /* _BCACHEFS_JOURNAL_TYPES_H */ -diff --git a/fs/bcachefs/keylist.c b/fs/bcachefs/keylist.c -new file mode 100644 -index 000000000000..5699cd4873c8 ---- /dev/null -+++ b/fs/bcachefs/keylist.c -@@ -0,0 +1,52 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey.h" -+#include "keylist.h" -+ -+int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, -+ size_t nr_inline_u64s, size_t new_u64s) -+{ -+ size_t oldsize = bch2_keylist_u64s(l); -+ size_t newsize = oldsize + new_u64s; -+ u64 *old_buf = l->keys_p == inline_u64s ? NULL : l->keys_p; -+ u64 *new_keys; -+ -+ newsize = roundup_pow_of_two(newsize); -+ -+ if (newsize <= nr_inline_u64s || -+ (old_buf && roundup_pow_of_two(oldsize) == newsize)) -+ return 0; -+ -+ new_keys = krealloc(old_buf, sizeof(u64) * newsize, GFP_NOFS); -+ if (!new_keys) -+ return -ENOMEM; -+ -+ if (!old_buf) -+ memcpy_u64s(new_keys, inline_u64s, oldsize); -+ -+ l->keys_p = new_keys; -+ l->top_p = new_keys + oldsize; -+ -+ return 0; -+} -+ -+void bch2_keylist_pop_front(struct keylist *l) -+{ -+ l->top_p -= bch2_keylist_front(l)->k.u64s; -+ -+ memmove_u64s_down(l->keys, -+ bkey_next(l->keys), -+ bch2_keylist_u64s(l)); -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *l) -+{ -+ struct bkey_i *k; -+ -+ for_each_keylist_key(l, k) -+ BUG_ON(bkey_next(k) != l->top && -+ bpos_ge(k->k.p, bkey_next(k)->k.p)); -+} -+#endif -diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h -new file mode 100644 -index 000000000000..fe759c7031e0 ---- /dev/null -+++ b/fs/bcachefs/keylist.h -@@ -0,0 +1,74 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_H -+#define _BCACHEFS_KEYLIST_H -+ -+#include "keylist_types.h" -+ -+int bch2_keylist_realloc(struct keylist *, u64 *, size_t, size_t); -+void bch2_keylist_pop_front(struct keylist *); -+ -+static inline void bch2_keylist_init(struct keylist *l, u64 *inline_keys) -+{ -+ l->top_p = l->keys_p = inline_keys; -+} -+ -+static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys) -+{ -+ if (l->keys_p != inline_keys) -+ kfree(l->keys_p); -+} -+ -+static inline void bch2_keylist_push(struct keylist *l) -+{ -+ l->top = bkey_next(l->top); -+} -+ -+static inline void bch2_keylist_add(struct keylist *l, const struct bkey_i *k) -+{ -+ bkey_copy(l->top, k); -+ bch2_keylist_push(l); -+} -+ -+static inline bool bch2_keylist_empty(struct keylist *l) -+{ -+ return l->top == l->keys; -+} -+ -+static inline size_t bch2_keylist_u64s(struct keylist *l) -+{ -+ return l->top_p - l->keys_p; -+} -+ -+static inline size_t bch2_keylist_bytes(struct keylist *l) -+{ -+ return bch2_keylist_u64s(l) * sizeof(u64); -+} -+ -+static inline struct bkey_i *bch2_keylist_front(struct keylist *l) -+{ -+ return l->keys; -+} -+ -+#define for_each_keylist_key(_keylist, _k) \ -+ for (_k = (_keylist)->keys; \ -+ _k != (_keylist)->top; \ -+ _k = bkey_next(_k)) -+ 
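bch2_keylist_realloc() above implements a common kernel pattern: the list starts out on a small caller-provided inline buffer (typically on the stack) and only moves to the heap once it outgrows it. A self-contained userspace sketch of the same strategy, using plain u64s instead of bkeys and malloc()/realloc() in place of krealloc():

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct u64list {
	uint64_t *keys;		/* current buffer (inline or heap) */
	size_t used, cap;
	uint64_t *inline_buf;
};

static int u64list_push(struct u64list *l, uint64_t v)
{
	if (l->used == l->cap) {
		size_t new_cap = l->cap * 2;
		uint64_t *n;

		if (l->keys == l->inline_buf) {
			/* first spill: copy off the inline buffer, the way
			 * the kernel code hands krealloc() a NULL old_buf */
			n = malloc(new_cap * sizeof(*n));
			if (!n)
				return -1;
			memcpy(n, l->keys, l->used * sizeof(*n));
		} else {
			n = realloc(l->keys, new_cap * sizeof(*n));
			if (!n)
				return -1;
		}
		l->keys = n;
		l->cap = new_cap;
	}
	l->keys[l->used++] = v;
	return 0;
}

int main(void)
{
	uint64_t inline_keys[4];
	struct u64list l = { inline_keys, 0, 4, inline_keys };

	for (uint64_t i = 0; i < 10; i++)
		u64list_push(&l, i);
	printf("spilled=%d used=%zu\n", l.keys != l.inline_buf, l.used);
	if (l.keys != l.inline_buf)
		free(l.keys);
	return 0;
}
```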
-+static inline u64 keylist_sectors(struct keylist *keys) -+{ -+ struct bkey_i *k; -+ u64 ret = 0; -+ -+ for_each_keylist_key(keys, k) -+ ret += k->k.size; -+ -+ return ret; -+} -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+void bch2_verify_keylist_sorted(struct keylist *); -+#else -+static inline void bch2_verify_keylist_sorted(struct keylist *l) {} -+#endif -+ -+#endif /* _BCACHEFS_KEYLIST_H */ -diff --git a/fs/bcachefs/keylist_types.h b/fs/bcachefs/keylist_types.h -new file mode 100644 -index 000000000000..4b3ff7d8a875 ---- /dev/null -+++ b/fs/bcachefs/keylist_types.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_KEYLIST_TYPES_H -+#define _BCACHEFS_KEYLIST_TYPES_H -+ -+struct keylist { -+ union { -+ struct bkey_i *keys; -+ u64 *keys_p; -+ }; -+ union { -+ struct bkey_i *top; -+ u64 *top_p; -+ }; -+}; -+ -+#endif /* _BCACHEFS_KEYLIST_TYPES_H */ -diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c -new file mode 100644 -index 000000000000..8640f7dee0de ---- /dev/null -+++ b/fs/bcachefs/logged_ops.c -@@ -0,0 +1,112 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_buf.h" -+#include "btree_update.h" -+#include "error.h" -+#include "io_misc.h" -+#include "logged_ops.h" -+#include "super.h" -+ -+struct bch_logged_op_fn { -+ u8 type; -+ int (*resume)(struct btree_trans *, struct bkey_i *); -+}; -+ -+static const struct bch_logged_op_fn logged_op_fns[] = { -+#define x(n) { \ -+ .type = KEY_TYPE_logged_op_##n, \ -+ .resume = bch2_resume_logged_op_##n, \ -+}, -+ BCH_LOGGED_OPS() -+#undef x -+}; -+ -+static const struct bch_logged_op_fn *logged_op_fn(enum bch_bkey_type type) -+{ -+ for (unsigned i = 0; i < ARRAY_SIZE(logged_op_fns); i++) -+ if (logged_op_fns[i].type == type) -+ return logged_op_fns + i; -+ return NULL; -+} -+ -+static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); -+ struct bkey_buf sk; -+ u32 restart_count = trans->restart_count; -+ int ret; -+ -+ if (!fn) -+ return 0; -+ -+ bch2_bkey_buf_init(&sk); -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ -+ ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?: -+ fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count); -+ -+ bch2_bkey_buf_exit(&sk, c); -+ return ret; -+} -+ -+int bch2_resume_logged_ops(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key2(trans, iter, -+ BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k, -+ resume_logged_op(trans, &iter, k))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX); -+ if (ret) -+ return ret; -+ -+ k->k.p = iter.pos; -+ -+ ret = bch2_trans_update(trans, &iter, k, 0); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) -+{ -+ return commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, -+ __bch2_logged_op_start(trans, k)); -+} -+ -+void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) -+{ -+ int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, -+ bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0)); -+ /* -+ * This needs to be a fatal error because we've left an unfinished 
-+ * operation in the logged ops btree. -+ * -+ * We should only ever see an error here if the filesystem has already -+ * been shut down, but make sure of that here: -+ */ -+ if (ret) { -+ struct bch_fs *c = trans->c; -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); -+ bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s", -+ __func__, buf.buf, bch2_err_str(ret)); -+ printbuf_exit(&buf); -+ } -+} -diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h -new file mode 100644 -index 000000000000..4d1e786a27a8 ---- /dev/null -+++ b/fs/bcachefs/logged_ops.h -@@ -0,0 +1,20 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_LOGGED_OPS_H -+#define _BCACHEFS_LOGGED_OPS_H -+ -+#include "bkey.h" -+ -+#define BCH_LOGGED_OPS() \ -+ x(truncate) \ -+ x(finsert) -+ -+static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i *op) -+{ -+ return bch2_btree_insert_nonextent(trans, BTREE_ID_logged_ops, op, 0); -+} -+ -+int bch2_resume_logged_ops(struct bch_fs *); -+int bch2_logged_op_start(struct btree_trans *, struct bkey_i *); -+void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); -+ -+#endif /* _BCACHEFS_LOGGED_OPS_H */ -diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c -new file mode 100644 -index 000000000000..a5cc0ed195d6 ---- /dev/null -+++ b/fs/bcachefs/lru.c -@@ -0,0 +1,164 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "btree_write_buffer.h" -+#include "error.h" -+#include "lru.h" -+#include "recovery.h" -+ -+/* KEY_TYPE_lru is obsolete: */ -+int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, -+ lru_entry_at_time_0, -+ "lru entry at time=0"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct bch_lru *lru = bkey_s_c_to_lru(k).v; -+ -+ prt_printf(out, "idx %llu", le64_to_cpu(lru->idx)); -+} -+ -+void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru) -+{ -+ prt_printf(out, "%llu:%llu -> %llu:%llu", -+ lru_pos_id(lru), -+ lru_pos_time(lru), -+ u64_to_bucket(lru.offset).inode, -+ u64_to_bucket(lru.offset).offset); -+} -+ -+static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, -+ u64 dev_bucket, u64 time, bool set) -+{ -+ return time -+ ? 
bch2_btree_bit_mod(trans, BTREE_ID_lru, -+ lru_pos(lru_id, dev_bucket, time), set) -+ : 0; -+} -+ -+int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) -+{ -+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted); -+} -+ -+int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) -+{ -+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set); -+} -+ -+int bch2_lru_change(struct btree_trans *trans, -+ u16 lru_id, u64 dev_bucket, -+ u64 old_time, u64 new_time) -+{ -+ if (old_time == new_time) -+ return 0; -+ -+ return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?: -+ bch2_lru_set(trans, lru_id, dev_bucket, new_time); -+} -+ -+static const char * const bch2_lru_types[] = { -+#define x(n) #n, -+ BCH_LRU_TYPES() -+#undef x -+ NULL -+}; -+ -+static int bch2_check_lru_key(struct btree_trans *trans, -+ struct btree_iter *lru_iter, -+ struct bkey_s_c lru_k, -+ struct bpos *last_flushed_pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ struct printbuf buf1 = PRINTBUF; -+ struct printbuf buf2 = PRINTBUF; -+ enum bch_lru_type type = lru_type(lru_k); -+ struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset); -+ u64 idx; -+ int ret; -+ -+ if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, -+ lru_entry_to_invalid_bucket, -+ "lru key points to nonexistent device:bucket %llu:%llu", -+ alloc_pos.inode, alloc_pos.offset)) -+ return bch2_btree_delete_at(trans, lru_iter, 0); -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ a = bch2_alloc_to_v4(k, &a_convert); -+ -+ switch (type) { -+ case BCH_LRU_read: -+ idx = alloc_lru_idx_read(*a); -+ break; -+ case BCH_LRU_fragmentation: -+ idx = a->fragmentation_lru; -+ break; -+ } -+ -+ if (lru_k.k->type != KEY_TYPE_set || -+ lru_pos_time(lru_k.k->p) != idx) { -+ if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { -+ *last_flushed_pos = lru_k.k->p; -+ ret = bch2_btree_write_buffer_flush_sync(trans) ?: -+ -BCH_ERR_transaction_restart_write_buffer_flush; -+ goto out; -+ } -+ -+ if (c->opts.reconstruct_alloc || -+ fsck_err(c, lru_entry_bad, -+ "incorrect lru entry: lru %s time %llu\n" -+ " %s\n" -+ " for %s", -+ bch2_lru_types[type], -+ lru_pos_time(lru_k.k->p), -+ (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), -+ (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) -+ ret = bch2_btree_delete_at(trans, lru_iter, 0); -+ } -+out: -+err: -+fsck_err: -+ bch2_trans_iter_exit(trans, &iter); -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ return ret; -+} -+ -+int bch2_check_lrus(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bpos last_flushed_pos = POS_MIN; -+ int ret = 0; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+ -+} -diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h -new file mode 100644 -index 000000000000..429dca816df5 ---- /dev/null -+++ b/fs/bcachefs/lru.h -@@ -0,0 +1,69 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_LRU_H -+#define _BCACHEFS_LRU_H -+ -+#define LRU_TIME_BITS 48 -+#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) -+ -+static inline u64 lru_pos_id(struct bpos pos) -+{ -+ return 
pos.inode >> LRU_TIME_BITS; -+} -+ -+static inline u64 lru_pos_time(struct bpos pos) -+{ -+ return pos.inode & ~(~0ULL << LRU_TIME_BITS); -+} -+ -+static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time) -+{ -+ struct bpos pos = POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket); -+ -+ EBUG_ON(time > LRU_TIME_MAX); -+ EBUG_ON(lru_pos_id(pos) != lru_id); -+ EBUG_ON(lru_pos_time(pos) != time); -+ EBUG_ON(pos.offset != dev_bucket); -+ -+ return pos; -+} -+ -+#define BCH_LRU_TYPES() \ -+ x(read) \ -+ x(fragmentation) -+ -+enum bch_lru_type { -+#define x(n) BCH_LRU_##n, -+ BCH_LRU_TYPES() -+#undef x -+}; -+ -+#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1) -+ -+static inline enum bch_lru_type lru_type(struct bkey_s_c l) -+{ -+ u16 lru_id = l.k->p.inode >> 48; -+ -+ if (lru_id == BCH_LRU_FRAGMENTATION_START) -+ return BCH_LRU_fragmentation; -+ return BCH_LRU_read; -+} -+ -+int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+void bch2_lru_pos_to_text(struct printbuf *, struct bpos); -+ -+#define bch2_bkey_ops_lru ((struct bkey_ops) { \ -+ .key_invalid = bch2_lru_invalid, \ -+ .val_to_text = bch2_lru_to_text, \ -+ .min_val_size = 8, \ -+}) -+ -+int bch2_lru_del(struct btree_trans *, u16, u64, u64); -+int bch2_lru_set(struct btree_trans *, u16, u64, u64); -+int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); -+ -+int bch2_check_lrus(struct bch_fs *); -+ -+#endif /* _BCACHEFS_LRU_H */ -diff --git a/fs/bcachefs/mean_and_variance.c b/fs/bcachefs/mean_and_variance.c -new file mode 100644 -index 000000000000..1f0801e2e565 ---- /dev/null -+++ b/fs/bcachefs/mean_and_variance.c -@@ -0,0 +1,159 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Functions for incremental mean and variance. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * Copyright © 2022 Daniel B. Hill -+ * -+ * Author: Daniel B. Hill -+ * -+ * Description: -+ * -+ * This includes some incremental algorithms for mean and variance calculation -+ * -+ * Derived from the paper: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf -+ * -+ * Create a struct and if it's the weighted variant set the weight field (weight = 2^k). -+ * -+ * Use mean_and_variance[_weighted]_update() on the struct to update its state. -+ * -+ * Use the mean_and_variance[_weighted]_get_* functions to calculate the mean and variance; some computation -+ * is deferred to these functions for performance reasons. -+ * -+ * see lib/math/mean_and_variance_test.c for examples of usage. -+ * -+ * DO NOT access the mean and variance fields of the weighted variants directly. -+ * DO NOT change the weight after calling update.
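The description above defers some work to the get_* functions: the unweighted accumulator only maintains n, the sum and the sum of squares, and mean/variance fall out at read time via variance = E[x^2] - E[x]^2 (equation 12 of the linked paper). A double-precision userspace sketch of that scheme, fed the same 2, 2, 4, 4 samples as the kunit test later in this patch:

```c
#include <stdio.h>

/* standalone floating-point version of the unweighted accumulator */
struct mv { long long n; double sum, sum_squares; };

static void mv_update(struct mv *s, double x)
{
	s->n++;
	s->sum += x;
	s->sum_squares += x * x;
}

int main(void)
{
	struct mv s = { 0 };
	double xs[] = { 2, 2, 4, 4 };

	for (int i = 0; i < 4; i++)
		mv_update(&s, xs[i]);

	double mean = s.sum / s.n;
	double var  = s.sum_squares / s.n - mean * mean;

	printf("mean=%g variance=%g\n", mean, var);	/* mean=3 variance=1 */
	return 0;
}
```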
-+ */ -+ -+#include <linux/bug.h> -+#include <linux/compiler.h> -+#include <linux/export.h> -+#include <linux/limits.h> -+#include <linux/math.h> -+#include <linux/math64.h> -+#include <linux/module.h> -+ -+#include "mean_and_variance.h" -+ -+u128_u u128_div(u128_u n, u64 d) -+{ -+ u128_u r; -+ u64 rem; -+ u64 hi = u128_hi(n); -+ u64 lo = u128_lo(n); -+ u64 h = hi & ((u64) U32_MAX << 32); -+ u64 l = (hi & (u64) U32_MAX) << 32; -+ -+ r = u128_shl(u64_to_u128(div64_u64_rem(h, d, &rem)), 64); -+ r = u128_add(r, u128_shl(u64_to_u128(div64_u64_rem(l + (rem << 32), d, &rem)), 32)); -+ r = u128_add(r, u64_to_u128(div64_u64_rem(lo + (rem << 32), d, &rem))); -+ return r; -+} -+EXPORT_SYMBOL_GPL(u128_div); -+ -+/** -+ * mean_and_variance_get_mean() - get mean from @s -+ */ -+s64 mean_and_variance_get_mean(struct mean_and_variance s) -+{ -+ return s.n ? div64_u64(s.sum, s.n) : 0; -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_get_mean); -+ -+/** -+ * mean_and_variance_get_variance() - get variance from @s1 -+ * -+ * see linked pdf equation 12. -+ */ -+u64 mean_and_variance_get_variance(struct mean_and_variance s1) -+{ -+ if (s1.n) { -+ u128_u s2 = u128_div(s1.sum_squares, s1.n); -+ u64 s3 = abs(mean_and_variance_get_mean(s1)); -+ -+ return u128_lo(u128_sub(s2, u128_square(s3))); -+ } else { -+ return 0; -+ } -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_get_variance); -+ -+/** -+ * mean_and_variance_get_stddev() - get standard deviation from @s -+ */ -+u32 mean_and_variance_get_stddev(struct mean_and_variance s) -+{ -+ return int_sqrt64(mean_and_variance_get_variance(s)); -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev); -+ -+/** -+ * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update() -+ * @s: state to update -+ * @x: new sample -+ * -+ * see linked pdf: function derived from equations 140-143 where alpha = 2^w. -+ * values are stored bitshifted for performance and added precision. -+ */ -+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x) -+{ -+ // previous weighted variance. -+ u8 w = s->weight; -+ u64 var_w0 = s->variance; -+ // new value weighted. -+ s64 x_w = x << w; -+ s64 diff_w = x_w - s->mean; -+ s64 diff = fast_divpow2(diff_w, w); -+ // new mean weighted. -+ s64 u_w1 = s->mean + diff; -+ -+ if (!s->init) { -+ s->mean = x_w; -+ s->variance = 0; -+ } else { -+ s->mean = u_w1; -+ s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w; -+ } -+ s->init = true; -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update); -+ -+/** -+ * mean_and_variance_weighted_get_mean() - get mean from @s -+ */ -+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s) -+{ -+ return fast_divpow2(s.mean, s.weight); -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean); -+ -+/** -+ * mean_and_variance_weighted_get_variance() - get variance from @s -+ */ -+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s) -+{ -+ // always positive don't need fast divpow2 -+ return s.variance >> s.weight; -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance); -+ -+/** -+ * mean_and_variance_weighted_get_stddev() - get standard deviation from @s -+ */ -+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s) -+{ -+ return int_sqrt64(mean_and_variance_weighted_get_variance(s)); -+} -+EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev); -+ -+MODULE_AUTHOR("Daniel B.
Hill"); -+MODULE_LICENSE("GPL"); -diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h -new file mode 100644 -index 000000000000..647505010b39 ---- /dev/null -+++ b/fs/bcachefs/mean_and_variance.h -@@ -0,0 +1,198 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef MEAN_AND_VARIANCE_H_ -+#define MEAN_AND_VARIANCE_H_ -+ -+#include -+#include -+#include -+#include -+ -+#define SQRT_U64_MAX 4294967295ULL -+ -+/* -+ * u128_u: u128 user mode, because not all architectures support a real int128 -+ * type -+ */ -+ -+#ifdef __SIZEOF_INT128__ -+ -+typedef struct { -+ unsigned __int128 v; -+} __aligned(16) u128_u; -+ -+static inline u128_u u64_to_u128(u64 a) -+{ -+ return (u128_u) { .v = a }; -+} -+ -+static inline u64 u128_lo(u128_u a) -+{ -+ return a.v; -+} -+ -+static inline u64 u128_hi(u128_u a) -+{ -+ return a.v >> 64; -+} -+ -+static inline u128_u u128_add(u128_u a, u128_u b) -+{ -+ a.v += b.v; -+ return a; -+} -+ -+static inline u128_u u128_sub(u128_u a, u128_u b) -+{ -+ a.v -= b.v; -+ return a; -+} -+ -+static inline u128_u u128_shl(u128_u a, s8 shift) -+{ -+ a.v <<= shift; -+ return a; -+} -+ -+static inline u128_u u128_square(u64 a) -+{ -+ u128_u b = u64_to_u128(a); -+ -+ b.v *= b.v; -+ return b; -+} -+ -+#else -+ -+typedef struct { -+ u64 hi, lo; -+} __aligned(16) u128_u; -+ -+/* conversions */ -+ -+static inline u128_u u64_to_u128(u64 a) -+{ -+ return (u128_u) { .lo = a }; -+} -+ -+static inline u64 u128_lo(u128_u a) -+{ -+ return a.lo; -+} -+ -+static inline u64 u128_hi(u128_u a) -+{ -+ return a.hi; -+} -+ -+/* arithmetic */ -+ -+static inline u128_u u128_add(u128_u a, u128_u b) -+{ -+ u128_u c; -+ -+ c.lo = a.lo + b.lo; -+ c.hi = a.hi + b.hi + (c.lo < a.lo); -+ return c; -+} -+ -+static inline u128_u u128_sub(u128_u a, u128_u b) -+{ -+ u128_u c; -+ -+ c.lo = a.lo - b.lo; -+ c.hi = a.hi - b.hi - (c.lo > a.lo); -+ return c; -+} -+ -+static inline u128_u u128_shl(u128_u i, s8 shift) -+{ -+ u128_u r; -+ -+ r.lo = i.lo << shift; -+ if (shift < 64) -+ r.hi = (i.hi << shift) | (i.lo >> (64 - shift)); -+ else { -+ r.hi = i.lo << (shift - 64); -+ r.lo = 0; -+ } -+ return r; -+} -+ -+static inline u128_u u128_square(u64 i) -+{ -+ u128_u r; -+ u64 h = i >> 32, l = i & U32_MAX; -+ -+ r = u128_shl(u64_to_u128(h*h), 64); -+ r = u128_add(r, u128_shl(u64_to_u128(h*l), 32)); -+ r = u128_add(r, u128_shl(u64_to_u128(l*h), 32)); -+ r = u128_add(r, u64_to_u128(l*l)); -+ return r; -+} -+ -+#endif -+ -+static inline u128_u u64s_to_u128(u64 hi, u64 lo) -+{ -+ u128_u c = u64_to_u128(hi); -+ -+ c = u128_shl(c, 64); -+ c = u128_add(c, u64_to_u128(lo)); -+ return c; -+} -+ -+u128_u u128_div(u128_u n, u64 d); -+ -+struct mean_and_variance { -+ s64 n; -+ s64 sum; -+ u128_u sum_squares; -+}; -+ -+/* expontentially weighted variant */ -+struct mean_and_variance_weighted { -+ bool init; -+ u8 weight; /* base 2 logarithim */ -+ s64 mean; -+ u64 variance; -+}; -+ -+/** -+ * fast_divpow2() - fast approximation for n / (1 << d) -+ * @n: numerator -+ * @d: the power of 2 denominator. -+ * -+ * note: this rounds towards 0. -+ */ -+static inline s64 fast_divpow2(s64 n, u8 d) -+{ -+ return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d; -+} -+ -+/** -+ * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1 -+ * and return it. -+ * @s1: the mean_and_variance to update. -+ * @v1: the new sample. -+ * -+ * see linked pdf equation 12. 
-+
-+/**
-+ * mean_and_variance_update() - update a mean_and_variance struct @s1 with a new sample @v1
-+ * and return it.
-+ * @s1: the mean_and_variance to update.
-+ * @v1: the new sample.
-+ *
-+ * see linked pdf equation 12.
-+ */
-+static inline void
-+mean_and_variance_update(struct mean_and_variance *s, s64 v)
-+{
-+	s->n++;
-+	s->sum += v;
-+	s->sum_squares = u128_add(s->sum_squares, u128_square(abs(v)));
-+}
-+
-+s64 mean_and_variance_get_mean(struct mean_and_variance s);
-+u64 mean_and_variance_get_variance(struct mean_and_variance s1);
-+u32 mean_and_variance_get_stddev(struct mean_and_variance s);
-+
-+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v);
-+
-+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
-+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
-+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
-+
-+#endif // MEAN_AND_VARIANCE_H_
-diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c
-new file mode 100644
-index 000000000000..019583c3ca0e
---- /dev/null
-+++ b/fs/bcachefs/mean_and_variance_test.c
-@@ -0,0 +1,240 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include <kunit/test.h>
-+
-+#include "mean_and_variance.h"
-+
-+#define MAX_SQR (SQRT_U64_MAX*SQRT_U64_MAX)
-+
-+static void mean_and_variance_basic_test(struct kunit *test)
-+{
-+	struct mean_and_variance s = {};
-+
-+	mean_and_variance_update(&s, 2);
-+	mean_and_variance_update(&s, 2);
-+
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(s), 2);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_get_variance(s), 0);
-+	KUNIT_EXPECT_EQ(test, s.n, 2);
-+
-+	mean_and_variance_update(&s, 4);
-+	mean_and_variance_update(&s, 4);
-+
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(s), 3);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_get_variance(s), 1);
-+	KUNIT_EXPECT_EQ(test, s.n, 4);
-+}
-+
-+/*
-+ * Test values computed using a spreadsheet from the pseudocode at the bottom:
-+ * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
-+ */
-+
-+static void mean_and_variance_weighted_test(struct kunit *test)
-+{
-+	struct mean_and_variance_weighted s = { .weight = 2 };
-+
-+	mean_and_variance_weighted_update(&s, 10);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 10);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0);
-+
-+	mean_and_variance_weighted_update(&s, 20);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 12);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18);
-+
-+	mean_and_variance_weighted_update(&s, 30);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 16);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 72);
-+
-+	s = (struct mean_and_variance_weighted) { .weight = 2 };
-+
-+	mean_and_variance_weighted_update(&s, -10);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -10);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0);
-+
-+	mean_and_variance_weighted_update(&s, -20);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -12);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18);
-+
-+	mean_and_variance_weighted_update(&s, -30);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -16);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 72);
-+}
-+
-+static void mean_and_variance_weighted_advanced_test(struct kunit *test)
-+{
-+	struct mean_and_variance_weighted s = { .weight = 8 };
-+	s64 i;
-+
-+	for (i = 10; i <= 100; i += 10)
-+		mean_and_variance_weighted_update(&s, i);
-+
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 11);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107);
-+
-+	s = (struct mean_and_variance_weighted) { .weight = 8 };
-+
-+	for (i = -10; i >= -100; i -= 10)
-+		mean_and_variance_weighted_update(&s, i);
-+
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -11);
-+	KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107);
-+}
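The weighted variant stores the mean and variance left-shifted by the weight for extra precision, per equations 140-143 of the linked pdf with alpha = 2^w. Below is a standalone userspace re-implementation (illustrative only; names and types are simplified) that reproduces the expected values from mean_and_variance_weighted_test() above: weight 2, samples 10, 20, 30 give means 10, 12, 16 and variances 0, 18, 72.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct mvw { bool init; uint8_t weight; int64_t mean; uint64_t variance; };

static int64_t fast_divpow2(int64_t n, uint8_t d)
{
	return (n + ((n < 0) ? ((1 << d) - 1) : 0)) >> d;
}

static void mvw_update(struct mvw *s, int64_t x)
{
	uint8_t w = s->weight;
	uint64_t var_w0 = s->variance;  /* previous weighted variance */
	int64_t x_w = x << w;           /* new value, weighted */
	int64_t diff_w = x_w - s->mean;
	int64_t u_w1 = s->mean + fast_divpow2(diff_w, w); /* new weighted mean */

	if (!s->init) {
		s->mean = x_w;
		s->variance = 0;
	} else {
		s->mean = u_w1;
		s->variance = ((var_w0 << w) - var_w0 +
			       ((diff_w * (x_w - u_w1)) >> w)) >> w;
	}
	s->init = true;
}

int main(void)
{
	struct mvw s = { .weight = 2 };

	mvw_update(&s, 10);	/* mean 40/4 = 10, variance 0 */
	assert(fast_divpow2(s.mean, s.weight) == 10 && (s.variance >> s.weight) == 0);
	mvw_update(&s, 20);	/* mean 50/4 = 12, variance 75/4 = 18 */
	assert(fast_divpow2(s.mean, s.weight) == 12 && (s.variance >> s.weight) == 18);
	mvw_update(&s, 30);	/* mean 67/4 = 16, variance 288/4 = 72 */
	assert(fast_divpow2(s.mean, s.weight) == 16 && (s.variance >> s.weight) == 72);
	return 0;
}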
-+
-+static void do_mean_and_variance_test(struct kunit *test,
-+				      s64 initial_value,
-+				      s64 initial_n,
-+				      s64 n,
-+				      unsigned weight,
-+				      s64 *data,
-+				      s64 *mean,
-+				      s64 *stddev,
-+				      s64 *weighted_mean,
-+				      s64 *weighted_stddev)
-+{
-+	struct mean_and_variance mv = {};
-+	struct mean_and_variance_weighted vw = { .weight = weight };
-+
-+	for (unsigned i = 0; i < initial_n; i++) {
-+		mean_and_variance_update(&mv, initial_value);
-+		mean_and_variance_weighted_update(&vw, initial_value);
-+
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(mv), initial_value);
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_get_stddev(mv), 0);
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw), initial_value);
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw), 0);
-+	}
-+
-+	for (unsigned i = 0; i < n; i++) {
-+		mean_and_variance_update(&mv, data[i]);
-+		mean_and_variance_weighted_update(&vw, data[i]);
-+
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(mv), mean[i]);
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_get_stddev(mv), stddev[i]);
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw), weighted_mean[i]);
-+		KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw), weighted_stddev[i]);
-+	}
-+
-+	KUNIT_EXPECT_EQ(test, mv.n, initial_n + n);
-+}
-+
-+/* Test behaviour with a single outlier, then back to steady state: */
-+static void mean_and_variance_test_1(struct kunit *test)
-+{
-+	s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
-+	s64 mean[] = { 22, 21, 20, 19, 18, 17, 16 };
-+	s64 stddev[] = { 32, 29, 28, 27, 26, 25, 24 };
-+	s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
-+	s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-+
-+	do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+			d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+static void mean_and_variance_test_2(struct kunit *test)
-+{
-+	s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
-+	s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 };
-+	s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 };
-+	s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
-+	s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-+
-+	do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+			d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+/* Test behaviour where we switch from one steady state to another: */
-+static void mean_and_variance_test_3(struct kunit *test)
-+{
-+	s64 d[] = { 100, 100, 100, 100, 100 };
-+	s64 mean[] = { 22, 32, 40, 46, 50 };
-+	s64 stddev[] = { 32, 39, 42, 44, 45 };
-+	s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
-+	s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-+
-+	do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+			d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+static void mean_and_variance_test_4(struct kunit *test)
-+{
-+	s64 d[] = { 100, 100, 100, 100, 100 };
-+	s64 mean[] = { 10, 11, 12, 13, 14 };
-+	s64 stddev[] = { 9, 13, 15, 17, 19 };
-+	s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
-+	s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-+
-+	do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-+			d, mean, stddev, weighted_mean, weighted_stddev);
-+}
-+
-+static void mean_and_variance_fast_divpow2(struct kunit
*test) -+{ -+ s64 i; -+ u8 d; -+ -+ for (i = 0; i < 100; i++) { -+ d = 0; -+ KUNIT_EXPECT_EQ(test, fast_divpow2(i, d), div_u64(i, 1LLU << d)); -+ KUNIT_EXPECT_EQ(test, abs(fast_divpow2(-i, d)), div_u64(i, 1LLU << d)); -+ for (d = 1; d < 32; d++) { -+ KUNIT_EXPECT_EQ_MSG(test, abs(fast_divpow2(i, d)), -+ div_u64(i, 1 << d), "%lld %u", i, d); -+ KUNIT_EXPECT_EQ_MSG(test, abs(fast_divpow2(-i, d)), -+ div_u64(i, 1 << d), "%lld %u", -i, d); -+ } -+ } -+} -+ -+static void mean_and_variance_u128_basic_test(struct kunit *test) -+{ -+ u128_u a = u64s_to_u128(0, U64_MAX); -+ u128_u a1 = u64s_to_u128(0, 1); -+ u128_u b = u64s_to_u128(1, 0); -+ u128_u c = u64s_to_u128(0, 1LLU << 63); -+ u128_u c2 = u64s_to_u128(U64_MAX, U64_MAX); -+ -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_add(a, a1)), 1); -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_add(a, a1)), 0); -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_add(a1, a)), 1); -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_add(a1, a)), 0); -+ -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_sub(b, a1)), U64_MAX); -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_sub(b, a1)), 0); -+ -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_shl(c, 1)), 1); -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_shl(c, 1)), 0); -+ -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_square(U64_MAX)), U64_MAX - 1); -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_square(U64_MAX)), 1); -+ -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_div(b, 2)), 1LLU << 63); -+ -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_div(c2, 2)), U64_MAX >> 1); -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_div(c2, 2)), U64_MAX); -+ -+ KUNIT_EXPECT_EQ(test, u128_hi(u128_div(u128_shl(u64_to_u128(U64_MAX), 32), 2)), U32_MAX >> 1); -+ KUNIT_EXPECT_EQ(test, u128_lo(u128_div(u128_shl(u64_to_u128(U64_MAX), 32), 2)), U64_MAX << 31); -+} -+ -+static struct kunit_case mean_and_variance_test_cases[] = { -+ KUNIT_CASE(mean_and_variance_fast_divpow2), -+ KUNIT_CASE(mean_and_variance_u128_basic_test), -+ KUNIT_CASE(mean_and_variance_basic_test), -+ KUNIT_CASE(mean_and_variance_weighted_test), -+ KUNIT_CASE(mean_and_variance_weighted_advanced_test), -+ KUNIT_CASE(mean_and_variance_test_1), -+ KUNIT_CASE(mean_and_variance_test_2), -+ KUNIT_CASE(mean_and_variance_test_3), -+ KUNIT_CASE(mean_and_variance_test_4), -+ {} -+}; -+ -+static struct kunit_suite mean_and_variance_test_suite = { -+ .name = "mean and variance tests", -+ .test_cases = mean_and_variance_test_cases -+}; -+ -+kunit_test_suite(mean_and_variance_test_suite); -+ -+MODULE_AUTHOR("Daniel B. Hill"); -+MODULE_LICENSE("GPL"); -diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c -new file mode 100644 -index 000000000000..e3a51f6d6c9b ---- /dev/null -+++ b/fs/bcachefs/migrate.c -@@ -0,0 +1,179 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Code for moving data off a device. -+ */ -+ -+#include "bcachefs.h" -+#include "bkey_buf.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "errcode.h" -+#include "extents.h" -+#include "io_write.h" -+#include "journal.h" -+#include "keylist.h" -+#include "migrate.h" -+#include "move.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, -+ unsigned dev_idx, int flags, bool metadata) -+{ -+ unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; -+ unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST; -+ unsigned degraded = metadata ? 
BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; -+ unsigned nr_good; -+ -+ bch2_bkey_drop_device(k, dev_idx); -+ -+ nr_good = bch2_bkey_durability(c, k.s_c); -+ if ((!nr_good && !(flags & lost)) || -+ (nr_good < replicas && !(flags & degraded))) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ unsigned dev_idx, -+ int flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i *n; -+ int ret; -+ -+ if (!bch2_bkey_has_device_c(k, dev_idx)) -+ return 0; -+ -+ n = bch2_bkey_make_mut(trans, iter, &k, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ return ret; -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, false); -+ if (ret) -+ return ret; -+ -+ /* -+ * If the new extent no longer has any pointers, bch2_extent_normalize() -+ * will do the appropriate thing with it (turning it into a -+ * KEY_TYPE_error key, or just a discard if it was a cached extent) -+ */ -+ bch2_extent_normalize(c, bkey_i_to_s(n)); -+ -+ /* -+ * Since we're not inserting through an extent iterator -+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), -+ * we aren't using the extent overwrite path to delete, we're -+ * just using the normal key deletion path: -+ */ -+ if (bkey_deleted(&n->k)) -+ n->k.size = 0; -+ return 0; -+} -+ -+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ enum btree_id id; -+ int ret = 0; -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ if (!btree_type_has_ptrs(id)) -+ continue; -+ -+ ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL, -+ bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags)); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_put(trans); -+ -+ return ret; -+} -+ -+static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct closure cl; -+ struct btree *b; -+ struct bkey_buf k; -+ unsigned id; -+ int ret; -+ -+ /* don't handle this yet: */ -+ if (flags & BCH_FORCE_IF_METADATA_LOST) -+ return -EINVAL; -+ -+ trans = bch2_trans_get(c); -+ bch2_bkey_buf_init(&k); -+ closure_init_stack(&cl); -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, -+ BTREE_ITER_PREFETCH); -+retry: -+ ret = 0; -+ while (bch2_trans_begin(trans), -+ (b = bch2_btree_iter_peek_node(&iter)) && -+ !(ret = PTR_ERR_OR_ZERO(b))) { -+ if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx)) -+ goto next; -+ -+ bch2_bkey_buf_copy(&k, c, &b->key); -+ -+ ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), -+ dev_idx, flags, true); -+ if (ret) { -+ bch_err(c, "Cannot drop device without losing data"); -+ break; -+ } -+ -+ ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { -+ ret = 0; -+ continue; -+ } -+ -+ if (ret) { -+ bch_err_msg(c, ret, "updating btree node key"); -+ break; -+ } -+next: -+ bch2_btree_iter_next_node(&iter); -+ } -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) -+ goto err; -+ } -+ -+ bch2_btree_interior_updates_flush(c); -+ ret = 0; -+err: -+ bch2_bkey_buf_exit(&k, c); -+ bch2_trans_put(trans); -+ -+ 
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); -+ -+ return ret; -+} -+ -+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) -+{ -+ return bch2_dev_usrdata_drop(c, dev_idx, flags) ?: -+ bch2_dev_metadata_drop(c, dev_idx, flags); -+} -diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h -new file mode 100644 -index 000000000000..027efaa0d575 ---- /dev/null -+++ b/fs/bcachefs/migrate.h -@@ -0,0 +1,7 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MIGRATE_H -+#define _BCACHEFS_MIGRATE_H -+ -+int bch2_dev_data_drop(struct bch_fs *, unsigned, int); -+ -+#endif /* _BCACHEFS_MIGRATE_H */ -diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c -new file mode 100644 -index 000000000000..ab749bf2fcbc ---- /dev/null -+++ b/fs/bcachefs/move.c -@@ -0,0 +1,1198 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "backpointers.h" -+#include "bkey_buf.h" -+#include "btree_gc.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_write_buffer.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "errcode.h" -+#include "error.h" -+#include "inode.h" -+#include "io_read.h" -+#include "io_write.h" -+#include "journal_reclaim.h" -+#include "keylist.h" -+#include "move.h" -+#include "replicas.h" -+#include "snapshot.h" -+#include "super-io.h" -+#include "trace.h" -+ -+#include -+#include -+ -+static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (trace_move_extent_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ trace_move_extent(c, buf.buf); -+ printbuf_exit(&buf); -+ } -+} -+ -+static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (trace_move_extent_read_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ trace_move_extent_read(c, buf.buf); -+ printbuf_exit(&buf); -+ } -+} -+ -+static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k) -+{ -+ if (trace_move_extent_alloc_mem_fail_enabled()) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_bkey_val_to_text(&buf, c, k); -+ trace_move_extent_alloc_mem_fail(c, buf.buf); -+ printbuf_exit(&buf); -+ } -+} -+ -+struct moving_io { -+ struct list_head read_list; -+ struct list_head io_list; -+ struct move_bucket_in_flight *b; -+ struct closure cl; -+ bool read_completed; -+ -+ unsigned read_sectors; -+ unsigned write_sectors; -+ -+ struct bch_read_bio rbio; -+ -+ struct data_update write; -+ /* Must be last since it is variable size */ -+ struct bio_vec bi_inline_vecs[0]; -+}; -+ -+static void move_free(struct moving_io *io) -+{ -+ struct moving_context *ctxt = io->write.ctxt; -+ -+ if (io->b) -+ atomic_dec(&io->b->count); -+ -+ bch2_data_update_exit(&io->write); -+ -+ mutex_lock(&ctxt->lock); -+ list_del(&io->io_list); -+ wake_up(&ctxt->wait); -+ mutex_unlock(&ctxt->lock); -+ -+ kfree(io); -+} -+ -+static void move_write_done(struct bch_write_op *op) -+{ -+ struct moving_io *io = container_of(op, struct moving_io, write.op); -+ struct moving_context *ctxt = io->write.ctxt; -+ -+ if (io->write.op.error) -+ ctxt->write_error = true; -+ -+ atomic_sub(io->write_sectors, &io->write.ctxt->write_sectors); -+ atomic_dec(&io->write.ctxt->write_ios); -+ move_free(io); -+ closure_put(&ctxt->cl); -+} -+ -+static void move_write(struct moving_io *io) -+{ -+ if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { -+ move_free(io); -+ return; -+ } -+ 
-+ closure_get(&io->write.ctxt->cl); -+ atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); -+ atomic_inc(&io->write.ctxt->write_ios); -+ -+ bch2_data_update_read_done(&io->write, io->rbio.pick.crc); -+} -+ -+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt) -+{ -+ struct moving_io *io = -+ list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list); -+ -+ return io && io->read_completed ? io : NULL; -+} -+ -+static void move_read_endio(struct bio *bio) -+{ -+ struct moving_io *io = container_of(bio, struct moving_io, rbio.bio); -+ struct moving_context *ctxt = io->write.ctxt; -+ -+ atomic_sub(io->read_sectors, &ctxt->read_sectors); -+ atomic_dec(&ctxt->read_ios); -+ io->read_completed = true; -+ -+ wake_up(&ctxt->wait); -+ closure_put(&ctxt->cl); -+} -+ -+void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) -+{ -+ struct moving_io *io; -+ -+ while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { -+ bch2_trans_unlock_long(ctxt->trans); -+ list_del(&io->read_list); -+ move_write(io); -+ } -+} -+ -+void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) -+{ -+ unsigned sectors_pending = atomic_read(&ctxt->write_sectors); -+ -+ move_ctxt_wait_event(ctxt, -+ !atomic_read(&ctxt->write_sectors) || -+ atomic_read(&ctxt->write_sectors) != sectors_pending); -+} -+ -+void bch2_moving_ctxt_exit(struct moving_context *ctxt) -+{ -+ struct bch_fs *c = ctxt->trans->c; -+ -+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); -+ closure_sync(&ctxt->cl); -+ -+ EBUG_ON(atomic_read(&ctxt->write_sectors)); -+ EBUG_ON(atomic_read(&ctxt->write_ios)); -+ EBUG_ON(atomic_read(&ctxt->read_sectors)); -+ EBUG_ON(atomic_read(&ctxt->read_ios)); -+ -+ mutex_lock(&c->moving_context_lock); -+ list_del(&ctxt->list); -+ mutex_unlock(&c->moving_context_lock); -+ -+ bch2_trans_put(ctxt->trans); -+ memset(ctxt, 0, sizeof(*ctxt)); -+} -+ -+void bch2_moving_ctxt_init(struct moving_context *ctxt, -+ struct bch_fs *c, -+ struct bch_ratelimit *rate, -+ struct bch_move_stats *stats, -+ struct write_point_specifier wp, -+ bool wait_on_copygc) -+{ -+ memset(ctxt, 0, sizeof(*ctxt)); -+ -+ ctxt->trans = bch2_trans_get(c); -+ ctxt->fn = (void *) _RET_IP_; -+ ctxt->rate = rate; -+ ctxt->stats = stats; -+ ctxt->wp = wp; -+ ctxt->wait_on_copygc = wait_on_copygc; -+ -+ closure_init_stack(&ctxt->cl); -+ -+ mutex_init(&ctxt->lock); -+ INIT_LIST_HEAD(&ctxt->reads); -+ INIT_LIST_HEAD(&ctxt->ios); -+ init_waitqueue_head(&ctxt->wait); -+ -+ mutex_lock(&c->moving_context_lock); -+ list_add(&ctxt->list, &c->moving_context_list); -+ mutex_unlock(&c->moving_context_lock); -+} -+ -+void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) -+{ -+ trace_move_data(c, stats); -+} -+ -+void bch2_move_stats_init(struct bch_move_stats *stats, char *name) -+{ -+ memset(stats, 0, sizeof(*stats)); -+ stats->data_type = BCH_DATA_user; -+ scnprintf(stats->name, sizeof(stats->name), "%s", name); -+} -+ -+static int bch2_extent_drop_ptrs(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct data_update_opts data_opts) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_i *n; -+ int ret; -+ -+ n = bch2_bkey_make_mut_noupdate(trans, k); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ return ret; -+ -+ while (data_opts.kill_ptrs) { -+ unsigned i = 0, drop = __fls(data_opts.kill_ptrs); -+ struct bch_extent_ptr *ptr; -+ -+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop); -+ data_opts.kill_ptrs ^= 1U << drop; -+ } -+ -+ /* -+ * If the new extent 
no longer has any pointers, bch2_extent_normalize() -+ * will do the appropriate thing with it (turning it into a -+ * KEY_TYPE_error key, or just a discard if it was a cached extent) -+ */ -+ bch2_extent_normalize(c, bkey_i_to_s(n)); -+ -+ /* -+ * Since we're not inserting through an extent iterator -+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), -+ * we aren't using the extent overwrite path to delete, we're -+ * just using the normal key deletion path: -+ */ -+ if (bkey_deleted(&n->k)) -+ n->k.size = 0; -+ -+ return bch2_trans_relock(trans) ?: -+ bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: -+ bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); -+} -+ -+int bch2_move_extent(struct moving_context *ctxt, -+ struct move_bucket_in_flight *bucket_in_flight, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct bch_io_opts io_opts, -+ struct data_update_opts data_opts) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ struct moving_io *io; -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ unsigned sectors = k.k->size, pages; -+ int ret = -ENOMEM; -+ -+ if (ctxt->stats) -+ ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); -+ trace_move_extent2(c, k); -+ -+ bch2_data_update_opts_normalize(k, &data_opts); -+ -+ if (!data_opts.rewrite_ptrs && -+ !data_opts.extra_replicas) { -+ if (data_opts.kill_ptrs) -+ return bch2_extent_drop_ptrs(trans, iter, k, data_opts); -+ return 0; -+ } -+ -+ /* -+ * Before memory allocations & taking nocow locks in -+ * bch2_data_update_init(): -+ */ -+ bch2_trans_unlock(trans); -+ -+ /* write path might have to decompress data: */ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) -+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); -+ -+ pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); -+ io = kzalloc(sizeof(struct moving_io) + -+ sizeof(struct bio_vec) * pages, GFP_KERNEL); -+ if (!io) -+ goto err; -+ -+ INIT_LIST_HEAD(&io->io_list); -+ io->write.ctxt = ctxt; -+ io->read_sectors = k.k->size; -+ io->write_sectors = k.k->size; -+ -+ bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0); -+ bio_set_prio(&io->write.op.wbio.bio, -+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ -+ if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9, -+ GFP_KERNEL)) -+ goto err_free; -+ -+ io->rbio.c = c; -+ io->rbio.opts = io_opts; -+ bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0); -+ io->rbio.bio.bi_vcnt = pages; -+ bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); -+ io->rbio.bio.bi_iter.bi_size = sectors << 9; -+ -+ io->rbio.bio.bi_opf = REQ_OP_READ; -+ io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); -+ io->rbio.bio.bi_end_io = move_read_endio; -+ -+ ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp, -+ io_opts, data_opts, iter->btree_id, k); -+ if (ret && ret != -BCH_ERR_unwritten_extent_update) -+ goto err_free_pages; -+ -+ if (ret == -BCH_ERR_unwritten_extent_update) { -+ bch2_update_unwritten_extent(trans, &io->write); -+ move_free(io); -+ return 0; -+ } -+ -+ BUG_ON(ret); -+ -+ io->write.op.end_io = move_write_done; -+ -+ if (ctxt->rate) -+ bch2_ratelimit_increment(ctxt->rate, k.k->size); -+ -+ if (ctxt->stats) { -+ atomic64_inc(&ctxt->stats->keys_moved); -+ atomic64_add(k.k->size, &ctxt->stats->sectors_moved); -+ } -+ -+ if (bucket_in_flight) { -+ io->b = bucket_in_flight; -+ atomic_inc(&io->b->count); -+ } -+ -+ 
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size); -+ this_cpu_add(c->counters[BCH_COUNTER_move_extent_read], k.k->size); -+ trace_move_extent_read2(c, k); -+ -+ mutex_lock(&ctxt->lock); -+ atomic_add(io->read_sectors, &ctxt->read_sectors); -+ atomic_inc(&ctxt->read_ios); -+ -+ list_add_tail(&io->read_list, &ctxt->reads); -+ list_add_tail(&io->io_list, &ctxt->ios); -+ mutex_unlock(&ctxt->lock); -+ -+ /* -+ * dropped by move_read_endio() - guards against use after free of -+ * ctxt when doing wakeup -+ */ -+ closure_get(&ctxt->cl); -+ bch2_read_extent(trans, &io->rbio, -+ bkey_start_pos(k.k), -+ iter->btree_id, k, 0, -+ BCH_READ_NODECODE| -+ BCH_READ_LAST_FRAGMENT); -+ return 0; -+err_free_pages: -+ bio_free_pages(&io->write.op.wbio.bio); -+err_free: -+ kfree(io); -+err: -+ this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]); -+ trace_move_extent_alloc_mem_fail2(c, k); -+ return ret; -+} -+ -+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, -+ struct per_snapshot_io_opts *io_opts, -+ struct bkey_s_c extent_k) -+{ -+ struct bch_fs *c = trans->c; -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ -+ if (io_opts->cur_inum != extent_k.k->p.inode) { -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ -+ io_opts->d.nr = 0; -+ -+ for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), -+ BTREE_ITER_ALL_SNAPSHOTS, k, ret) { -+ if (k.k->p.offset != extent_k.k->p.inode) -+ break; -+ -+ if (!bkey_is_inode(k.k)) -+ continue; -+ -+ struct bch_inode_unpacked inode; -+ BUG_ON(bch2_inode_unpack(k, &inode)); -+ -+ struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; -+ bch2_inode_opts_get(&e.io_opts, trans->c, &inode); -+ -+ ret = darray_push(&io_opts->d, e); -+ if (ret) -+ break; -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ io_opts->cur_inum = extent_k.k->p.inode; -+ } -+ -+ ret = ret ?: trans_was_restarted(trans, restart_count); -+ if (ret) -+ return ERR_PTR(ret); -+ -+ if (extent_k.k->p.snapshot) { -+ struct snapshot_io_opts_entry *i; -+ darray_for_each(io_opts->d, i) -+ if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) -+ return &i->io_opts; -+ } -+ -+ return &io_opts->fs_io_opts; -+} -+ -+int bch2_move_get_io_opts_one(struct btree_trans *trans, -+ struct bch_io_opts *io_opts, -+ struct bkey_s_c extent_k) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ /* reflink btree? */ -+ if (!extent_k.k->p.inode) { -+ *io_opts = bch2_opts_to_inode_opts(trans->c->opts); -+ return 0; -+ } -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), -+ BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ return ret; -+ -+ if (!ret && bkey_is_inode(k.k)) { -+ struct bch_inode_unpacked inode; -+ bch2_inode_unpack(k, &inode); -+ bch2_inode_opts_get(io_opts, trans->c, &inode); -+ } else { -+ *io_opts = bch2_opts_to_inode_opts(trans->c->opts); -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ return 0; -+} -+ -+int bch2_move_ratelimit(struct moving_context *ctxt) -+{ -+ struct bch_fs *c = ctxt->trans->c; -+ u64 delay; -+ -+ if (ctxt->wait_on_copygc && !c->copygc_running) { -+ bch2_trans_unlock_long(ctxt->trans); -+ wait_event_killable(c->copygc_running_wq, -+ !c->copygc_running || -+ kthread_should_stop()); -+ } -+ -+ do { -+ delay = ctxt->rate ? 
bch2_ratelimit_delay(ctxt->rate) : 0; -+ -+ -+ if (delay) { -+ if (delay > HZ / 10) -+ bch2_trans_unlock_long(ctxt->trans); -+ else -+ bch2_trans_unlock(ctxt->trans); -+ set_current_state(TASK_INTERRUPTIBLE); -+ } -+ -+ if ((current->flags & PF_KTHREAD) && kthread_should_stop()) { -+ __set_current_state(TASK_RUNNING); -+ return 1; -+ } -+ -+ if (delay) -+ schedule_timeout(delay); -+ -+ if (unlikely(freezing(current))) { -+ move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); -+ try_to_freeze(); -+ } -+ } while (delay); -+ -+ /* -+ * XXX: these limits really ought to be per device, SSDs and hard drives -+ * will want different limits -+ */ -+ move_ctxt_wait_event(ctxt, -+ atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 && -+ atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 && -+ atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight && -+ atomic_read(&ctxt->read_ios) < c->opts.move_ios_in_flight); -+ -+ return 0; -+} -+ -+static int bch2_move_data_btree(struct moving_context *ctxt, -+ struct bpos start, -+ struct bpos end, -+ move_pred_fn pred, void *arg, -+ enum btree_id btree_id) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct per_snapshot_io_opts snapshot_io_opts; -+ struct bch_io_opts *io_opts; -+ struct bkey_buf sk; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct data_update_opts data_opts; -+ int ret = 0, ret2; -+ -+ per_snapshot_io_opts_init(&snapshot_io_opts, c); -+ bch2_bkey_buf_init(&sk); -+ -+ if (ctxt->stats) { -+ ctxt->stats->data_type = BCH_DATA_user; -+ ctxt->stats->pos = BBPOS(btree_id, start); -+ } -+ -+ bch2_trans_iter_init(trans, &iter, btree_id, start, -+ BTREE_ITER_PREFETCH| -+ BTREE_ITER_ALL_SNAPSHOTS); -+ -+ if (ctxt->rate) -+ bch2_ratelimit_reset(ctxt->rate); -+ -+ while (!bch2_move_ratelimit(ctxt)) { -+ bch2_trans_begin(trans); -+ -+ k = bch2_btree_iter_peek(&iter); -+ if (!k.k) -+ break; -+ -+ ret = bkey_err(k); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+ -+ if (bkey_ge(bkey_start_pos(k.k), end)) -+ break; -+ -+ if (ctxt->stats) -+ ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); -+ -+ if (!bkey_extent_is_direct_data(k.k)) -+ goto next_nondata; -+ -+ io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); -+ ret = PTR_ERR_OR_ZERO(io_opts); -+ if (ret) -+ continue; -+ -+ memset(&data_opts, 0, sizeof(data_opts)); -+ if (!pred(c, arg, k, io_opts, &data_opts)) -+ goto next; -+ -+ /* -+ * The iterator gets unlocked by __bch2_read_extent - need to -+ * save a copy of @k elsewhere: -+ */ -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); -+ if (ret2) { -+ if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) -+ continue; -+ -+ if (ret2 == -ENOMEM) { -+ /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(ctxt); -+ continue; -+ } -+ -+ /* XXX signal failure */ -+ goto next; -+ } -+next: -+ if (ctxt->stats) -+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen); -+next_nondata: -+ bch2_btree_iter_advance(&iter); -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_bkey_buf_exit(&sk, c); -+ per_snapshot_io_opts_exit(&snapshot_io_opts); -+ -+ return ret; -+} -+ -+int __bch2_move_data(struct moving_context *ctxt, -+ struct bbpos start, -+ struct bbpos end, -+ move_pred_fn pred, void *arg) -+{ -+ struct bch_fs *c = ctxt->trans->c; -+ enum btree_id id; -+ int ret = 0; -+ -+ for (id = 
start.btree; -+ id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); -+ id++) { -+ ctxt->stats->pos = BBPOS(id, POS_MIN); -+ -+ if (!btree_type_has_ptrs(id) || -+ !bch2_btree_id_root(c, id)->b) -+ continue; -+ -+ ret = bch2_move_data_btree(ctxt, -+ id == start.btree ? start.pos : POS_MIN, -+ id == end.btree ? end.pos : POS_MAX, -+ pred, arg, id); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+int bch2_move_data(struct bch_fs *c, -+ struct bbpos start, -+ struct bbpos end, -+ struct bch_ratelimit *rate, -+ struct bch_move_stats *stats, -+ struct write_point_specifier wp, -+ bool wait_on_copygc, -+ move_pred_fn pred, void *arg) -+{ -+ -+ struct moving_context ctxt; -+ int ret; -+ -+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); -+ ret = __bch2_move_data(&ctxt, start, end, pred, arg); -+ bch2_moving_ctxt_exit(&ctxt); -+ -+ return ret; -+} -+ -+int __bch2_evacuate_bucket(struct moving_context *ctxt, -+ struct move_bucket_in_flight *bucket_in_flight, -+ struct bpos bucket, int gen, -+ struct data_update_opts _data_opts) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct btree_iter iter; -+ struct bkey_buf sk; -+ struct bch_backpointer bp; -+ struct bch_alloc_v4 a_convert; -+ const struct bch_alloc_v4 *a; -+ struct bkey_s_c k; -+ struct data_update_opts data_opts; -+ unsigned dirty_sectors, bucket_size; -+ u64 fragmentation; -+ struct bpos bp_pos = POS_MIN; -+ int ret = 0; -+ -+ trace_bucket_evacuate(c, &bucket); -+ -+ bch2_bkey_buf_init(&sk); -+ -+ /* -+ * We're not run in a context that handles transaction restarts: -+ */ -+ bch2_trans_begin(trans); -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, -+ bucket, BTREE_ITER_CACHED); -+ ret = lockrestart_do(trans, -+ bkey_err(k = bch2_btree_iter_peek_slot(&iter))); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) { -+ bch_err_msg(c, ret, "looking up alloc key"); -+ goto err; -+ } -+ -+ a = bch2_alloc_to_v4(k, &a_convert); -+ dirty_sectors = a->dirty_sectors; -+ bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; -+ fragmentation = a->fragmentation_lru; -+ -+ ret = bch2_btree_write_buffer_flush(trans); -+ if (ret) { -+ bch_err_msg(c, ret, "flushing btree write buffer"); -+ goto err; -+ } -+ -+ while (!(ret = bch2_move_ratelimit(ctxt))) { -+ bch2_trans_begin(trans); -+ -+ ret = bch2_get_next_backpointer(trans, bucket, gen, -+ &bp_pos, &bp, -+ BTREE_ITER_CACHED); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ goto err; -+ if (bkey_eq(bp_pos, POS_MAX)) -+ break; -+ -+ if (!bp.level) { -+ const struct bch_extent_ptr *ptr; -+ unsigned i = 0; -+ -+ k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); -+ ret = bkey_err(k); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ goto err; -+ if (!k.k) -+ goto next; -+ -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ ret = bch2_move_get_io_opts_one(trans, &io_opts, k); -+ if (ret) { -+ bch2_trans_iter_exit(trans, &iter); -+ continue; -+ } -+ -+ data_opts = _data_opts; -+ data_opts.target = io_opts.background_target; -+ data_opts.rewrite_ptrs = 0; -+ -+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { -+ if (ptr->dev == bucket.inode) { -+ data_opts.rewrite_ptrs |= 1U << i; -+ if (ptr->cached) { -+ bch2_trans_iter_exit(trans, &iter); -+ goto next; -+ } -+ } -+ i++; -+ } -+ -+ ret = bch2_move_extent(ctxt, bucket_in_flight, -+ &iter, k, io_opts, 
data_opts); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret == -ENOMEM) { -+ /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(ctxt); -+ continue; -+ } -+ if (ret) -+ goto err; -+ -+ if (ctxt->stats) -+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen); -+ } else { -+ struct btree *b; -+ -+ b = bch2_backpointer_get_node(trans, &iter, bp_pos, bp); -+ ret = PTR_ERR_OR_ZERO(b); -+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) -+ continue; -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ goto err; -+ if (!b) -+ goto next; -+ -+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ goto err; -+ -+ if (ctxt->rate) -+ bch2_ratelimit_increment(ctxt->rate, -+ c->opts.btree_node_size >> 9); -+ if (ctxt->stats) { -+ atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen); -+ atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved); -+ } -+ } -+next: -+ bp_pos = bpos_nosnap_successor(bp_pos); -+ } -+ -+ trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); -+err: -+ bch2_bkey_buf_exit(&sk, c); -+ return ret; -+} -+ -+int bch2_evacuate_bucket(struct bch_fs *c, -+ struct bpos bucket, int gen, -+ struct data_update_opts data_opts, -+ struct bch_ratelimit *rate, -+ struct bch_move_stats *stats, -+ struct write_point_specifier wp, -+ bool wait_on_copygc) -+{ -+ struct moving_context ctxt; -+ int ret; -+ -+ bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); -+ ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); -+ bch2_moving_ctxt_exit(&ctxt); -+ -+ return ret; -+} -+ -+typedef bool (*move_btree_pred)(struct bch_fs *, void *, -+ struct btree *, struct bch_io_opts *, -+ struct data_update_opts *); -+ -+static int bch2_move_btree(struct bch_fs *c, -+ enum btree_id start_btree_id, struct bpos start_pos, -+ enum btree_id end_btree_id, struct bpos end_pos, -+ move_btree_pred pred, void *arg, -+ struct bch_move_stats *stats) -+{ -+ bool kthread = (current->flags & PF_KTHREAD) != 0; -+ struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); -+ struct moving_context ctxt; -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct btree *b; -+ enum btree_id id; -+ struct data_update_opts data_opts; -+ int ret = 0; -+ -+ bch2_moving_ctxt_init(&ctxt, c, NULL, stats, -+ writepoint_ptr(&c->btree_write_point), -+ true); -+ trans = ctxt.trans; -+ -+ stats->data_type = BCH_DATA_btree; -+ -+ for (id = start_btree_id; -+ id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); -+ id++) { -+ stats->pos = BBPOS(id, POS_MIN); -+ -+ if (!bch2_btree_id_root(c, id)->b) -+ continue; -+ -+ bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, -+ BTREE_ITER_PREFETCH); -+retry: -+ ret = 0; -+ while (bch2_trans_begin(trans), -+ (b = bch2_btree_iter_peek_node(&iter)) && -+ !(ret = PTR_ERR_OR_ZERO(b))) { -+ if (kthread && kthread_should_stop()) -+ break; -+ -+ if ((cmp_int(id, end_btree_id) ?: -+ bpos_cmp(b->key.k.p, end_pos)) > 0) -+ break; -+ -+ stats->pos = BBPOS(iter.btree_id, iter.pos); -+ -+ if (!pred(c, arg, b, &io_opts, &data_opts)) -+ goto next; -+ -+ ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret; -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret) -+ break; -+next: -+ 
bch2_btree_iter_next_node(&iter); -+ } -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (kthread && kthread_should_stop()) -+ break; -+ } -+ -+ bch_err_fn(c, ret); -+ bch2_moving_ctxt_exit(&ctxt); -+ bch2_btree_interior_updates_flush(c); -+ -+ return ret; -+} -+ -+static bool rereplicate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_update_opts *data_opts) -+{ -+ unsigned nr_good = bch2_bkey_durability(c, k); -+ unsigned replicas = bkey_is_btree_ptr(k.k) -+ ? c->opts.metadata_replicas -+ : io_opts->data_replicas; -+ -+ if (!nr_good || nr_good >= replicas) -+ return false; -+ -+ data_opts->target = 0; -+ data_opts->extra_replicas = replicas - nr_good; -+ data_opts->btree_insert_flags = 0; -+ return true; -+} -+ -+static bool migrate_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_update_opts *data_opts) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const struct bch_extent_ptr *ptr; -+ struct bch_ioctl_data *op = arg; -+ unsigned i = 0; -+ -+ data_opts->rewrite_ptrs = 0; -+ data_opts->target = 0; -+ data_opts->extra_replicas = 0; -+ data_opts->btree_insert_flags = 0; -+ -+ bkey_for_each_ptr(ptrs, ptr) { -+ if (ptr->dev == op->migrate.dev) -+ data_opts->rewrite_ptrs |= 1U << i; -+ i++; -+ } -+ -+ return data_opts->rewrite_ptrs != 0; -+} -+ -+static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, -+ struct btree *b, -+ struct bch_io_opts *io_opts, -+ struct data_update_opts *data_opts) -+{ -+ return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); -+} -+ -+static bool migrate_btree_pred(struct bch_fs *c, void *arg, -+ struct btree *b, -+ struct bch_io_opts *io_opts, -+ struct data_update_opts *data_opts) -+{ -+ return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); -+} -+ -+static bool bformat_needs_redo(struct bkey_format *f) -+{ -+ unsigned i; -+ -+ for (i = 0; i < f->nr_fields; i++) { -+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; -+ u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1)); -+ u64 field_offset = le64_to_cpu(f->field_offset[i]); -+ -+ if (f->bits_per_field[i] > unpacked_bits) -+ return true; -+ -+ if ((f->bits_per_field[i] == unpacked_bits) && field_offset) -+ return true; -+ -+ if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) & -+ unpacked_mask) < -+ field_offset) -+ return true; -+ } -+ -+ return false; -+} -+ -+static bool rewrite_old_nodes_pred(struct bch_fs *c, void *arg, -+ struct btree *b, -+ struct bch_io_opts *io_opts, -+ struct data_update_opts *data_opts) -+{ -+ if (b->version_ondisk != c->sb.version || -+ btree_node_need_rewrite(b) || -+ bformat_needs_redo(&b->format)) { -+ data_opts->target = 0; -+ data_opts->extra_replicas = 0; -+ data_opts->btree_insert_flags = 0; -+ return true; -+ } -+ -+ return false; -+} -+ -+int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) -+{ -+ int ret; -+ -+ ret = bch2_move_btree(c, -+ 0, POS_MIN, -+ BTREE_ID_NR, SPOS_MAX, -+ rewrite_old_nodes_pred, c, stats); -+ if (!ret) { -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); -+ c->disk_sb.sb->version_min = c->disk_sb.sb->version; -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ bch_err_fn(c, ret); -+ 
return ret; -+} -+ -+int bch2_data_job(struct bch_fs *c, -+ struct bch_move_stats *stats, -+ struct bch_ioctl_data op) -+{ -+ int ret = 0; -+ -+ switch (op.op) { -+ case BCH_DATA_OP_REREPLICATE: -+ bch2_move_stats_init(stats, "rereplicate"); -+ stats->data_type = BCH_DATA_journal; -+ ret = bch2_journal_flush_device_pins(&c->journal, -1); -+ -+ ret = bch2_move_btree(c, -+ op.start_btree, op.start_pos, -+ op.end_btree, op.end_pos, -+ rereplicate_btree_pred, c, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, -+ (struct bbpos) { op.start_btree, op.start_pos }, -+ (struct bbpos) { op.end_btree, op.end_pos }, -+ NULL, -+ stats, -+ writepoint_hashed((unsigned long) current), -+ true, -+ rereplicate_pred, c) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ bch2_move_stats_exit(stats, c); -+ break; -+ case BCH_DATA_OP_MIGRATE: -+ if (op.migrate.dev >= c->sb.nr_devices) -+ return -EINVAL; -+ -+ bch2_move_stats_init(stats, "migrate"); -+ stats->data_type = BCH_DATA_journal; -+ ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); -+ -+ ret = bch2_move_btree(c, -+ op.start_btree, op.start_pos, -+ op.end_btree, op.end_pos, -+ migrate_btree_pred, &op, stats) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ ret = bch2_move_data(c, -+ (struct bbpos) { op.start_btree, op.start_pos }, -+ (struct bbpos) { op.end_btree, op.end_pos }, -+ NULL, -+ stats, -+ writepoint_hashed((unsigned long) current), -+ true, -+ migrate_pred, &op) ?: ret; -+ ret = bch2_replicas_gc2(c) ?: ret; -+ -+ bch2_move_stats_exit(stats, c); -+ break; -+ case BCH_DATA_OP_REWRITE_OLD_NODES: -+ bch2_move_stats_init(stats, "rewrite_old_nodes"); -+ ret = bch2_scan_old_btree_nodes(c, stats); -+ bch2_move_stats_exit(stats, c); -+ break; -+ default: -+ ret = -EINVAL; -+ } -+ -+ return ret; -+} -+ -+void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) -+{ -+ prt_printf(out, "%s: data type=%s pos=", -+ stats->name, -+ bch2_data_types[stats->data_type]); -+ bch2_bbpos_to_text(out, stats->pos); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ prt_str(out, "keys moved: "); -+ prt_u64(out, atomic64_read(&stats->keys_moved)); -+ prt_newline(out); -+ -+ prt_str(out, "keys raced: "); -+ prt_u64(out, atomic64_read(&stats->keys_raced)); -+ prt_newline(out); -+ -+ prt_str(out, "bytes seen: "); -+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9); -+ prt_newline(out); -+ -+ prt_str(out, "bytes moved: "); -+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9); -+ prt_newline(out); -+ -+ prt_str(out, "bytes raced: "); -+ prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); -+ prt_newline(out); -+ -+ printbuf_indent_sub(out, 2); -+} -+ -+static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) -+{ -+ struct moving_io *io; -+ -+ bch2_move_stats_to_text(out, ctxt->stats); -+ printbuf_indent_add(out, 2); -+ -+ prt_printf(out, "reads: ios %u/%u sectors %u/%u", -+ atomic_read(&ctxt->read_ios), -+ c->opts.move_ios_in_flight, -+ atomic_read(&ctxt->read_sectors), -+ c->opts.move_bytes_in_flight >> 9); -+ prt_newline(out); -+ -+ prt_printf(out, "writes: ios %u/%u sectors %u/%u", -+ atomic_read(&ctxt->write_ios), -+ c->opts.move_ios_in_flight, -+ atomic_read(&ctxt->write_sectors), -+ c->opts.move_bytes_in_flight >> 9); -+ prt_newline(out); -+ -+ printbuf_indent_add(out, 2); -+ -+ mutex_lock(&ctxt->lock); -+ list_for_each_entry(io, &ctxt->ios, io_list) -+ 
bch2_write_op_to_text(out, &io->write.op); -+ mutex_unlock(&ctxt->lock); -+ -+ printbuf_indent_sub(out, 4); -+} -+ -+void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ struct moving_context *ctxt; -+ -+ mutex_lock(&c->moving_context_lock); -+ list_for_each_entry(ctxt, &c->moving_context_list, list) -+ bch2_moving_ctxt_to_text(out, c, ctxt); -+ mutex_unlock(&c->moving_context_lock); -+} -+ -+void bch2_fs_move_init(struct bch_fs *c) -+{ -+ INIT_LIST_HEAD(&c->moving_context_list); -+ mutex_init(&c->moving_context_lock); -+} -diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h -new file mode 100644 -index 000000000000..07cf9d42643b ---- /dev/null -+++ b/fs/bcachefs/move.h -@@ -0,0 +1,139 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_H -+#define _BCACHEFS_MOVE_H -+ -+#include "bbpos.h" -+#include "bcachefs_ioctl.h" -+#include "btree_iter.h" -+#include "buckets.h" -+#include "data_update.h" -+#include "move_types.h" -+ -+struct bch_read_bio; -+ -+struct moving_context { -+ struct btree_trans *trans; -+ struct list_head list; -+ void *fn; -+ -+ struct bch_ratelimit *rate; -+ struct bch_move_stats *stats; -+ struct write_point_specifier wp; -+ bool wait_on_copygc; -+ bool write_error; -+ -+ /* For waiting on outstanding reads and writes: */ -+ struct closure cl; -+ -+ struct mutex lock; -+ struct list_head reads; -+ struct list_head ios; -+ -+ /* in flight sectors: */ -+ atomic_t read_sectors; -+ atomic_t write_sectors; -+ atomic_t read_ios; -+ atomic_t write_ios; -+ -+ wait_queue_head_t wait; -+}; -+ -+#define move_ctxt_wait_event(_ctxt, _cond) \ -+do { \ -+ bool cond_finished = false; \ -+ bch2_moving_ctxt_do_pending_writes(_ctxt); \ -+ \ -+ if (_cond) \ -+ break; \ -+ bch2_trans_unlock_long((_ctxt)->trans); \ -+ __wait_event((_ctxt)->wait, \ -+ bch2_moving_ctxt_next_pending_write(_ctxt) || \ -+ (cond_finished = (_cond))); \ -+ if (cond_finished) \ -+ break; \ -+} while (1) -+ -+typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, -+ struct bch_io_opts *, struct data_update_opts *); -+ -+void bch2_moving_ctxt_exit(struct moving_context *); -+void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, -+ struct bch_ratelimit *, struct bch_move_stats *, -+ struct write_point_specifier, bool); -+struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); -+void bch2_moving_ctxt_do_pending_writes(struct moving_context *); -+void bch2_move_ctxt_wait_for_io(struct moving_context *); -+int bch2_move_ratelimit(struct moving_context *); -+ -+/* Inodes in different snapshots may have different IO options: */ -+struct snapshot_io_opts_entry { -+ u32 snapshot; -+ struct bch_io_opts io_opts; -+}; -+ -+struct per_snapshot_io_opts { -+ u64 cur_inum; -+ struct bch_io_opts fs_io_opts; -+ DARRAY(struct snapshot_io_opts_entry) d; -+}; -+ -+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) -+{ -+ memset(io_opts, 0, sizeof(*io_opts)); -+ io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts); -+} -+ -+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) -+{ -+ darray_exit(&io_opts->d); -+} -+ -+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, -+ struct per_snapshot_io_opts *, struct bkey_s_c); -+int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); -+ -+int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); -+ -+int bch2_move_extent(struct moving_context *, -+ struct 
move_bucket_in_flight *, -+ struct btree_iter *, -+ struct bkey_s_c, -+ struct bch_io_opts, -+ struct data_update_opts); -+ -+int __bch2_move_data(struct moving_context *, -+ struct bbpos, -+ struct bbpos, -+ move_pred_fn, void *); -+int bch2_move_data(struct bch_fs *, -+ struct bbpos start, -+ struct bbpos end, -+ struct bch_ratelimit *, -+ struct bch_move_stats *, -+ struct write_point_specifier, -+ bool, -+ move_pred_fn, void *); -+ -+int __bch2_evacuate_bucket(struct moving_context *, -+ struct move_bucket_in_flight *, -+ struct bpos, int, -+ struct data_update_opts); -+int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, -+ struct data_update_opts, -+ struct bch_ratelimit *, -+ struct bch_move_stats *, -+ struct write_point_specifier, -+ bool); -+int bch2_data_job(struct bch_fs *, -+ struct bch_move_stats *, -+ struct bch_ioctl_data); -+ -+void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); -+void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); -+void bch2_move_stats_init(struct bch_move_stats *, char *); -+ -+void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_fs_move_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_MOVE_H */ -diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h -new file mode 100644 -index 000000000000..e22841ef31e4 ---- /dev/null -+++ b/fs/bcachefs/move_types.h -@@ -0,0 +1,36 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVE_TYPES_H -+#define _BCACHEFS_MOVE_TYPES_H -+ -+#include "bbpos_types.h" -+ -+struct bch_move_stats { -+ enum bch_data_type data_type; -+ struct bbpos pos; -+ char name[32]; -+ -+ atomic64_t keys_moved; -+ atomic64_t keys_raced; -+ atomic64_t sectors_seen; -+ atomic64_t sectors_moved; -+ atomic64_t sectors_raced; -+}; -+ -+struct move_bucket_key { -+ struct bpos bucket; -+ u8 gen; -+}; -+ -+struct move_bucket { -+ struct move_bucket_key k; -+ unsigned sectors; -+}; -+ -+struct move_bucket_in_flight { -+ struct move_bucket_in_flight *next; -+ struct rhash_head hash; -+ struct move_bucket bucket; -+ atomic_t count; -+}; -+ -+#endif /* _BCACHEFS_MOVE_TYPES_H */ -diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c -new file mode 100644 -index 000000000000..0a0576326c5b ---- /dev/null -+++ b/fs/bcachefs/movinggc.c -@@ -0,0 +1,431 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Moving/copying garbage collector -+ * -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "btree_write_buffer.h" -+#include "buckets.h" -+#include "clock.h" -+#include "errcode.h" -+#include "error.h" -+#include "lru.h" -+#include "move.h" -+#include "movinggc.h" -+#include "trace.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+struct buckets_in_flight { -+ struct rhashtable table; -+ struct move_bucket_in_flight *first; -+ struct move_bucket_in_flight *last; -+ size_t nr; -+ size_t sectors; -+}; -+ -+static const struct rhashtable_params bch_move_bucket_params = { -+ .head_offset = offsetof(struct move_bucket_in_flight, hash), -+ .key_offset = offsetof(struct move_bucket_in_flight, bucket.k), -+ .key_len = sizeof(struct move_bucket_key), -+}; -+ -+static struct move_bucket_in_flight * -+move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b) -+{ -+ struct move_bucket_in_flight *new = kzalloc(sizeof(*new), GFP_KERNEL); -+ int ret; -+ -+ if (!new) -+ return ERR_PTR(-ENOMEM); -+ -+ new->bucket = b; -+ -+ ret = rhashtable_lookup_insert_fast(&list->table, &new->hash, -+ bch_move_bucket_params); -+ if (ret) { -+ kfree(new); -+ return ERR_PTR(ret); -+ } -+ -+ if (!list->first) -+ list->first = new; -+ else -+ list->last->next = new; -+ -+ list->last = new; -+ list->nr++; -+ list->sectors += b.sectors; -+ return new; -+} -+ -+static int bch2_bucket_is_movable(struct btree_trans *trans, -+ struct move_bucket *b, u64 time) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_alloc_v4 _a; -+ const struct bch_alloc_v4 *a; -+ int ret; -+ -+ if (bch2_bucket_is_open(trans->c, -+ b->k.bucket.inode, -+ b->k.bucket.offset)) -+ return 0; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, -+ b->k.bucket, BTREE_ITER_CACHED); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ a = bch2_alloc_to_v4(k, &_a); -+ b->k.gen = a->gen; -+ b->sectors = a->dirty_sectors; -+ -+ ret = data_type_movable(a->data_type) && -+ a->fragmentation_lru && -+ a->fragmentation_lru <= time; -+ -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static void move_buckets_wait(struct moving_context *ctxt, -+ struct buckets_in_flight *list, -+ bool flush) -+{ -+ struct move_bucket_in_flight *i; -+ int ret; -+ -+ while ((i = list->first)) { -+ if (flush) -+ move_ctxt_wait_event(ctxt, !atomic_read(&i->count)); -+ -+ if (atomic_read(&i->count)) -+ break; -+ -+ list->first = i->next; -+ if (!list->first) -+ list->last = NULL; -+ -+ list->nr--; -+ list->sectors -= i->bucket.sectors; -+ -+ ret = rhashtable_remove_fast(&list->table, &i->hash, -+ bch_move_bucket_params); -+ BUG_ON(ret); -+ kfree(i); -+ } -+ -+ bch2_trans_unlock_long(ctxt->trans); -+} -+ -+static bool bucket_in_flight(struct buckets_in_flight *list, -+ struct move_bucket_key k) -+{ -+ return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params); -+} -+ -+typedef DARRAY(struct move_bucket) move_buckets; -+ -+static int bch2_copygc_get_buckets(struct moving_context *ctxt, -+ struct buckets_in_flight *buckets_in_flight, -+ move_buckets *buckets) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4); -+ size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; -+ int ret; -+ -+ move_buckets_wait(ctxt, buckets_in_flight, false); -+ -+ ret = bch2_btree_write_buffer_flush(trans); -+ if 
(bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()", -+ __func__, bch2_err_str(ret))) -+ return ret; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, -+ lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), -+ lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), -+ 0, k, ({ -+ struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) }; -+ int ret2 = 0; -+ -+ saw++; -+ -+ if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p))) -+ not_movable++; -+ else if (bucket_in_flight(buckets_in_flight, b.k)) -+ in_flight++; -+ else { -+ ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get; -+ if (ret2 >= 0) -+ sectors += b.sectors; -+ } -+ ret2; -+ })); -+ -+ pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i", -+ buckets_in_flight->nr, buckets_in_flight->sectors, -+ saw, in_flight, not_movable, buckets->nr, sectors, nr_to_get, ret); -+ -+ return ret < 0 ? ret : 0; -+} -+ -+noinline -+static int bch2_copygc(struct moving_context *ctxt, -+ struct buckets_in_flight *buckets_in_flight, -+ bool *did_work) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct data_update_opts data_opts = { -+ .btree_insert_flags = BCH_WATERMARK_copygc, -+ }; -+ move_buckets buckets = { 0 }; -+ struct move_bucket_in_flight *f; -+ struct move_bucket *i; -+ u64 moved = atomic64_read(&ctxt->stats->sectors_moved); -+ int ret = 0; -+ -+ ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); -+ if (ret) -+ goto err; -+ -+ darray_for_each(buckets, i) { -+ if (unlikely(freezing(current))) -+ break; -+ -+ f = move_bucket_in_flight_add(buckets_in_flight, *i); -+ ret = PTR_ERR_OR_ZERO(f); -+ if (ret == -EEXIST) { /* rare race: copygc_get_buckets returned same bucket more than once */ -+ ret = 0; -+ continue; -+ } -+ if (ret == -ENOMEM) { /* flush IO, continue later */ -+ ret = 0; -+ break; -+ } -+ -+ ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, -+ f->bucket.k.gen, data_opts); -+ if (ret) -+ goto err; -+ -+ *did_work = true; -+ } -+err: -+ darray_exit(&buckets); -+ -+ /* no entries in LRU btree found, or got to end: */ -+ if (bch2_err_matches(ret, ENOENT)) -+ ret = 0; -+ -+ if (ret < 0 && !bch2_err_matches(ret, EROFS)) -+ bch_err_msg(c, ret, "from bch2_move_data()"); -+ -+ moved = atomic64_read(&ctxt->stats->sectors_moved) - moved; -+ trace_and_count(c, copygc, c, moved, 0, 0, 0); -+ return ret; -+} -+ -+/* -+ * Copygc runs when the amount of fragmented data is above some arbitrary -+ * threshold: -+ * -+ * The threshold at the limit - when the device is full - is the amount of space -+ * we reserved in bch2_recalc_capacity; we can't have more than that amount of -+ * disk space stranded due to fragmentation and store everything we have -+ * promised to store. -+ * -+ * But we don't want to be running copygc unnecessarily when the device still -+ * has plenty of free space - rather, we want copygc to smoothly run every so -+ * often and continually reduce the amount of fragmented space as the device -+ * fills up. So, we increase the threshold by half the current free space. 
-+ */ -+unsigned long bch2_copygc_wait_amount(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ s64 wait = S64_MAX, fragmented_allowed, fragmented; -+ unsigned i; -+ -+ for_each_rw_member(ca, c, dev_idx) { -+ struct bch_dev_usage usage = bch2_dev_usage_read(ca); -+ -+ fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) * -+ ca->mi.bucket_size) >> 1); -+ fragmented = 0; -+ -+ for (i = 0; i < BCH_DATA_NR; i++) -+ if (data_type_movable(i)) -+ fragmented += usage.d[i].fragmented; -+ -+ wait = min(wait, max(0LL, fragmented_allowed - fragmented)); -+ } -+ -+ return wait; -+} -+ -+void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) -+{ -+ prt_printf(out, "Currently waiting for: "); -+ prt_human_readable_u64(out, max(0LL, c->copygc_wait - -+ atomic64_read(&c->io_clock[WRITE].now)) << 9); -+ prt_newline(out); -+ -+ prt_printf(out, "Currently waiting since: "); -+ prt_human_readable_u64(out, max(0LL, -+ atomic64_read(&c->io_clock[WRITE].now) - -+ c->copygc_wait_at) << 9); -+ prt_newline(out); -+ -+ prt_printf(out, "Currently calculated wait: "); -+ prt_human_readable_u64(out, bch2_copygc_wait_amount(c)); -+ prt_newline(out); -+} -+ -+static int bch2_copygc_thread(void *arg) -+{ -+ struct bch_fs *c = arg; -+ struct moving_context ctxt; -+ struct bch_move_stats move_stats; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ struct buckets_in_flight *buckets; -+ u64 last, wait; -+ int ret = 0; -+ -+ buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL); -+ if (!buckets) -+ return -ENOMEM; -+ ret = rhashtable_init(&buckets->table, &bch_move_bucket_params); -+ if (ret) { -+ kfree(buckets); -+ bch_err_msg(c, ret, "allocating copygc buckets in flight"); -+ return ret; -+ } -+ -+ set_freezable(); -+ -+ bch2_move_stats_init(&move_stats, "copygc"); -+ bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, -+ writepoint_ptr(&c->copygc_write_point), -+ false); -+ -+ while (!ret && !kthread_should_stop()) { -+ bool did_work = false; -+ -+ bch2_trans_unlock_long(ctxt.trans); -+ cond_resched(); -+ -+ if (!c->copy_gc_enabled) { -+ move_buckets_wait(&ctxt, buckets, true); -+ kthread_wait_freezable(c->copy_gc_enabled); -+ } -+ -+ if (unlikely(freezing(current))) { -+ move_buckets_wait(&ctxt, buckets, true); -+ __refrigerator(false); -+ continue; -+ } -+ -+ last = atomic64_read(&clock->now); -+ wait = bch2_copygc_wait_amount(c); -+ -+ if (wait > clock->max_slop) { -+ c->copygc_wait_at = last; -+ c->copygc_wait = last + wait; -+ move_buckets_wait(&ctxt, buckets, true); -+ trace_and_count(c, copygc_wait, c, wait, last + wait); -+ bch2_kthread_io_clock_wait(clock, last + wait, -+ MAX_SCHEDULE_TIMEOUT); -+ continue; -+ } -+ -+ c->copygc_wait = 0; -+ -+ c->copygc_running = true; -+ ret = bch2_copygc(&ctxt, buckets, &did_work); -+ c->copygc_running = false; -+ -+ wake_up(&c->copygc_running_wq); -+ -+ if (!wait && !did_work) { -+ u64 min_member_capacity = bch2_min_rw_member_capacity(c); -+ -+ if (min_member_capacity == U64_MAX) -+ min_member_capacity = 128 * 2048; -+ -+ bch2_trans_unlock_long(ctxt.trans); -+ bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), -+ MAX_SCHEDULE_TIMEOUT); -+ } -+ } -+ -+ move_buckets_wait(&ctxt, buckets, true); -+ -+ rhashtable_destroy(&buckets->table); -+ kfree(buckets); -+ bch2_moving_ctxt_exit(&ctxt); -+ bch2_move_stats_exit(&move_stats, c); -+ -+ return 0; -+} -+ -+void bch2_copygc_stop(struct bch_fs *c) -+{ -+ if (c->copygc_thread) { -+ kthread_stop(c->copygc_thread); -+ 
put_task_struct(c->copygc_thread); -+ } -+ c->copygc_thread = NULL; -+} -+ -+int bch2_copygc_start(struct bch_fs *c) -+{ -+ struct task_struct *t; -+ int ret; -+ -+ if (c->copygc_thread) -+ return 0; -+ -+ if (c->opts.nochanges) -+ return 0; -+ -+ if (bch2_fs_init_fault("copygc_start")) -+ return -ENOMEM; -+ -+ t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); -+ ret = PTR_ERR_OR_ZERO(t); -+ if (ret) { -+ bch_err_msg(c, ret, "creating copygc thread"); -+ return ret; -+ } -+ -+ get_task_struct(t); -+ -+ c->copygc_thread = t; -+ wake_up_process(c->copygc_thread); -+ -+ return 0; -+} -+ -+void bch2_fs_copygc_init(struct bch_fs *c) -+{ -+ init_waitqueue_head(&c->copygc_running_wq); -+ c->copygc_running = false; -+} -diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h -new file mode 100644 -index 000000000000..ea181fef5bc9 ---- /dev/null -+++ b/fs/bcachefs/movinggc.h -@@ -0,0 +1,12 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_MOVINGGC_H -+#define _BCACHEFS_MOVINGGC_H -+ -+unsigned long bch2_copygc_wait_amount(struct bch_fs *); -+void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_copygc_stop(struct bch_fs *); -+int bch2_copygc_start(struct bch_fs *); -+void bch2_fs_copygc_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_MOVINGGC_H */ -diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c -new file mode 100644 -index 000000000000..3c21981a4a1c ---- /dev/null -+++ b/fs/bcachefs/nocow_locking.c -@@ -0,0 +1,144 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_methods.h" -+#include "nocow_locking.h" -+#include "util.h" -+ -+#include -+ -+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket) -+{ -+ u64 dev_bucket = bucket_to_u64(bucket); -+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(l->b); i++) -+ if (l->b[i] == dev_bucket && atomic_read(&l->l[i])) -+ return true; -+ return false; -+} -+ -+#define sign(v) (v < 0 ? -1 : v > 0 ? 1 : 0) -+ -+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) -+{ -+ u64 dev_bucket = bucket_to_u64(bucket); -+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); -+ int lock_val = flags ? 1 : -1; -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(l->b); i++) -+ if (l->b[i] == dev_bucket) { -+ int v = atomic_sub_return(lock_val, &l->l[i]); -+ -+ BUG_ON(v && sign(v) != lock_val); -+ if (!v) -+ closure_wake_up(&l->wait); -+ return; -+ } -+ -+ BUG(); -+} -+ -+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, -+ u64 dev_bucket, int flags) -+{ -+ int v, lock_val = flags ? 1 : -1; -+ unsigned i; -+ -+ spin_lock(&l->lock); -+ -+ for (i = 0; i < ARRAY_SIZE(l->b); i++) -+ if (l->b[i] == dev_bucket) -+ goto got_entry; -+ -+ for (i = 0; i < ARRAY_SIZE(l->b); i++) -+ if (!atomic_read(&l->l[i])) { -+ l->b[i] = dev_bucket; -+ goto take_lock; -+ } -+fail: -+ spin_unlock(&l->lock); -+ return false; -+got_entry: -+ v = atomic_read(&l->l[i]); -+ if (lock_val > 0 ? v < 0 : v > 0) -+ goto fail; -+take_lock: -+ v = atomic_read(&l->l[i]); -+ /* Overflow? 
*/ -+ if (v && sign(v + lock_val) != sign(v)) -+ goto fail; -+ -+ atomic_add(lock_val, &l->l[i]); -+ spin_unlock(&l->lock); -+ return true; -+} -+ -+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, -+ struct nocow_lock_bucket *l, -+ u64 dev_bucket, int flags) -+{ -+ if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) { -+ struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); -+ u64 start_time = local_clock(); -+ -+ __closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags)); -+ bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); -+ } -+} -+ -+void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t) -+ -+{ -+ unsigned i, nr_zero = 0; -+ struct nocow_lock_bucket *l; -+ -+ for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) { -+ unsigned v = 0; -+ -+ for (i = 0; i < ARRAY_SIZE(l->l); i++) -+ v |= atomic_read(&l->l[i]); -+ -+ if (!v) { -+ nr_zero++; -+ continue; -+ } -+ -+ if (nr_zero) -+ prt_printf(out, "(%u empty entries)\n", nr_zero); -+ nr_zero = 0; -+ -+ for (i = 0; i < ARRAY_SIZE(l->l); i++) { -+ int v = atomic_read(&l->l[i]); -+ if (v) { -+ bch2_bpos_to_text(out, u64_to_bucket(l->b[i])); -+ prt_printf(out, ": %s %u ", v < 0 ? "copy" : "update", abs(v)); -+ } -+ } -+ prt_newline(out); -+ } -+ -+ if (nr_zero) -+ prt_printf(out, "(%u empty entries)\n", nr_zero); -+} -+ -+void bch2_fs_nocow_locking_exit(struct bch_fs *c) -+{ -+ struct bucket_nocow_lock_table *t = &c->nocow_locks; -+ -+ for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) -+ for (unsigned j = 0; j < ARRAY_SIZE(l->l); j++) -+ BUG_ON(atomic_read(&l->l[j])); -+} -+ -+int bch2_fs_nocow_locking_init(struct bch_fs *c) -+{ -+ struct bucket_nocow_lock_table *t = &c->nocow_locks; -+ -+ for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) -+ spin_lock_init(&l->lock); -+ -+ return 0; -+} -diff --git a/fs/bcachefs/nocow_locking.h b/fs/bcachefs/nocow_locking.h -new file mode 100644 -index 000000000000..f9d6a426a960 ---- /dev/null -+++ b/fs/bcachefs/nocow_locking.h -@@ -0,0 +1,50 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_NOCOW_LOCKING_H -+#define _BCACHEFS_NOCOW_LOCKING_H -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "nocow_locking_types.h" -+ -+#include -+ -+static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t, -+ u64 dev_bucket) -+{ -+ unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS); -+ -+ return t->l + (h & (BUCKET_NOCOW_LOCKS - 1)); -+} -+ -+#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) -+ -+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos); -+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int); -+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int); -+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, -+ struct nocow_lock_bucket *, u64, int); -+ -+static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, -+ struct bpos bucket, int flags) -+{ -+ u64 dev_bucket = bucket_to_u64(bucket); -+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); -+ -+ __bch2_bucket_nocow_lock(t, l, dev_bucket, flags); -+} -+ -+static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t, -+ struct bpos bucket, int flags) -+{ -+ u64 dev_bucket = bucket_to_u64(bucket); -+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); -+ -+ return __bch2_bucket_nocow_trylock(l, 
dev_bucket, flags); -+} -+ -+void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *); -+ -+void bch2_fs_nocow_locking_exit(struct bch_fs *); -+int bch2_fs_nocow_locking_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_NOCOW_LOCKING_H */ -diff --git a/fs/bcachefs/nocow_locking_types.h b/fs/bcachefs/nocow_locking_types.h -new file mode 100644 -index 000000000000..bd12bf677924 ---- /dev/null -+++ b/fs/bcachefs/nocow_locking_types.h -@@ -0,0 +1,20 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H -+#define _BCACHEFS_NOCOW_LOCKING_TYPES_H -+ -+#define BUCKET_NOCOW_LOCKS_BITS 10 -+#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) -+ -+struct nocow_lock_bucket { -+ struct closure_waitlist wait; -+ spinlock_t lock; -+ u64 b[4]; -+ atomic_t l[4]; -+} __aligned(SMP_CACHE_BYTES); -+ -+struct bucket_nocow_lock_table { -+ struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS]; -+}; -+ -+#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */ -+ -diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c -new file mode 100644 -index 000000000000..8dd4046cca41 ---- /dev/null -+++ b/fs/bcachefs/opts.c -@@ -0,0 +1,602 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include -+ -+#include "bcachefs.h" -+#include "compress.h" -+#include "disk_groups.h" -+#include "error.h" -+#include "opts.h" -+#include "super-io.h" -+#include "util.h" -+ -+#define x(t, n, ...) [n] = #t, -+ -+const char * const bch2_error_actions[] = { -+ BCH_ERROR_ACTIONS() -+ NULL -+}; -+ -+const char * const bch2_fsck_fix_opts[] = { -+ BCH_FIX_ERRORS_OPTS() -+ NULL -+}; -+ -+const char * const bch2_version_upgrade_opts[] = { -+ BCH_VERSION_UPGRADE_OPTS() -+ NULL -+}; -+ -+const char * const bch2_sb_features[] = { -+ BCH_SB_FEATURES() -+ NULL -+}; -+ -+const char * const bch2_sb_compat[] = { -+ BCH_SB_COMPAT() -+ NULL -+}; -+ -+const char * const __bch2_btree_ids[] = { -+ BCH_BTREE_IDS() -+ NULL -+}; -+ -+const char * const bch2_csum_types[] = { -+ BCH_CSUM_TYPES() -+ NULL -+}; -+ -+const char * const bch2_csum_opts[] = { -+ BCH_CSUM_OPTS() -+ NULL -+}; -+ -+const char * const bch2_compression_types[] = { -+ BCH_COMPRESSION_TYPES() -+ NULL -+}; -+ -+const char * const bch2_compression_opts[] = { -+ BCH_COMPRESSION_OPTS() -+ NULL -+}; -+ -+const char * const bch2_str_hash_types[] = { -+ BCH_STR_HASH_TYPES() -+ NULL -+}; -+ -+const char * const bch2_str_hash_opts[] = { -+ BCH_STR_HASH_OPTS() -+ NULL -+}; -+ -+const char * const bch2_data_types[] = { -+ BCH_DATA_TYPES() -+ NULL -+}; -+ -+const char * const bch2_member_states[] = { -+ BCH_MEMBER_STATES() -+ NULL -+}; -+ -+const char * const bch2_jset_entry_types[] = { -+ BCH_JSET_ENTRY_TYPES() -+ NULL -+}; -+ -+const char * const bch2_fs_usage_types[] = { -+ BCH_FS_USAGE_TYPES() -+ NULL -+}; -+ -+#undef x -+ -+static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res, -+ struct printbuf *err) -+{ -+ if (!val) { -+ *res = FSCK_FIX_yes; -+ } else { -+ int ret = match_string(bch2_fsck_fix_opts, -1, val); -+ -+ if (ret < 0 && err) -+ prt_str(err, "fix_errors: invalid selection"); -+ if (ret < 0) -+ return ret; -+ *res = ret; -+ } -+ -+ return 0; -+} -+ -+static void bch2_opt_fix_errors_to_text(struct printbuf *out, -+ struct bch_fs *c, -+ struct bch_sb *sb, -+ u64 v) -+{ -+ prt_str(out, bch2_fsck_fix_opts[v]); -+} -+ -+#define bch2_opt_fix_errors (struct bch_opt_fn) { \ -+ .parse = bch2_opt_fix_errors_parse, \ -+ .to_text = bch2_opt_fix_errors_to_text, \ -+} -+ -+const char * const bch2_d_types[BCH_DT_MAX] = { -+ [DT_UNKNOWN] = 
"unknown", -+ [DT_FIFO] = "fifo", -+ [DT_CHR] = "chr", -+ [DT_DIR] = "dir", -+ [DT_BLK] = "blk", -+ [DT_REG] = "reg", -+ [DT_LNK] = "lnk", -+ [DT_SOCK] = "sock", -+ [DT_WHT] = "whiteout", -+ [DT_SUBVOL] = "subvol", -+}; -+ -+u64 BCH2_NO_SB_OPT(const struct bch_sb *sb) -+{ -+ BUG(); -+} -+ -+void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v) -+{ -+ BUG(); -+} -+ -+void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) -+{ -+#define x(_name, ...) \ -+ if (opt_defined(src, _name)) \ -+ opt_set(*dst, _name, src._name); -+ -+ BCH_OPTS() -+#undef x -+} -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opt_defined(*opts, _name); -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ return opts->_name; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) -+{ -+ switch (id) { -+#define x(_name, ...) \ -+ case Opt_##_name: \ -+ opt_set(*opts, _name, v); \ -+ break; -+ BCH_OPTS() -+#undef x -+ default: -+ BUG(); -+ } -+} -+ -+const struct bch_option bch2_opt_table[] = { -+#define OPT_BOOL() .type = BCH_OPT_BOOL, .min = 0, .max = 2 -+#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \ -+ .min = _min, .max = _max -+#define OPT_STR(_choices) .type = BCH_OPT_STR, \ -+ .min = 0, .max = ARRAY_SIZE(_choices), \ -+ .choices = _choices -+#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn -+ -+#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \ -+ [Opt_##_name] = { \ -+ .attr = { \ -+ .name = #_name, \ -+ .mode = (_flags) & OPT_RUNTIME ? 
0644 : 0444, \ -+ }, \ -+ .flags = _flags, \ -+ .hint = _hint, \ -+ .help = _help, \ -+ .get_sb = _sb_opt, \ -+ .set_sb = SET_##_sb_opt, \ -+ _type \ -+ }, -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+int bch2_opt_lookup(const char *name) -+{ -+ const struct bch_option *i; -+ -+ for (i = bch2_opt_table; -+ i < bch2_opt_table + ARRAY_SIZE(bch2_opt_table); -+ i++) -+ if (!strcmp(name, i->attr.name)) -+ return i - bch2_opt_table; -+ -+ return -1; -+} -+ -+struct synonym { -+ const char *s1, *s2; -+}; -+ -+static const struct synonym bch_opt_synonyms[] = { -+ { "quota", "usrquota" }, -+}; -+ -+static int bch2_mount_opt_lookup(const char *name) -+{ -+ const struct synonym *i; -+ -+ for (i = bch_opt_synonyms; -+ i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms); -+ i++) -+ if (!strcmp(name, i->s1)) -+ name = i->s2; -+ -+ return bch2_opt_lookup(name); -+} -+ -+int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) -+{ -+ if (v < opt->min) { -+ if (err) -+ prt_printf(err, "%s: too small (min %llu)", -+ opt->attr.name, opt->min); -+ return -BCH_ERR_ERANGE_option_too_small; -+ } -+ -+ if (opt->max && v >= opt->max) { -+ if (err) -+ prt_printf(err, "%s: too big (max %llu)", -+ opt->attr.name, opt->max); -+ return -BCH_ERR_ERANGE_option_too_big; -+ } -+ -+ if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) { -+ if (err) -+ prt_printf(err, "%s: not a multiple of 512", -+ opt->attr.name); -+ return -EINVAL; -+ } -+ -+ if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) { -+ if (err) -+ prt_printf(err, "%s: must be a power of two", -+ opt->attr.name); -+ return -EINVAL; -+ } -+ -+ if (opt->fn.validate) -+ return opt->fn.validate(v, err); -+ -+ return 0; -+} -+ -+int bch2_opt_parse(struct bch_fs *c, -+ const struct bch_option *opt, -+ const char *val, u64 *res, -+ struct printbuf *err) -+{ -+ ssize_t ret; -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ if (val) { -+ ret = kstrtou64(val, 10, res); -+ } else { -+ ret = 0; -+ *res = 1; -+ } -+ -+ if (ret < 0 || (*res != 0 && *res != 1)) { -+ if (err) -+ prt_printf(err, "%s: must be bool", opt->attr.name); -+ return ret; -+ } -+ break; -+ case BCH_OPT_UINT: -+ if (!val) { -+ prt_printf(err, "%s: required value", -+ opt->attr.name); -+ return -EINVAL; -+ } -+ -+ ret = opt->flags & OPT_HUMAN_READABLE -+ ? bch2_strtou64_h(val, res) -+ : kstrtou64(val, 10, res); -+ if (ret < 0) { -+ if (err) -+ prt_printf(err, "%s: must be a number", -+ opt->attr.name); -+ return ret; -+ } -+ break; -+ case BCH_OPT_STR: -+ if (!val) { -+ prt_printf(err, "%s: required value", -+ opt->attr.name); -+ return -EINVAL; -+ } -+ -+ ret = match_string(opt->choices, -1, val); -+ if (ret < 0) { -+ if (err) -+ prt_printf(err, "%s: invalid selection", -+ opt->attr.name); -+ return ret; -+ } -+ -+ *res = ret; -+ break; -+ case BCH_OPT_FN: -+ ret = opt->fn.parse(c, val, res, err); -+ if (ret < 0) { -+ if (err) -+ prt_printf(err, "%s: parse error", -+ opt->attr.name); -+ return ret; -+ } -+ } -+ -+ return bch2_opt_validate(opt, *res, err); -+} -+ -+void bch2_opt_to_text(struct printbuf *out, -+ struct bch_fs *c, struct bch_sb *sb, -+ const struct bch_option *opt, u64 v, -+ unsigned flags) -+{ -+ if (flags & OPT_SHOW_MOUNT_STYLE) { -+ if (opt->type == BCH_OPT_BOOL) { -+ prt_printf(out, "%s%s", -+ v ? 
"" : "no", -+ opt->attr.name); -+ return; -+ } -+ -+ prt_printf(out, "%s=", opt->attr.name); -+ } -+ -+ switch (opt->type) { -+ case BCH_OPT_BOOL: -+ case BCH_OPT_UINT: -+ if (opt->flags & OPT_HUMAN_READABLE) -+ prt_human_readable_u64(out, v); -+ else -+ prt_printf(out, "%lli", v); -+ break; -+ case BCH_OPT_STR: -+ if (flags & OPT_SHOW_FULL_LIST) -+ prt_string_option(out, opt->choices, v); -+ else -+ prt_str(out, opt->choices[v]); -+ break; -+ case BCH_OPT_FN: -+ opt->fn.to_text(out, c, sb, v); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v) -+{ -+ int ret = 0; -+ -+ switch (id) { -+ case Opt_compression: -+ case Opt_background_compression: -+ ret = bch2_check_set_has_compressed_data(c, v); -+ break; -+ case Opt_erasure_code: -+ if (v) -+ bch2_check_set_feature(c, BCH_FEATURE_ec); -+ break; -+ } -+ -+ return ret; -+} -+ -+int bch2_opts_check_may_set(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret; -+ -+ for (i = 0; i < bch2_opts_nr; i++) { -+ ret = bch2_opt_check_may_set(c, i, -+ bch2_opt_get_by_id(&c->opts, i)); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, -+ char *options) -+{ -+ char *copied_opts, *copied_opts_start; -+ char *opt, *name, *val; -+ int ret, id; -+ struct printbuf err = PRINTBUF; -+ u64 v; -+ -+ if (!options) -+ return 0; -+ -+ /* -+ * sys_fsconfig() is now occasionally providing us with option lists -+ * starting with a comma - weird. -+ */ -+ if (*options == ',') -+ options++; -+ -+ copied_opts = kstrdup(options, GFP_KERNEL); -+ if (!copied_opts) -+ return -1; -+ copied_opts_start = copied_opts; -+ -+ while ((opt = strsep(&copied_opts, ",")) != NULL) { -+ name = strsep(&opt, "="); -+ val = opt; -+ -+ id = bch2_mount_opt_lookup(name); -+ -+ /* Check for the form "noopt", negation of a boolean opt: */ -+ if (id < 0 && -+ !val && -+ !strncmp("no", name, 2)) { -+ id = bch2_mount_opt_lookup(name + 2); -+ val = "0"; -+ } -+ -+ /* Unknown options are ignored: */ -+ if (id < 0) -+ continue; -+ -+ if (!(bch2_opt_table[id].flags & OPT_MOUNT)) -+ goto bad_opt; -+ -+ if (id == Opt_acl && -+ !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) -+ goto bad_opt; -+ -+ if ((id == Opt_usrquota || -+ id == Opt_grpquota) && -+ !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) -+ goto bad_opt; -+ -+ ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); -+ if (ret < 0) -+ goto bad_val; -+ -+ bch2_opt_set_by_id(opts, id, v); -+ } -+ -+ ret = 0; -+ goto out; -+ -+bad_opt: -+ pr_err("Bad mount option %s", name); -+ ret = -1; -+ goto out; -+bad_val: -+ pr_err("Invalid mount option %s", err.buf); -+ ret = -1; -+ goto out; -+out: -+ kfree(copied_opts_start); -+ printbuf_exit(&err); -+ return ret; -+} -+ -+u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id) -+{ -+ const struct bch_option *opt = bch2_opt_table + id; -+ u64 v; -+ -+ v = opt->get_sb(sb); -+ -+ if (opt->flags & OPT_SB_FIELD_ILOG2) -+ v = 1ULL << v; -+ -+ if (opt->flags & OPT_SB_FIELD_SECTORS) -+ v <<= 9; -+ -+ return v; -+} -+ -+/* -+ * Initial options from superblock - here we don't want any options undefined, -+ * any options the superblock doesn't specify are set to 0: -+ */ -+int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) -+{ -+ unsigned id; -+ -+ for (id = 0; id < bch2_opts_nr; id++) { -+ const struct bch_option *opt = bch2_opt_table + id; -+ -+ if (opt->get_sb == BCH2_NO_SB_OPT) -+ continue; -+ -+ bch2_opt_set_by_id(opts, id, bch2_opt_from_sb(sb, id)); -+ } -+ -+ return 0; -+} -+ -+void 
__bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v) -+{ -+ if (opt->set_sb == SET_BCH2_NO_SB_OPT) -+ return; -+ -+ if (opt->flags & OPT_SB_FIELD_SECTORS) -+ v >>= 9; -+ -+ if (opt->flags & OPT_SB_FIELD_ILOG2) -+ v = ilog2(v); -+ -+ opt->set_sb(sb, v); -+} -+ -+void bch2_opt_set_sb(struct bch_fs *c, const struct bch_option *opt, u64 v) -+{ -+ if (opt->set_sb == SET_BCH2_NO_SB_OPT) -+ return; -+ -+ mutex_lock(&c->sb_lock); -+ __bch2_opt_set_sb(c->disk_sb.sb, opt, v); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+} -+ -+/* io opts: */ -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) -+{ -+ return (struct bch_io_opts) { -+#define x(_name, _bits) ._name = src._name, -+ BCH_INODE_OPTS() -+#undef x -+ }; -+} -+ -+bool bch2_opt_is_inode_opt(enum bch_opt_id id) -+{ -+ static const enum bch_opt_id inode_opt_list[] = { -+#define x(_name, _bits) Opt_##_name, -+ BCH_INODE_OPTS() -+#undef x -+ }; -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(inode_opt_list); i++) -+ if (inode_opt_list[i] == id) -+ return true; -+ -+ return false; -+} -diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h -new file mode 100644 -index 000000000000..8526f177450a ---- /dev/null -+++ b/fs/bcachefs/opts.h -@@ -0,0 +1,564 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_OPTS_H -+#define _BCACHEFS_OPTS_H -+ -+#include -+#include -+#include -+#include -+#include "bcachefs_format.h" -+ -+struct bch_fs; -+ -+extern const char * const bch2_error_actions[]; -+extern const char * const bch2_fsck_fix_opts[]; -+extern const char * const bch2_version_upgrade_opts[]; -+extern const char * const bch2_sb_features[]; -+extern const char * const bch2_sb_compat[]; -+extern const char * const __bch2_btree_ids[]; -+extern const char * const bch2_csum_types[]; -+extern const char * const bch2_csum_opts[]; -+extern const char * const bch2_compression_types[]; -+extern const char * const bch2_compression_opts[]; -+extern const char * const bch2_str_hash_types[]; -+extern const char * const bch2_str_hash_opts[]; -+extern const char * const bch2_data_types[]; -+extern const char * const bch2_member_states[]; -+extern const char * const bch2_jset_entry_types[]; -+extern const char * const bch2_fs_usage_types[]; -+extern const char * const bch2_d_types[]; -+ -+static inline const char *bch2_d_type_str(unsigned d_type) -+{ -+ return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)"; -+} -+ -+/* -+ * Mount options; we also store defaults in the superblock. -+ * -+ * Also exposed via sysfs: if an option is writeable, and it's also stored in -+ * the superblock, changing it via sysfs (currently? might change this) also -+ * updates the superblock. -+ * -+ * We store options as signed integers, where -1 means undefined. This means we -+ * can pass the mount options to bch2_fs_alloc() as a whole struct, and then only -+ * apply the options from that struct that are defined. 
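-+ *
-+ * Illustrative sketch of that pattern: bch2_opts_empty() leaves every
-+ * option undefined; after opt_set(opts, read_only, true),
-+ * opt_defined(opts, read_only) is true and bch2_opts_apply() copies
-+ * only the fields that were explicitly set.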
-+ */ -+ -+/* dummy option, for options that aren't stored in the superblock */ -+u64 BCH2_NO_SB_OPT(const struct bch_sb *); -+void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64); -+ -+/* When can be set: */ -+enum opt_flags { -+ OPT_FS = (1 << 0), /* Filesystem option */ -+ OPT_DEVICE = (1 << 1), /* Device option */ -+ OPT_INODE = (1 << 2), /* Inode option */ -+ OPT_FORMAT = (1 << 3), /* May be specified at format time */ -+ OPT_MOUNT = (1 << 4), /* May be specified at mount time */ -+ OPT_RUNTIME = (1 << 5), /* May be specified at runtime */ -+ OPT_HUMAN_READABLE = (1 << 6), -+ OPT_MUST_BE_POW_2 = (1 << 7), /* Must be power of 2 */ -+ OPT_SB_FIELD_SECTORS = (1 << 8),/* Superblock field is >> 9 of actual value */ -+ OPT_SB_FIELD_ILOG2 = (1 << 9), /* Superblock field is ilog2 of actual value */ -+}; -+ -+enum opt_type { -+ BCH_OPT_BOOL, -+ BCH_OPT_UINT, -+ BCH_OPT_STR, -+ BCH_OPT_FN, -+}; -+ -+struct bch_opt_fn { -+ int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *); -+ void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); -+ int (*validate)(u64, struct printbuf *); -+}; -+ -+/** -+ * x(name, shortopt, type, in mem type, mode, sb_opt) -+ * -+ * @name - name of mount option, sysfs attribute, and struct bch_opts -+ * member -+ * -+ * @mode - when opt may be set -+ * -+ * @sb_option - name of corresponding superblock option -+ * -+ * @type - one of OPT_BOOL, OPT_UINT, OPT_STR -+ */ -+ -+/* -+ * XXX: add fields for -+ * - default value -+ * - helptext -+ */ -+ -+#ifdef __KERNEL__ -+#define RATELIMIT_ERRORS_DEFAULT true -+#else -+#define RATELIMIT_ERRORS_DEFAULT false -+#endif -+ -+#ifdef CONFIG_BCACHEFS_DEBUG -+#define BCACHEFS_VERBOSE_DEFAULT true -+#else -+#define BCACHEFS_VERBOSE_DEFAULT false -+#endif -+ -+#define BCH_FIX_ERRORS_OPTS() \ -+ x(exit, 0) \ -+ x(yes, 1) \ -+ x(no, 2) \ -+ x(ask, 3) -+ -+enum fsck_err_opts { -+#define x(t, n) FSCK_FIX_##t, -+ BCH_FIX_ERRORS_OPTS() -+#undef x -+}; -+ -+#define BCH_OPTS() \ -+ x(block_size, u16, \ -+ OPT_FS|OPT_FORMAT| \ -+ OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \ -+ OPT_UINT(512, 1U << 16), \ -+ BCH_SB_BLOCK_SIZE, 8, \ -+ "size", NULL) \ -+ x(btree_node_size, u32, \ -+ OPT_FS|OPT_FORMAT| \ -+ OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \ -+ OPT_UINT(512, 1U << 20), \ -+ BCH_SB_BTREE_NODE_SIZE, 512, \ -+ "size", "Btree node size, default 256k") \ -+ x(errors, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_error_actions), \ -+ BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \ -+ NULL, "Action to take on filesystem error") \ -+ x(metadata_replicas, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_WANT, 1, \ -+ "#", "Number of metadata replicas") \ -+ x(data_replicas, u8, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_WANT, 1, \ -+ "#", "Number of data replicas") \ -+ x(metadata_replicas_required, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_META_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(data_replicas_required, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(1, BCH_REPLICAS_MAX), \ -+ BCH_SB_DATA_REPLICAS_REQ, 1, \ -+ "#", NULL) \ -+ x(encoded_extent_max, u32, \ -+ OPT_FS|OPT_FORMAT| \ -+ OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS|OPT_SB_FIELD_ILOG2,\ -+ OPT_UINT(4096, 2U << 20), \ -+ BCH_SB_ENCODED_EXTENT_MAX_BITS, 64 << 10, \ -+ "size", "Maximum size of checksummed/compressed 
extents")\ -+ x(metadata_checksum, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ -+ NULL, NULL) \ -+ x(data_checksum, u8, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_csum_opts), \ -+ BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_crc32c, \ -+ NULL, NULL) \ -+ x(compression, u8, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FN(bch2_opt_compression), \ -+ BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(background_compression, u8, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FN(bch2_opt_compression), \ -+ BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_none, \ -+ NULL, NULL) \ -+ x(str_hash, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_STR(bch2_str_hash_opts), \ -+ BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_OPT_siphash, \ -+ NULL, "Hash function for directory entries and xattrs")\ -+ x(metadata_target, u16, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_METADATA_TARGET, 0, \ -+ "(target)", "Device or label for metadata writes") \ -+ x(foreground_target, u16, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_FOREGROUND_TARGET, 0, \ -+ "(target)", "Device or label for foreground writes") \ -+ x(background_target, u16, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_BACKGROUND_TARGET, 0, \ -+ "(target)", "Device or label to move data to in the background")\ -+ x(promote_target, u16, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_FN(bch2_opt_target), \ -+ BCH_SB_PROMOTE_TARGET, 0, \ -+ "(target)", "Device or label to promote data to on read") \ -+ x(erasure_code, u16, \ -+ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_ERASURE_CODE, false, \ -+ NULL, "Enable erasure coding (DO NOT USE YET)") \ -+ x(inodes_32bit, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_INODE_32BIT, true, \ -+ NULL, "Constrain inode numbers to 32 bits") \ -+ x(shard_inode_numbers, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_SHARD_INUMS, true, \ -+ NULL, "Shard new inode numbers by CPU id") \ -+ x(inodes_use_key_cache, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_INODES_USE_KEY_CACHE, true, \ -+ NULL, "Use the btree key cache for the inodes btree") \ -+ x(btree_node_mem_ptr_optimization, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Stash pointer to in memory btree node in btree ptr")\ -+ x(btree_write_buffer_size, u32, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_UINT(16, (1U << 20) - 1), \ -+ BCH2_NO_SB_OPT, 1U << 13, \ -+ NULL, "Number of btree write buffer entries") \ -+ x(gc_reserve_percent, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(5, 21), \ -+ BCH_SB_GC_RESERVE, 8, \ -+ "%", "Percentage of disk space to reserve for copygc")\ -+ x(gc_reserve_bytes, u64, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME| \ -+ OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS, \ -+ OPT_UINT(0, U64_MAX), \ -+ BCH_SB_GC_RESERVE_BYTES, 0, \ -+ "%", "Amount of disk space to reserve for copygc\n" \ -+ "Takes precedence over gc_reserve_percent if set")\ -+ x(root_reserve_percent, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_UINT(0, 100), \ -+ BCH_SB_ROOT_RESERVE, 0, \ -+ "%", "Percentage of disk space to reserve for superuser")\ -+ 
x(wide_macs, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_128_BIT_MACS, false, \ -+ NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ -+ x(inline_data, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable inline data extents") \ -+ x(acl, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_POSIX_ACL, true, \ -+ NULL, "Enable POSIX acls") \ -+ x(usrquota, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_USRQUOTA, false, \ -+ NULL, "Enable user quotas") \ -+ x(grpquota, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_GRPQUOTA, false, \ -+ NULL, "Enable group quotas") \ -+ x(prjquota, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH_SB_PRJQUOTA, false, \ -+ NULL, "Enable project quotas") \ -+ x(degraded, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Allow mounting in degraded mode") \ -+ x(very_degraded, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Allow mounting in when data will be missing") \ -+ x(discard, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_DEVICE, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable discard/TRIM support") \ -+ x(verbose, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, BCACHEFS_VERBOSE_DEFAULT, \ -+ NULL, "Extra debugging information during mount/recovery")\ -+ x(journal_flush_delay, u32, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, U32_MAX), \ -+ BCH_SB_JOURNAL_FLUSH_DELAY, 1000, \ -+ NULL, "Delay in milliseconds before automatic journal commits")\ -+ x(journal_flush_disabled, u8, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_JOURNAL_FLUSH_DISABLED,false, \ -+ NULL, "Disable journal flush on sync/fsync\n" \ -+ "If enabled, writes can be lost, but only since the\n"\ -+ "last journal write (default 1 second)") \ -+ x(journal_reclaim_delay, u32, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(0, U32_MAX), \ -+ BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \ -+ NULL, "Delay in milliseconds before automatic journal reclaim")\ -+ x(move_bytes_in_flight, u32, \ -+ OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1024, U32_MAX), \ -+ BCH2_NO_SB_OPT, 1U << 20, \ -+ NULL, "Maximum Amount of IO to keep in flight by the move path")\ -+ x(move_ios_in_flight, u32, \ -+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_UINT(1, 1024), \ -+ BCH2_NO_SB_OPT, 32, \ -+ NULL, "Maximum number of IOs to keep in flight by the move path")\ -+ x(fsck, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Run fsck on mount") \ -+ x(fix_errors, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_FN(bch2_opt_fix_errors), \ -+ BCH2_NO_SB_OPT, FSCK_FIX_exit, \ -+ NULL, "Fix errors during fsck without asking") \ -+ x(ratelimit_errors, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, RATELIMIT_ERRORS_DEFAULT, \ -+ NULL, "Ratelimit error messages during fsck") \ -+ x(nochanges, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Super read only mode - no writes at all will be issued,\n"\ -+ "even if we have to replay the journal") \ -+ x(norecovery, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Don't replay the journal") \ -+ x(keep_journal, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Don't free journal entries/keys after startup")\ -+ x(read_entire_journal, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ 
BCH2_NO_SB_OPT, false, \ -+ NULL, "Read all journal entries, not just dirty ones")\ -+ x(read_journal_only, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Only read the journal, skip the rest of recovery")\ -+ x(journal_transaction_names, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ -+ OPT_BOOL(), \ -+ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ -+ NULL, "Log transaction function names in journal") \ -+ x(noexcl, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Don't open device in exclusive mode") \ -+ x(direct_io, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Use O_DIRECT (userspace only)") \ -+ x(sb, u64, \ -+ OPT_MOUNT, \ -+ OPT_UINT(0, S64_MAX), \ -+ BCH2_NO_SB_OPT, BCH_SB_SECTOR, \ -+ "offset", "Sector offset of superblock") \ -+ x(read_only, u8, \ -+ OPT_FS, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(nostart, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Don\'t start filesystem, only open devices") \ -+ x(reconstruct_alloc, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Reconstruct alloc btree") \ -+ x(version_upgrade, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_STR(bch2_version_upgrade_opts), \ -+ BCH_SB_VERSION_UPGRADE, BCH_VERSION_UPGRADE_compatible, \ -+ NULL, "Set superblock to latest version,\n" \ -+ "allowing any new features to be used") \ -+ x(buckets_nouse, u8, \ -+ 0, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Allocate the buckets_nouse bitmap") \ -+ x(project, u8, \ -+ OPT_INODE, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, NULL) \ -+ x(nocow, u8, \ -+ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ -+ OPT_BOOL(), \ -+ BCH_SB_NOCOW, false, \ -+ NULL, "Nocow mode: Writes will be done in place when possible.\n"\ -+ "Snapshots and reflink will still caused writes to be COW\n"\ -+ "Implicitly disables data checksumming, compression and encryption")\ -+ x(nocow_enabled, u8, \ -+ OPT_FS|OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, true, \ -+ NULL, "Enable nocow mode: enables runtime locking in\n"\ -+ "data move path needed if nocow will ever be in use\n")\ -+ x(no_data_io, u8, \ -+ OPT_MOUNT, \ -+ OPT_BOOL(), \ -+ BCH2_NO_SB_OPT, false, \ -+ NULL, "Skip submit_bio() for data reads and writes, " \ -+ "for performance testing purposes") \ -+ x(fs_size, u64, \ -+ OPT_DEVICE, \ -+ OPT_UINT(0, S64_MAX), \ -+ BCH2_NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(bucket, u32, \ -+ OPT_DEVICE, \ -+ OPT_UINT(0, S64_MAX), \ -+ BCH2_NO_SB_OPT, 0, \ -+ "size", "Size of filesystem on device") \ -+ x(durability, u8, \ -+ OPT_DEVICE, \ -+ OPT_UINT(0, BCH_REPLICAS_MAX), \ -+ BCH2_NO_SB_OPT, 1, \ -+ "n", "Data written to this device will be considered\n"\ -+ "to have already been replicated n times") -+ -+struct bch_opts { -+#define x(_name, _bits, ...) unsigned _name##_defined:1; -+ BCH_OPTS() -+#undef x -+ -+#define x(_name, _bits, ...) _bits _name; -+ BCH_OPTS() -+#undef x -+}; -+ -+static const __maybe_unused struct bch_opts bch2_opts_default = { -+#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \ -+ ._name##_defined = true, \ -+ ._name = _default, \ -+ -+ BCH_OPTS() -+#undef x -+}; -+ -+#define opt_defined(_opts, _name) ((_opts)._name##_defined) -+ -+#define opt_get(_opts, _name) \ -+ (opt_defined(_opts, _name) ? 
(_opts)._name : bch2_opts_default._name) -+ -+#define opt_set(_opts, _name, _v) \ -+do { \ -+ (_opts)._name##_defined = true; \ -+ (_opts)._name = _v; \ -+} while (0) -+ -+static inline struct bch_opts bch2_opts_empty(void) -+{ -+ return (struct bch_opts) { 0 }; -+} -+ -+void bch2_opts_apply(struct bch_opts *, struct bch_opts); -+ -+enum bch_opt_id { -+#define x(_name, ...) Opt_##_name, -+ BCH_OPTS() -+#undef x -+ bch2_opts_nr -+}; -+ -+struct bch_fs; -+struct printbuf; -+ -+struct bch_option { -+ struct attribute attr; -+ u64 (*get_sb)(const struct bch_sb *); -+ void (*set_sb)(struct bch_sb *, u64); -+ enum opt_type type; -+ enum opt_flags flags; -+ u64 min, max; -+ -+ const char * const *choices; -+ -+ struct bch_opt_fn fn; -+ -+ const char *hint; -+ const char *help; -+ -+}; -+ -+extern const struct bch_option bch2_opt_table[]; -+ -+bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id); -+u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); -+void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); -+ -+u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id); -+int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *); -+void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64); -+void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64); -+ -+int bch2_opt_lookup(const char *); -+int bch2_opt_validate(const struct bch_option *, u64, struct printbuf *); -+int bch2_opt_parse(struct bch_fs *, const struct bch_option *, -+ const char *, u64 *, struct printbuf *); -+ -+#define OPT_SHOW_FULL_LIST (1 << 0) -+#define OPT_SHOW_MOUNT_STYLE (1 << 1) -+ -+void bch2_opt_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, -+ const struct bch_option *, u64, unsigned); -+ -+int bch2_opt_check_may_set(struct bch_fs *, int, u64); -+int bch2_opts_check_may_set(struct bch_fs *); -+int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, char *); -+ -+/* inode opts: */ -+ -+struct bch_io_opts { -+#define x(_name, _bits) u##_bits _name; -+ BCH_INODE_OPTS() -+#undef x -+}; -+ -+struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); -+bool bch2_opt_is_inode_opt(enum bch_opt_id); -+ -+#endif /* _BCACHEFS_OPTS_H */ -diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c -new file mode 100644 -index 000000000000..5e653eb81d54 ---- /dev/null -+++ b/fs/bcachefs/printbuf.c -@@ -0,0 +1,425 @@ -+// SPDX-License-Identifier: LGPL-2.1+ -+/* Copyright (C) 2022 Kent Overstreet */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "printbuf.h" -+ -+static inline unsigned printbuf_linelen(struct printbuf *buf) -+{ -+ return buf->pos - buf->last_newline; -+} -+ -+int bch2_printbuf_make_room(struct printbuf *out, unsigned extra) -+{ -+ unsigned new_size; -+ char *buf; -+ -+ if (!out->heap_allocated) -+ return 0; -+ -+ /* Reserved space for terminating nul: */ -+ extra += 1; -+ -+ if (out->pos + extra < out->size) -+ return 0; -+ -+ new_size = roundup_pow_of_two(out->size + extra); -+ -+ /* -+ * Note: output buffer must be freeable with kfree(), it's not required -+ * that the user use printbuf_exit(). -+ */ -+ buf = krealloc(out->buf, new_size, !out->atomic ? 
GFP_KERNEL : GFP_NOWAIT);
-+
-+	if (!buf) {
-+		out->allocation_failure = true;
-+		return -ENOMEM;
-+	}
-+
-+	out->buf = buf;
-+	out->size = new_size;
-+	return 0;
-+}
-+
-+void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
-+{
-+	int len;
-+
-+	do {
-+		va_list args2;
-+
-+		va_copy(args2, args);
-+		len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
-+	} while (len + 1 >= printbuf_remaining(out) &&
-+		 !bch2_printbuf_make_room(out, len + 1));
-+
-+	len = min_t(size_t, len,
-+		    printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
-+	out->pos += len;
-+}
-+
-+void bch2_prt_printf(struct printbuf *out, const char *fmt, ...)
-+{
-+	va_list args;
-+	int len;
-+
-+	do {
-+		va_start(args, fmt);
-+		len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args);
-+		va_end(args);
-+	} while (len + 1 >= printbuf_remaining(out) &&
-+		 !bch2_printbuf_make_room(out, len + 1));
-+
-+	len = min_t(size_t, len,
-+		    printbuf_remaining(out) ? printbuf_remaining(out) - 1 : 0);
-+	out->pos += len;
-+}
-+
-+/**
-+ * bch2_printbuf_str() - returns printbuf's buf as a C string, guaranteed to be
-+ * null terminated
-+ * @buf: printbuf to terminate
-+ * Returns: Printbuf contents, as a nul terminated C string
-+ */
-+const char *bch2_printbuf_str(const struct printbuf *buf)
-+{
-+	/*
-+	 * If we've written to a printbuf then it's guaranteed to be a null
-+	 * terminated string - but if we haven't, then we might not have
-+	 * allocated a buffer at all:
-+	 */
-+	return buf->pos
-+		? buf->buf
-+		: "";
-+}
-+
-+/**
-+ * bch2_printbuf_exit() - exit a printbuf, freeing memory it owns and poisoning it
-+ * against accidental use.
-+ * @buf: printbuf to exit
-+ */
-+void bch2_printbuf_exit(struct printbuf *buf)
-+{
-+	if (buf->heap_allocated) {
-+		kfree(buf->buf);
-+		buf->buf = ERR_PTR(-EINTR); /* poison value */
-+	}
-+}
-+
-+void bch2_printbuf_tabstops_reset(struct printbuf *buf)
-+{
-+	buf->nr_tabstops = 0;
-+}
-+
-+void bch2_printbuf_tabstop_pop(struct printbuf *buf)
-+{
-+	if (buf->nr_tabstops)
-+		--buf->nr_tabstops;
-+}
-+
-+/*
-+ * bch2_printbuf_tabstop_push() - add a tabstop, n spaces from the previous tabstop
-+ *
-+ * @buf: printbuf to control
-+ * @spaces: number of spaces from previous tabstop
-+ *
-+ * In the future this function may allocate memory if setting more than
-+ * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start
-+ * of line.
-+ */
-+int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
-+{
-+	unsigned prev_tabstop = buf->nr_tabstops
-+		? buf->_tabstops[buf->nr_tabstops - 1]
-+		: 0;
-+
-+	if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops)))
-+		return -EINVAL;
-+
-+	buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces;
-+	buf->has_indent_or_tabstops = true;
-+	return 0;
-+}
-+
-+/**
-+ * bch2_printbuf_indent_add() - add to the current indent level
-+ *
-+ * @buf: printbuf to control
-+ * @spaces: number of spaces to add to the current indent level
-+ *
-+ * Subsequent lines, and the current line if the output position is at the start
-+ * of the current line, will be indented by @spaces more spaces.
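-+ *
-+ * Illustrative example: after bch2_printbuf_indent_add(buf, 2), every
-+ * subsequent bch2_prt_newline(buf) emits the newline followed by two
-+ * spaces, so continuation lines stay aligned with the indented text.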
-+ */ -+void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces) -+{ -+ if (WARN_ON_ONCE(buf->indent + spaces < buf->indent)) -+ spaces = 0; -+ -+ buf->indent += spaces; -+ prt_chars(buf, ' ', spaces); -+ -+ buf->has_indent_or_tabstops = true; -+} -+ -+/** -+ * bch2_printbuf_indent_sub() - subtract from the current indent level -+ * -+ * @buf: printbuf to control -+ * @spaces: number of spaces to subtract from the current indent level -+ * -+ * Subsequent lines, and the current line if the output position is at the start -+ * of the current line, will be indented by @spaces less spaces. -+ */ -+void bch2_printbuf_indent_sub(struct printbuf *buf, unsigned spaces) -+{ -+ if (WARN_ON_ONCE(spaces > buf->indent)) -+ spaces = buf->indent; -+ -+ if (buf->last_newline + buf->indent == buf->pos) { -+ buf->pos -= spaces; -+ printbuf_nul_terminate(buf); -+ } -+ buf->indent -= spaces; -+ -+ if (!buf->indent && !buf->nr_tabstops) -+ buf->has_indent_or_tabstops = false; -+} -+ -+void bch2_prt_newline(struct printbuf *buf) -+{ -+ unsigned i; -+ -+ bch2_printbuf_make_room(buf, 1 + buf->indent); -+ -+ __prt_char(buf, '\n'); -+ -+ buf->last_newline = buf->pos; -+ -+ for (i = 0; i < buf->indent; i++) -+ __prt_char(buf, ' '); -+ -+ printbuf_nul_terminate(buf); -+ -+ buf->last_field = buf->pos; -+ buf->cur_tabstop = 0; -+} -+ -+/* -+ * Returns spaces from start of line, if set, or 0 if unset: -+ */ -+static inline unsigned cur_tabstop(struct printbuf *buf) -+{ -+ return buf->cur_tabstop < buf->nr_tabstops -+ ? buf->_tabstops[buf->cur_tabstop] -+ : 0; -+} -+ -+static void __prt_tab(struct printbuf *out) -+{ -+ int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out)); -+ -+ prt_chars(out, ' ', spaces); -+ -+ out->last_field = out->pos; -+ out->cur_tabstop++; -+} -+ -+/** -+ * bch2_prt_tab() - Advance printbuf to the next tabstop -+ * @out: printbuf to control -+ * -+ * Advance output to the next tabstop by printing spaces. -+ */ -+void bch2_prt_tab(struct printbuf *out) -+{ -+ if (WARN_ON(!cur_tabstop(out))) -+ return; -+ -+ __prt_tab(out); -+} -+ -+static void __prt_tab_rjust(struct printbuf *buf) -+{ -+ unsigned move = buf->pos - buf->last_field; -+ int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf); -+ -+ if (pad > 0) { -+ bch2_printbuf_make_room(buf, pad); -+ -+ if (buf->last_field + pad < buf->size) -+ memmove(buf->buf + buf->last_field + pad, -+ buf->buf + buf->last_field, -+ min(move, buf->size - 1 - buf->last_field - pad)); -+ -+ if (buf->last_field < buf->size) -+ memset(buf->buf + buf->last_field, ' ', -+ min((unsigned) pad, buf->size - buf->last_field)); -+ -+ buf->pos += pad; -+ printbuf_nul_terminate(buf); -+ } -+ -+ buf->last_field = buf->pos; -+ buf->cur_tabstop++; -+} -+ -+/** -+ * bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying -+ * previous output -+ * -+ * @buf: printbuf to control -+ * -+ * Advance output to the next tabstop by inserting spaces immediately after the -+ * previous tabstop, right justifying previously outputted text. 
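-+ *
-+ * Illustrative example: with a tabstop 8 spaces from the start of line
-+ * and "42" just printed at the start of the field, six pad spaces are
-+ * inserted before "42", so the field reads "      42" and ends exactly
-+ * at the tabstop.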
-+ */
-+void bch2_prt_tab_rjust(struct printbuf *buf)
-+{
-+	if (WARN_ON(!cur_tabstop(buf)))
-+		return;
-+
-+	__prt_tab_rjust(buf);
-+}
-+
-+/**
-+ * bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters
-+ *
-+ * @out: output printbuf
-+ * @str: string to print
-+ * @count: number of bytes to print
-+ *
-+ * The following control characters are handled as follows:
-+ *   \n: prt_newline	newline that obeys current indent level
-+ *   \t: prt_tab	advance to next tabstop
-+ *   \r: prt_tab_rjust	advance to next tabstop, with right justification
-+ */
-+void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
-+{
-+	const char *unprinted_start = str;
-+	const char *end = str + count;
-+
-+	if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) {
-+		prt_bytes(out, str, count);
-+		return;
-+	}
-+
-+	while (str != end) {
-+		switch (*str) {
-+		case '\n':
-+			prt_bytes(out, unprinted_start, str - unprinted_start);
-+			unprinted_start = str + 1;
-+			bch2_prt_newline(out);
-+			break;
-+		case '\t':
-+			if (likely(cur_tabstop(out))) {
-+				prt_bytes(out, unprinted_start, str - unprinted_start);
-+				unprinted_start = str + 1;
-+				__prt_tab(out);
-+			}
-+			break;
-+		case '\r':
-+			if (likely(cur_tabstop(out))) {
-+				prt_bytes(out, unprinted_start, str - unprinted_start);
-+				unprinted_start = str + 1;
-+				__prt_tab_rjust(out);
-+			}
-+			break;
-+		}
-+
-+		str++;
-+	}
-+
-+	prt_bytes(out, unprinted_start, str - unprinted_start);
-+}
-+
-+/**
-+ * bch2_prt_human_readable_u64() - Print out a u64 in human readable units
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
-+ */
-+void bch2_prt_human_readable_u64(struct printbuf *out, u64 v)
-+{
-+	bch2_printbuf_make_room(out, 10);
-+	out->pos += string_get_size(v, 1, !out->si_units,
-+			out->buf + out->pos,
-+			printbuf_remaining_size(out));
-+}
-+
-+/**
-+ * bch2_prt_human_readable_s64() - Print out a s64 in human readable units
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units of 2^10 (default) or 10^3 are controlled via @out->si_units
-+ */
-+void bch2_prt_human_readable_s64(struct printbuf *out, s64 v)
-+{
-+	if (v < 0)
-+		prt_char(out, '-');
-+	bch2_prt_human_readable_u64(out, abs(v));
-+}
-+
-+/**
-+ * bch2_prt_units_u64() - Print out a u64 according to printbuf unit options
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units are either raw (default), or human readable units (controlled via
-+ * @buf->human_readable_units)
-+ */
-+void bch2_prt_units_u64(struct printbuf *out, u64 v)
-+{
-+	if (out->human_readable_units)
-+		bch2_prt_human_readable_u64(out, v);
-+	else
-+		bch2_prt_printf(out, "%llu", v);
-+}
-+
-+/**
-+ * bch2_prt_units_s64() - Print out a s64 according to printbuf unit options
-+ * @out: output printbuf
-+ * @v: integer to print
-+ *
-+ * Units are either raw (default), or human readable units (controlled via
-+ * @buf->human_readable_units)
-+ */
-+void bch2_prt_units_s64(struct printbuf *out, s64 v)
-+{
-+	if (v < 0)
-+		prt_char(out, '-');
-+	bch2_prt_units_u64(out, abs(v));
-+}
-+
-+void bch2_prt_string_option(struct printbuf *out,
-+		const char * const list[],
-+		size_t selected)
-+{
-+	size_t i;
-+
-+	for (i = 0; list[i]; i++)
-+		bch2_prt_printf(out, i == selected ? "[%s] " : "%s ", list[i]);
-+}
-+
-+void bch2_prt_bitflags(struct printbuf *out,
-+		const char * const list[], u64 flags)
-+{
-+	unsigned bit, nr = 0;
-+	bool first = true;
-+
-+	while (list[nr])
-+		nr++;
-+
-+	while (flags && (bit = __ffs64(flags)) < nr) {
-+		if (!first)
-+			bch2_prt_printf(out, ",");
-+		first = false;
-+		bch2_prt_printf(out, "%s", list[bit]);
-+		flags ^= BIT_ULL(bit);
-+	}
-+}
-diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
-new file mode 100644
-index 000000000000..2191423d9f22
---- /dev/null
-+++ b/fs/bcachefs/printbuf.h
-@@ -0,0 +1,284 @@
-+/* SPDX-License-Identifier: LGPL-2.1+ */
-+/* Copyright (C) 2022 Kent Overstreet */
-+
-+#ifndef _BCACHEFS_PRINTBUF_H
-+#define _BCACHEFS_PRINTBUF_H
-+
-+/*
-+ * Printbufs: Simple strings for printing to, with optional heap allocation
-+ *
-+ * This code has provisions for use in userspace, to aid in making other code
-+ * portable between kernelspace and userspace.
-+ *
-+ * Basic example:
-+ *   struct printbuf buf = PRINTBUF;
-+ *
-+ *   prt_printf(&buf, "foo=");
-+ *   foo_to_text(&buf, foo);
-+ *   printk("%s", buf.buf);
-+ *   printbuf_exit(&buf);
-+ *
-+ * Or
-+ *   struct printbuf buf = PRINTBUF_EXTERN(char_buf, char_buf_size)
-+ *
-+ * We can now write pretty printers instead of writing code that dumps
-+ * everything to the kernel log buffer, and then those pretty-printers can be
-+ * used by other code that outputs to kernel log, sysfs, debugfs, etc.
-+ *
-+ * Memory allocation: Outputting to a printbuf may allocate memory. This
-+ * allocation is done with GFP_KERNEL, by default: use the newer
-+ * memalloc_*_(save|restore) functions as needed.
-+ *
-+ * Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations
-+ * will be done with GFP_NOWAIT if printbuf->atomic is nonzero.
-+ *
-+ * It's allowed to grab the output buffer and free it later with kfree() instead
-+ * of using printbuf_exit(), if the user just needs a heap allocated string at
-+ * the end.
-+ *
-+ * Memory allocation failures: We don't return errors directly, because on
-+ * memory allocation failure we usually don't want to bail out and unwind - we
-+ * want to print what we've got, on a best-effort basis. But code that does want
-+ * to return -ENOMEM may check printbuf.allocation_failure.
-+ *
-+ * Indenting, tabstops:
-+ *
-+ * To aid in writing multi-line pretty printers spread across multiple
-+ * functions, printbufs track the current indent level.
-+ *
-+ * printbuf_indent_add() and printbuf_indent_sub() increase and decrease the
-+ * current indent level, respectively.
-+ *
-+ * To use tabstops, set printbuf->tabstops[]; they are in units of spaces, from
-+ * start of line. Once set, prt_tab() will output spaces up to the next tabstop.
-+ * prt_tab_rjust() will also advance the current line of text up to the next
-+ * tabstop, but it does so by shifting text since the previous tabstop up to the
-+ * next tabstop - right justifying it.
-+ *
-+ * Make sure you use prt_newline() instead of \n in the format string for indent
-+ * level and tabstops to work correctly.
-+ *
-+ * Output units: printbuf->units exists to tell pretty-printers how to output
-+ * numbers: a raw value (e.g. directly from a superblock field), as bytes, or as
-+ * human readable bytes. prt_units() obeys it.
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/string.h>
-+
-+enum printbuf_si {
-+	PRINTBUF_UNITS_2,	/* use binary powers of 2^10 */
-+	PRINTBUF_UNITS_10,	/* use powers of 10^3 (standard SI) */
-+};
-+
-+#define PRINTBUF_INLINE_TABSTOPS 6
-+
-+struct printbuf {
-+	char *buf;
-+	unsigned size;
-+	unsigned pos;
-+	unsigned last_newline;
-+	unsigned last_field;
-+	unsigned indent;
-+	/*
-+	 * If nonzero, allocations will be done with GFP_ATOMIC:
-+	 */
-+	u8 atomic;
-+	bool allocation_failure:1;
-+	bool heap_allocated:1;
-+	enum printbuf_si si_units:1;
-+	bool human_readable_units:1;
-+	bool has_indent_or_tabstops:1;
-+	bool suppress_indent_tabstop_handling:1;
-+	u8 nr_tabstops;
-+
-+	/*
-+	 * Do not modify directly: use printbuf_tabstop_push(),
-+	 * printbuf_tabstop_pop()
-+	 */
-+	u8 cur_tabstop;
-+	u8 _tabstops[PRINTBUF_INLINE_TABSTOPS];
-+};
-+
-+int bch2_printbuf_make_room(struct printbuf *, unsigned);
-+__printf(2, 3) void bch2_prt_printf(struct printbuf *out, const char *fmt, ...);
-+__printf(2, 0) void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list);
-+const char *bch2_printbuf_str(const struct printbuf *);
-+void bch2_printbuf_exit(struct printbuf *);
-+
-+void bch2_printbuf_tabstops_reset(struct printbuf *);
-+void bch2_printbuf_tabstop_pop(struct printbuf *);
-+int bch2_printbuf_tabstop_push(struct printbuf *, unsigned);
-+
-+void bch2_printbuf_indent_add(struct printbuf *, unsigned);
-+void bch2_printbuf_indent_sub(struct printbuf *, unsigned);
-+
-+void bch2_prt_newline(struct printbuf *);
-+void bch2_prt_tab(struct printbuf *);
-+void bch2_prt_tab_rjust(struct printbuf *);
-+
-+void bch2_prt_bytes_indented(struct printbuf *, const char *, unsigned);
-+void bch2_prt_human_readable_u64(struct printbuf *, u64);
-+void bch2_prt_human_readable_s64(struct printbuf *, s64);
-+void bch2_prt_units_u64(struct printbuf *, u64);
-+void bch2_prt_units_s64(struct printbuf *, s64);
-+void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
-+void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
-+
-+/* Initializer for a heap allocated printbuf: */
-+#define PRINTBUF ((struct printbuf) { .heap_allocated = true })
-+
-+/* Initializer for a printbuf that points to an external buffer: */
-+#define PRINTBUF_EXTERN(_buf, _size)			\
-+((struct printbuf) {					\
-+	.buf	= _buf,					\
-+	.size	= _size,				\
-+})
-+
-+/*
-+ * Returns size remaining of output buffer:
-+ */
-+static inline unsigned printbuf_remaining_size(struct printbuf *out)
-+{
-+	return out->pos < out->size ? out->size - out->pos : 0;
-+}
-+
-+/*
-+ * Returns number of characters we can print to the output buffer - i.e.
-+ * excluding the terminating nul:
-+ */
-+static inline unsigned printbuf_remaining(struct printbuf *out)
-+{
-+	return out->pos < out->size ? out->size - out->pos - 1 : 0;
-+}
-+
-+static inline unsigned printbuf_written(struct printbuf *out)
-+{
-+	return out->size ?
min(out->pos, out->size - 1) : 0; -+} -+ -+/* -+ * Returns true if output was truncated: -+ */ -+static inline bool printbuf_overflowed(struct printbuf *out) -+{ -+ return out->pos >= out->size; -+} -+ -+static inline void printbuf_nul_terminate(struct printbuf *out) -+{ -+ bch2_printbuf_make_room(out, 1); -+ -+ if (out->pos < out->size) -+ out->buf[out->pos] = 0; -+ else if (out->size) -+ out->buf[out->size - 1] = 0; -+} -+ -+/* Doesn't call bch2_printbuf_make_room(), doesn't nul terminate: */ -+static inline void __prt_char_reserved(struct printbuf *out, char c) -+{ -+ if (printbuf_remaining(out)) -+ out->buf[out->pos] = c; -+ out->pos++; -+} -+ -+/* Doesn't nul terminate: */ -+static inline void __prt_char(struct printbuf *out, char c) -+{ -+ bch2_printbuf_make_room(out, 1); -+ __prt_char_reserved(out, c); -+} -+ -+static inline void prt_char(struct printbuf *out, char c) -+{ -+ __prt_char(out, c); -+ printbuf_nul_terminate(out); -+} -+ -+static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n) -+{ -+ unsigned i, can_print = min(n, printbuf_remaining(out)); -+ -+ for (i = 0; i < can_print; i++) -+ out->buf[out->pos++] = c; -+ out->pos += n - can_print; -+} -+ -+static inline void prt_chars(struct printbuf *out, char c, unsigned n) -+{ -+ bch2_printbuf_make_room(out, n); -+ __prt_chars_reserved(out, c, n); -+ printbuf_nul_terminate(out); -+} -+ -+static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n) -+{ -+ unsigned i, can_print; -+ -+ bch2_printbuf_make_room(out, n); -+ -+ can_print = min(n, printbuf_remaining(out)); -+ -+ for (i = 0; i < can_print; i++) -+ out->buf[out->pos++] = ((char *) b)[i]; -+ out->pos += n - can_print; -+ -+ printbuf_nul_terminate(out); -+} -+ -+static inline void prt_str(struct printbuf *out, const char *str) -+{ -+ prt_bytes(out, str, strlen(str)); -+} -+ -+static inline void prt_str_indented(struct printbuf *out, const char *str) -+{ -+ bch2_prt_bytes_indented(out, str, strlen(str)); -+} -+ -+static inline void prt_hex_byte(struct printbuf *out, u8 byte) -+{ -+ bch2_printbuf_make_room(out, 2); -+ __prt_char_reserved(out, hex_asc_hi(byte)); -+ __prt_char_reserved(out, hex_asc_lo(byte)); -+ printbuf_nul_terminate(out); -+} -+ -+static inline void prt_hex_byte_upper(struct printbuf *out, u8 byte) -+{ -+ bch2_printbuf_make_room(out, 2); -+ __prt_char_reserved(out, hex_asc_upper_hi(byte)); -+ __prt_char_reserved(out, hex_asc_upper_lo(byte)); -+ printbuf_nul_terminate(out); -+} -+ -+/** -+ * printbuf_reset - re-use a printbuf without freeing and re-initializing it: -+ */ -+static inline void printbuf_reset(struct printbuf *buf) -+{ -+ buf->pos = 0; -+ buf->allocation_failure = 0; -+ buf->indent = 0; -+ buf->nr_tabstops = 0; -+ buf->cur_tabstop = 0; -+} -+ -+/** -+ * printbuf_atomic_inc - mark as entering an atomic section -+ */ -+static inline void printbuf_atomic_inc(struct printbuf *buf) -+{ -+ buf->atomic++; -+} -+ -+/** -+ * printbuf_atomic_inc - mark as leaving an atomic section -+ */ -+static inline void printbuf_atomic_dec(struct printbuf *buf) -+{ -+ buf->atomic--; -+} -+ -+#endif /* _BCACHEFS_PRINTBUF_H */ -diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c -new file mode 100644 -index 000000000000..a54647c36b85 ---- /dev/null -+++ b/fs/bcachefs/quota.c -@@ -0,0 +1,979 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include "bcachefs.h" -+#include "btree_update.h" -+#include "errcode.h" -+#include "error.h" -+#include "inode.h" -+#include "quota.h" -+#include "snapshot.h" -+#include "super-io.h" -+ -+static 
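
As a quick orientation for readers of this (now removed) patch: the printbuf
header above is used roughly as follows. This is a minimal illustrative sketch
only - example_to_text(), example_log() and the 16-space tabstop are made up
for this note, and in-tree callers usually reach these through the unprefixed
prt_* wrappers:

	static void example_to_text(struct printbuf *out)
	{
		bch2_printbuf_tabstop_push(out, 16);	/* value column at 16 spaces */

		prt_str(out, "capacity");
		bch2_prt_tab(out);			/* pad out to the tabstop */
		bch2_prt_units_u64(out, 1 << 20);	/* obeys the units flags */
		bch2_prt_newline(out);
	}

	static void example_log(void)
	{
		struct printbuf buf = PRINTBUF;		/* heap buffer, grown on demand */

		example_to_text(&buf);
		printk("%s", buf.buf);
		bch2_printbuf_exit(&buf);		/* frees buf.buf */
	}

Note that truncation is never an error: output past the end of the buffer is
counted but dropped, and printbuf_overflowed() reports it afterwards.
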
const char * const bch2_quota_types[] = { -+ "user", -+ "group", -+ "project", -+}; -+ -+static const char * const bch2_quota_counters[] = { -+ "space", -+ "inodes", -+}; -+ -+static int bch2_sb_quota_validate(struct bch_sb *sb, struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_quota *q = field_to_type(f, quota); -+ -+ if (vstruct_bytes(&q->field) < sizeof(*q)) { -+ prt_printf(err, "wrong size (got %zu should be %zu)", -+ vstruct_bytes(&q->field), sizeof(*q)); -+ return -BCH_ERR_invalid_sb_quota; -+ } -+ -+ return 0; -+} -+ -+static void bch2_sb_quota_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_quota *q = field_to_type(f, quota); -+ unsigned qtyp, counter; -+ -+ for (qtyp = 0; qtyp < ARRAY_SIZE(q->q); qtyp++) { -+ prt_printf(out, "%s: flags %llx", -+ bch2_quota_types[qtyp], -+ le64_to_cpu(q->q[qtyp].flags)); -+ -+ for (counter = 0; counter < Q_COUNTERS; counter++) -+ prt_printf(out, " %s timelimit %u warnlimit %u", -+ bch2_quota_counters[counter], -+ le32_to_cpu(q->q[qtyp].c[counter].timelimit), -+ le32_to_cpu(q->q[qtyp].c[counter].warnlimit)); -+ -+ prt_newline(out); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_quota = { -+ .validate = bch2_sb_quota_validate, -+ .to_text = bch2_sb_quota_to_text, -+}; -+ -+int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, -+ quota_type_invalid, -+ "invalid quota type (%llu >= %u)", -+ k.k->p.inode, QTYP_NR); -+fsck_err: -+ return ret; -+} -+ -+void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); -+ unsigned i; -+ -+ for (i = 0; i < Q_COUNTERS; i++) -+ prt_printf(out, "%s hardlimit %llu softlimit %llu", -+ bch2_quota_counters[i], -+ le64_to_cpu(dq.v->c[i].hardlimit), -+ le64_to_cpu(dq.v->c[i].softlimit)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+#include -+#include -+#include -+ -+static void qc_info_to_text(struct printbuf *out, struct qc_info *i) -+{ -+ printbuf_tabstops_reset(out); -+ printbuf_tabstop_push(out, 20); -+ -+ prt_str(out, "i_fieldmask"); -+ prt_tab(out); -+ prt_printf(out, "%x", i->i_fieldmask); -+ prt_newline(out); -+ -+ prt_str(out, "i_flags"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_flags); -+ prt_newline(out); -+ -+ prt_str(out, "i_spc_timelimit"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_spc_timelimit); -+ prt_newline(out); -+ -+ prt_str(out, "i_ino_timelimit"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_ino_timelimit); -+ prt_newline(out); -+ -+ prt_str(out, "i_rt_spc_timelimit"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_rt_spc_timelimit); -+ prt_newline(out); -+ -+ prt_str(out, "i_spc_warnlimit"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_spc_warnlimit); -+ prt_newline(out); -+ -+ prt_str(out, "i_ino_warnlimit"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_ino_warnlimit); -+ prt_newline(out); -+ -+ prt_str(out, "i_rt_spc_warnlimit"); -+ prt_tab(out); -+ prt_printf(out, "%u", i->i_rt_spc_warnlimit); -+ prt_newline(out); -+} -+ -+static void qc_dqblk_to_text(struct printbuf *out, struct qc_dqblk *q) -+{ -+ printbuf_tabstops_reset(out); -+ printbuf_tabstop_push(out, 20); -+ -+ prt_str(out, "d_fieldmask"); -+ prt_tab(out); -+ prt_printf(out, "%x", q->d_fieldmask); -+ prt_newline(out); -+ -+ prt_str(out, "d_spc_hardlimit"); -+ prt_tab(out); -+ prt_printf(out, "%llu", 
q->d_spc_hardlimit); -+ prt_newline(out); -+ -+ prt_str(out, "d_spc_softlimit"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_spc_softlimit); -+ prt_newline(out); -+ -+ prt_str(out, "d_ino_hardlimit"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_ino_hardlimit); -+ prt_newline(out); -+ -+ prt_str(out, "d_ino_softlimit"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_ino_softlimit); -+ prt_newline(out); -+ -+ prt_str(out, "d_space"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_space); -+ prt_newline(out); -+ -+ prt_str(out, "d_ino_count"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_ino_count); -+ prt_newline(out); -+ -+ prt_str(out, "d_ino_timer"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_ino_timer); -+ prt_newline(out); -+ -+ prt_str(out, "d_spc_timer"); -+ prt_tab(out); -+ prt_printf(out, "%llu", q->d_spc_timer); -+ prt_newline(out); -+ -+ prt_str(out, "d_ino_warns"); -+ prt_tab(out); -+ prt_printf(out, "%i", q->d_ino_warns); -+ prt_newline(out); -+ -+ prt_str(out, "d_spc_warns"); -+ prt_tab(out); -+ prt_printf(out, "%i", q->d_spc_warns); -+ prt_newline(out); -+} -+ -+static inline unsigned __next_qtype(unsigned i, unsigned qtypes) -+{ -+ qtypes >>= i; -+ return qtypes ? i + __ffs(qtypes) : QTYP_NR; -+} -+ -+#define for_each_set_qtype(_c, _i, _q, _qtypes) \ -+ for (_i = 0; \ -+ (_i = __next_qtype(_i, _qtypes), \ -+ _q = &(_c)->quotas[_i], \ -+ _i < QTYP_NR); \ -+ _i++) -+ -+static bool ignore_hardlimit(struct bch_memquota_type *q) -+{ -+ if (capable(CAP_SYS_RESOURCE)) -+ return true; -+#if 0 -+ struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; -+ -+ return capable(CAP_SYS_RESOURCE) && -+ (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || -+ !(info->dqi_flags & DQF_ROOT_SQUASH)); -+#endif -+ return false; -+} -+ -+enum quota_msg { -+ SOFTWARN, /* Softlimit reached */ -+ SOFTLONGWARN, /* Grace time expired */ -+ HARDWARN, /* Hardlimit reached */ -+ -+ HARDBELOW, /* Usage got below inode hardlimit */ -+ SOFTBELOW, /* Usage got below inode softlimit */ -+}; -+ -+static int quota_nl[][Q_COUNTERS] = { -+ [HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN, -+ [SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN, -+ [SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN, -+ [HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW, -+ [SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW, -+ -+ [HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN, -+ [SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN, -+ [SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN, -+ [HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW, -+ [SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW, -+}; -+ -+struct quota_msgs { -+ u8 nr; -+ struct { -+ u8 qtype; -+ u8 msg; -+ } m[QTYP_NR * Q_COUNTERS]; -+}; -+ -+static void prepare_msg(unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m)); -+ -+ msgs->m[msgs->nr].qtype = qtype; -+ msgs->m[msgs->nr].msg = quota_nl[msg_type][counter]; -+ msgs->nr++; -+} -+ -+static void prepare_warning(struct memquota_counter *qc, -+ unsigned qtype, -+ enum quota_counters counter, -+ struct quota_msgs *msgs, -+ enum quota_msg msg_type) -+{ -+ if (qc->warning_issued & (1 << msg_type)) -+ return; -+ -+ prepare_msg(qtype, counter, msgs, msg_type); -+} -+ -+static void flush_warnings(struct bch_qid qid, -+ struct super_block *sb, -+ struct quota_msgs *msgs) -+{ -+ unsigned i; -+ -+ for (i = 0; i < msgs->nr; i++) -+ quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]), -+ sb->s_dev, msgs->m[i].msg); -+} -+ -+static int bch2_quota_check_limit(struct 
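
For reference, the for_each_set_qtype() macro above, together with
__next_qtype(), visits only the quota types whose bit is set in a mask,
jumping with find-first-set rather than testing every bit. A standalone
userspace sketch of the same walk (illustrative only; __builtin_ctz() stands
in for the kernel's __ffs()):

	#include <stdio.h>

	int main(void)
	{
		unsigned qtypes = (1 << 0) | (1 << 2);	/* say, USR and PRJ enabled */
		unsigned nr = 3;			/* QTYP_NR */
		unsigned i;

		for (i = 0; i < nr; i++) {
			unsigned rest = qtypes >> i;

			if (!rest)
				break;			/* no more set bits */
			i += __builtin_ctz(rest);	/* skip ahead to next set bit */
			if (i >= nr)
				break;
			printf("qtype %u enabled\n", i);
		}
		return 0;
	}
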
bch_fs *c, -+ unsigned qtype, -+ struct bch_memquota *mq, -+ struct quota_msgs *msgs, -+ enum quota_counters counter, -+ s64 v, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q = &c->quotas[qtype]; -+ struct memquota_counter *qc = &mq->c[counter]; -+ u64 n = qc->v + v; -+ -+ BUG_ON((s64) n < 0); -+ -+ if (mode == KEY_TYPE_QUOTA_NOCHECK) -+ return 0; -+ -+ if (v <= 0) { -+ if (n < qc->hardlimit && -+ (qc->warning_issued & (1 << HARDWARN))) { -+ qc->warning_issued &= ~(1 << HARDWARN); -+ prepare_msg(qtype, counter, msgs, HARDBELOW); -+ } -+ -+ if (n < qc->softlimit && -+ (qc->warning_issued & (1 << SOFTWARN))) { -+ qc->warning_issued &= ~(1 << SOFTWARN); -+ prepare_msg(qtype, counter, msgs, SOFTBELOW); -+ } -+ -+ qc->warning_issued = 0; -+ return 0; -+ } -+ -+ if (qc->hardlimit && -+ qc->hardlimit < n && -+ !ignore_hardlimit(q)) { -+ prepare_warning(qc, qtype, counter, msgs, HARDWARN); -+ return -EDQUOT; -+ } -+ -+ if (qc->softlimit && -+ qc->softlimit < n) { -+ if (qc->timer == 0) { -+ qc->timer = ktime_get_real_seconds() + q->limits[counter].timelimit; -+ prepare_warning(qc, qtype, counter, msgs, SOFTWARN); -+ } else if (ktime_get_real_seconds() >= qc->timer && -+ !ignore_hardlimit(q)) { -+ prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN); -+ return -EDQUOT; -+ } -+ } -+ -+ return 0; -+} -+ -+int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ unsigned qtypes = enabled_qtypes(c); -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq[QTYP_NR]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_KERNEL); -+ if (!mq[i]) -+ return -ENOMEM; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mq[i]->c[counter].v += v; -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(qid, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static void __bch2_quota_transfer(struct bch_memquota *src_q, -+ struct bch_memquota *dst_q, -+ enum quota_counters counter, s64 v) -+{ -+ BUG_ON(v > src_q->c[counter].v); -+ BUG_ON(v + dst_q->c[counter].v < v); -+ -+ src_q->c[counter].v -= v; -+ dst_q->c[counter].v += v; -+} -+ -+int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ struct bch_memquota_type *q; -+ struct bch_memquota *src_q[3], *dst_q[3]; -+ struct quota_msgs msgs; -+ unsigned i; -+ int ret = 0; -+ -+ qtypes &= enabled_qtypes(c); -+ -+ memset(&msgs, 0, sizeof(msgs)); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_KERNEL); -+ dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_KERNEL); -+ if (!src_q[i] || !dst_q[i]) -+ return -ENOMEM; -+ } -+ -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_lock_nested(&q->lock, i); -+ -+ for_each_set_qtype(c, i, q, qtypes) { -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC, -+ dst_q[i]->c[Q_SPC].v + space, -+ mode); -+ if (ret) -+ goto err; -+ -+ ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, -+ dst_q[i]->c[Q_INO].v + 1, -+ mode); -+ if (ret) -+ goto err; -+ } -+ -+ for_each_set_qtype(c, i, q, 
qtypes) { -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space); -+ __bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1); -+ } -+ -+err: -+ for_each_set_qtype(c, i, q, qtypes) -+ mutex_unlock(&q->lock); -+ -+ flush_warnings(dst, c->vfs_sb, &msgs); -+ -+ return ret; -+} -+ -+static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, -+ struct qc_dqblk *qdq) -+{ -+ struct bkey_s_c_quota dq; -+ struct bch_memquota_type *q; -+ struct bch_memquota *mq; -+ unsigned i; -+ -+ BUG_ON(k.k->p.inode >= QTYP_NR); -+ -+ if (!((1U << k.k->p.inode) & enabled_qtypes(c))) -+ return 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_quota: -+ dq = bkey_s_c_to_quota(k); -+ q = &c->quotas[k.k->p.inode]; -+ -+ mutex_lock(&q->lock); -+ mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); -+ if (!mq) { -+ mutex_unlock(&q->lock); -+ return -ENOMEM; -+ } -+ -+ for (i = 0; i < Q_COUNTERS; i++) { -+ mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); -+ mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit); -+ } -+ -+ if (qdq && qdq->d_fieldmask & QC_SPC_TIMER) -+ mq->c[Q_SPC].timer = qdq->d_spc_timer; -+ if (qdq && qdq->d_fieldmask & QC_SPC_WARNS) -+ mq->c[Q_SPC].warns = qdq->d_spc_warns; -+ if (qdq && qdq->d_fieldmask & QC_INO_TIMER) -+ mq->c[Q_INO].timer = qdq->d_ino_timer; -+ if (qdq && qdq->d_fieldmask & QC_INO_WARNS) -+ mq->c[Q_INO].warns = qdq->d_ino_warns; -+ -+ mutex_unlock(&q->lock); -+ } -+ -+ return 0; -+} -+ -+void bch2_fs_quota_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ genradix_free(&c->quotas[i].table); -+} -+ -+void bch2_fs_quota_init(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < ARRAY_SIZE(c->quotas); i++) -+ mutex_init(&c->quotas[i].lock); -+} -+ -+static struct bch_sb_field_quota *bch2_sb_get_or_create_quota(struct bch_sb_handle *sb) -+{ -+ struct bch_sb_field_quota *sb_quota = bch2_sb_field_get(sb->sb, quota); -+ -+ if (sb_quota) -+ return sb_quota; -+ -+ sb_quota = bch2_sb_field_resize(sb, quota, sizeof(*sb_quota) / sizeof(u64)); -+ if (sb_quota) { -+ unsigned qtype, qc; -+ -+ for (qtype = 0; qtype < QTYP_NR; qtype++) -+ for (qc = 0; qc < Q_COUNTERS; qc++) -+ sb_quota->q[qtype].c[qc].timelimit = -+ cpu_to_le32(7 * 24 * 60 * 60); -+ } -+ -+ return sb_quota; -+} -+ -+static void bch2_sb_quota_read(struct bch_fs *c) -+{ -+ struct bch_sb_field_quota *sb_quota; -+ unsigned i, j; -+ -+ sb_quota = bch2_sb_field_get(c->disk_sb.sb, quota); -+ if (!sb_quota) -+ return; -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ struct bch_memquota_type *q = &c->quotas[i]; -+ -+ for (j = 0; j < Q_COUNTERS; j++) { -+ q->limits[j].timelimit = -+ le32_to_cpu(sb_quota->q[i].c[j].timelimit); -+ q->limits[j].warnlimit = -+ le32_to_cpu(sb_quota->q[i].c[j].warnlimit); -+ } -+ } -+} -+ -+static int bch2_fs_quota_read_inode(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_inode_unpacked u; -+ struct bch_snapshot_tree s_t; -+ int ret; -+ -+ ret = bch2_snapshot_tree_lookup(trans, -+ bch2_snapshot_tree(c, k.k->p.snapshot), &s_t); -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, -+ "%s: snapshot tree %u not found", __func__, -+ snapshot_t(c, k.k->p.snapshot)->tree); -+ if (ret) -+ return ret; -+ -+ if (!s_t.master_subvol) -+ goto advance; -+ -+ ret = bch2_inode_find_by_inum_nowarn_trans(trans, -+ (subvol_inum) { -+ le32_to_cpu(s_t.master_subvol), -+ k.k->p.offset, -+ }, &u); -+ /* -+ * Inode might be deleted in this snapshot - the easiest way to handle -+ * that is 
to just skip it here: -+ */ -+ if (bch2_err_matches(ret, ENOENT)) -+ goto advance; -+ -+ if (ret) -+ return ret; -+ -+ bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, -+ KEY_TYPE_QUOTA_NOCHECK); -+ bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, -+ KEY_TYPE_QUOTA_NOCHECK); -+advance: -+ bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos)); -+ return 0; -+} -+ -+int bch2_fs_quota_read(struct bch_fs *c) -+{ -+ struct bch_sb_field_quota *sb_quota; -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); -+ if (!sb_quota) { -+ mutex_unlock(&c->sb_lock); -+ return -BCH_ERR_ENOSPC_sb_quota; -+ } -+ -+ bch2_sb_quota_read(c); -+ mutex_unlock(&c->sb_lock); -+ -+ trans = bch2_trans_get(c); -+ -+ ret = for_each_btree_key2(trans, iter, BTREE_ID_quotas, -+ POS_MIN, BTREE_ITER_PREFETCH, k, -+ __bch2_quota_set(c, k, NULL)) ?: -+ for_each_btree_key2(trans, iter, BTREE_ID_inodes, -+ POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ bch2_fs_quota_read_inode(trans, &iter, k)); -+ -+ bch2_trans_put(trans); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* Enable/disable/delete quotas for an entire filesystem: */ -+ -+static int bch2_quota_enable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_sb_field_quota *sb_quota; -+ int ret = 0; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ /* Accounting must be enabled at mount time: */ -+ if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT)) -+ return -EINVAL; -+ -+ /* Can't enable enforcement without accounting: */ -+ if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota) -+ return -EINVAL; -+ -+ if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota) -+ return -EINVAL; -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); -+ if (!sb_quota) { -+ ret = -BCH_ERR_ENOSPC_sb_quota; -+ goto unlock; -+ } -+ -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, true); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); -+ -+ bch2_write_super(c); -+unlock: -+ mutex_unlock(&c->sb_lock); -+ -+ return bch2_err_class(ret); -+} -+ -+static int bch2_quota_disable(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ mutex_lock(&c->sb_lock); -+ if (uflags & FS_QUOTA_UDQ_ENFD) -+ SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_GDQ_ENFD) -+ SET_BCH_SB_GRPQUOTA(c->disk_sb.sb, false); -+ -+ if (uflags & FS_QUOTA_PDQ_ENFD) -+ SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+static int bch2_quota_remove(struct super_block *sb, unsigned uflags) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ int ret; -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (uflags & FS_USER_QUOTA) { -+ if (c->opts.usrquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas, -+ POS(QTYP_USR, 0), -+ POS(QTYP_USR, U64_MAX), -+ 0, NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_GROUP_QUOTA) { -+ if (c->opts.grpquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas, -+ POS(QTYP_GRP, 0), -+ 
POS(QTYP_GRP, U64_MAX), -+ 0, NULL); -+ if (ret) -+ return ret; -+ } -+ -+ if (uflags & FS_PROJ_QUOTA) { -+ if (c->opts.prjquota) -+ return -EINVAL; -+ -+ ret = bch2_btree_delete_range(c, BTREE_ID_quotas, -+ POS(QTYP_PRJ, 0), -+ POS(QTYP_PRJ, U64_MAX), -+ 0, NULL); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Return quota status information, such as enforcements, quota file inode -+ * numbers etc. -+ */ -+static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ unsigned qtypes = enabled_qtypes(c); -+ unsigned i; -+ -+ memset(state, 0, sizeof(*state)); -+ -+ for (i = 0; i < QTYP_NR; i++) { -+ state->s_state[i].flags |= QCI_SYSFILE; -+ -+ if (!(qtypes & (1 << i))) -+ continue; -+ -+ state->s_state[i].flags |= QCI_ACCT_ENABLED; -+ -+ state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit; -+ state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit; -+ -+ state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit; -+ state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Adjust quota timers & warnings -+ */ -+static int bch2_quota_set_info(struct super_block *sb, int type, -+ struct qc_info *info) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_sb_field_quota *sb_quota; -+ int ret = 0; -+ -+ if (0) { -+ struct printbuf buf = PRINTBUF; -+ -+ qc_info_to_text(&buf, info); -+ pr_info("setting:\n%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ if (type >= QTYP_NR) -+ return -EINVAL; -+ -+ if (!((1 << type) & enabled_qtypes(c))) -+ return -ESRCH; -+ -+ if (info->i_fieldmask & -+ ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) -+ return -EINVAL; -+ -+ mutex_lock(&c->sb_lock); -+ sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); -+ if (!sb_quota) { -+ ret = -BCH_ERR_ENOSPC_sb_quota; -+ goto unlock; -+ } -+ -+ if (info->i_fieldmask & QC_SPC_TIMER) -+ sb_quota->q[type].c[Q_SPC].timelimit = -+ cpu_to_le32(info->i_spc_timelimit); -+ -+ if (info->i_fieldmask & QC_SPC_WARNS) -+ sb_quota->q[type].c[Q_SPC].warnlimit = -+ cpu_to_le32(info->i_spc_warnlimit); -+ -+ if (info->i_fieldmask & QC_INO_TIMER) -+ sb_quota->q[type].c[Q_INO].timelimit = -+ cpu_to_le32(info->i_ino_timelimit); -+ -+ if (info->i_fieldmask & QC_INO_WARNS) -+ sb_quota->q[type].c[Q_INO].warnlimit = -+ cpu_to_le32(info->i_ino_warnlimit); -+ -+ bch2_sb_quota_read(c); -+ -+ bch2_write_super(c); -+unlock: -+ mutex_unlock(&c->sb_lock); -+ -+ return bch2_err_class(ret); -+} -+ -+/* Get/set individual quotas: */ -+ -+static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src) -+{ -+ dst->d_space = src->c[Q_SPC].v << 9; -+ dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9; -+ dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9; -+ dst->d_spc_timer = src->c[Q_SPC].timer; -+ dst->d_spc_warns = src->c[Q_SPC].warns; -+ -+ dst->d_ino_count = src->c[Q_INO].v; -+ dst->d_ino_hardlimit = src->c[Q_INO].hardlimit; -+ dst->d_ino_softlimit = src->c[Q_INO].softlimit; -+ dst->d_ino_timer = src->c[Q_INO].timer; -+ dst->d_ino_warns = src->c[Q_INO].warns; -+} -+ -+static int bch2_get_quota(struct super_block *sb, struct kqid kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid.type]; -+ qid_t qid = from_kqid(&init_user_ns, kqid); -+ struct bch_memquota *mq; -+ -+ memset(qdq, 0, sizeof(*qdq)); -+ -+ mutex_lock(&q->lock); -+ mq = 
genradix_ptr(&q->table, qid); -+ if (mq) -+ __bch2_quota_get(qdq, mq); -+ mutex_unlock(&q->lock); -+ -+ return 0; -+} -+ -+static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bch_memquota_type *q = &c->quotas[kqid->type]; -+ qid_t qid = from_kqid(&init_user_ns, *kqid); -+ struct genradix_iter iter; -+ struct bch_memquota *mq; -+ int ret = 0; -+ -+ mutex_lock(&q->lock); -+ -+ genradix_for_each_from(&q->table, iter, mq, qid) -+ if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { -+ __bch2_quota_get(qdq, mq); -+ *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); -+ goto found; -+ } -+ -+ ret = -ENOENT; -+found: -+ mutex_unlock(&q->lock); -+ return bch2_err_class(ret); -+} -+ -+static int bch2_set_quota_trans(struct btree_trans *trans, -+ struct bkey_i_quota *new_quota, -+ struct qc_dqblk *qdq) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p, -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT); -+ ret = bkey_err(k); -+ if (unlikely(ret)) -+ return ret; -+ -+ if (k.k->type == KEY_TYPE_quota) -+ new_quota->v = *bkey_s_c_to_quota(k).v; -+ -+ if (qdq->d_fieldmask & QC_SPC_SOFT) -+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9); -+ if (qdq->d_fieldmask & QC_SPC_HARD) -+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9); -+ -+ if (qdq->d_fieldmask & QC_INO_SOFT) -+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit); -+ if (qdq->d_fieldmask & QC_INO_HARD) -+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); -+ -+ ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int bch2_set_quota(struct super_block *sb, struct kqid qid, -+ struct qc_dqblk *qdq) -+{ -+ struct bch_fs *c = sb->s_fs_info; -+ struct bkey_i_quota new_quota; -+ int ret; -+ -+ if (0) { -+ struct printbuf buf = PRINTBUF; -+ -+ qc_dqblk_to_text(&buf, qdq); -+ pr_info("setting:\n%s", buf.buf); -+ printbuf_exit(&buf); -+ } -+ -+ if (sb->s_flags & SB_RDONLY) -+ return -EROFS; -+ -+ bkey_quota_init(&new_quota.k_i); -+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); -+ -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_set_quota_trans(trans, &new_quota, qdq)) ?: -+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); -+ -+ return bch2_err_class(ret); -+} -+ -+const struct quotactl_ops bch2_quotactl_operations = { -+ .quota_enable = bch2_quota_enable, -+ .quota_disable = bch2_quota_disable, -+ .rm_xquota = bch2_quota_remove, -+ -+ .get_state = bch2_quota_get_state, -+ .set_info = bch2_quota_set_info, -+ -+ .get_dqblk = bch2_get_quota, -+ .get_nextdqblk = bch2_get_next_quota, -+ .set_dqblk = bch2_set_quota, -+}; -+ -+#endif /* CONFIG_BCACHEFS_QUOTA */ -diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h -new file mode 100644 -index 000000000000..884f601f41c4 ---- /dev/null -+++ b/fs/bcachefs/quota.h -@@ -0,0 +1,74 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_H -+#define _BCACHEFS_QUOTA_H -+ -+#include "inode.h" -+#include "quota_types.h" -+ -+enum bkey_invalid_flags; -+extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -+ -+int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_quota ((struct 
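
One unit subtlety worth calling out in the get/set paths above: quota space is
tracked internally in 512-byte sectors, so bch2_set_quota_trans() shifts
incoming byte limits right by 9 (silently truncating any partial sector) and
__bch2_quota_get() shifts back when reporting. A tiny standalone illustration
of that round trip (sketch only):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t limit_bytes = 1000000;			/* requested via quotactl */
		uint64_t sectors     = limit_bytes >> 9;	/* stored: 1953 sectors */
		uint64_t reported    = sectors << 9;		/* read back: 999936 bytes */

		printf("stored %llu sectors, reported %llu bytes\n",
		       (unsigned long long) sectors,
		       (unsigned long long) reported);
		return 0;
	}

So a limit set in bytes can read back up to 511 bytes smaller than requested.
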
bkey_ops) { \ -+ .key_invalid = bch2_quota_invalid, \ -+ .val_to_text = bch2_quota_to_text, \ -+ .min_val_size = 32, \ -+}) -+ -+static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) -+{ -+ return (struct bch_qid) { -+ .q[QTYP_USR] = u->bi_uid, -+ .q[QTYP_GRP] = u->bi_gid, -+ .q[QTYP_PRJ] = u->bi_project ? u->bi_project - 1 : 0, -+ }; -+} -+ -+static inline unsigned enabled_qtypes(struct bch_fs *c) -+{ -+ return ((c->opts.usrquota << QTYP_USR)| -+ (c->opts.grpquota << QTYP_GRP)| -+ (c->opts.prjquota << QTYP_PRJ)); -+} -+ -+#ifdef CONFIG_BCACHEFS_QUOTA -+ -+int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters, -+ s64, enum quota_acct_mode); -+ -+int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid, -+ struct bch_qid, u64, enum quota_acct_mode); -+ -+void bch2_fs_quota_exit(struct bch_fs *); -+void bch2_fs_quota_init(struct bch_fs *); -+int bch2_fs_quota_read(struct bch_fs *); -+ -+extern const struct quotactl_ops bch2_quotactl_operations; -+ -+#else -+ -+static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, -+ enum quota_counters counter, s64 v, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, -+ struct bch_qid dst, -+ struct bch_qid src, u64 space, -+ enum quota_acct_mode mode) -+{ -+ return 0; -+} -+ -+static inline void bch2_fs_quota_exit(struct bch_fs *c) {} -+static inline void bch2_fs_quota_init(struct bch_fs *c) {} -+static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; } -+ -+#endif -+ -+#endif /* _BCACHEFS_QUOTA_H */ -diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h -new file mode 100644 -index 000000000000..6a136083d389 ---- /dev/null -+++ b/fs/bcachefs/quota_types.h -@@ -0,0 +1,43 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_QUOTA_TYPES_H -+#define _BCACHEFS_QUOTA_TYPES_H -+ -+#include -+ -+struct bch_qid { -+ u32 q[QTYP_NR]; -+}; -+ -+enum quota_acct_mode { -+ KEY_TYPE_QUOTA_PREALLOC, -+ KEY_TYPE_QUOTA_WARN, -+ KEY_TYPE_QUOTA_NOCHECK, -+}; -+ -+struct memquota_counter { -+ u64 v; -+ u64 hardlimit; -+ u64 softlimit; -+ s64 timer; -+ int warns; -+ int warning_issued; -+}; -+ -+struct bch_memquota { -+ struct memquota_counter c[Q_COUNTERS]; -+}; -+ -+typedef GENRADIX(struct bch_memquota) bch_memquota_table; -+ -+struct quota_limit { -+ u32 timelimit; -+ u32 warnlimit; -+}; -+ -+struct bch_memquota_type { -+ struct quota_limit limits[Q_COUNTERS]; -+ bch_memquota_table table; -+ struct mutex lock; -+}; -+ -+#endif /* _BCACHEFS_QUOTA_TYPES_H */ -diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c -new file mode 100644 -index 000000000000..3319190b8d9c ---- /dev/null -+++ b/fs/bcachefs/rebalance.c -@@ -0,0 +1,464 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "btree_write_buffer.h" -+#include "buckets.h" -+#include "clock.h" -+#include "compress.h" -+#include "disk_groups.h" -+#include "errcode.h" -+#include "error.h" -+#include "inode.h" -+#include "move.h" -+#include "rebalance.h" -+#include "subvolume.h" -+#include "super-io.h" -+#include "trace.h" -+ -+#include -+#include -+#include -+ -+#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) -+ -+static const char * const bch2_rebalance_state_strs[] = { -+#define x(t) #t, -+ BCH_REBALANCE_STATES() -+ NULL -+#undef x -+}; -+ -+static int __bch2_set_rebalance_needs_scan(struct btree_trans 
*trans, u64 inum) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_i_cookie *cookie; -+ u64 v; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, -+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), -+ BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ v = k.k->type == KEY_TYPE_cookie -+ ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) -+ : 0; -+ -+ cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); -+ ret = PTR_ERR_OR_ZERO(cookie); -+ if (ret) -+ goto err; -+ -+ bkey_cookie_init(&cookie->k_i); -+ cookie->k.p = iter.pos; -+ cookie->v.cookie = cpu_to_le64(v + 1); -+ -+ ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) -+{ -+ int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, -+ __bch2_set_rebalance_needs_scan(trans, inum)); -+ rebalance_wakeup(c); -+ return ret; -+} -+ -+int bch2_set_fs_needs_rebalance(struct bch_fs *c) -+{ -+ return bch2_set_rebalance_needs_scan(c, 0); -+} -+ -+static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ u64 v; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, -+ SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), -+ BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ v = k.k->type == KEY_TYPE_cookie -+ ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) -+ : 0; -+ -+ if (v == cookie) -+ ret = bch2_btree_delete_at(trans, &iter, 0); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans, -+ struct btree_iter *work_iter) -+{ -+ return !kthread_should_stop() -+ ? bch2_btree_iter_peek(work_iter) -+ : bkey_s_c_null; -+} -+ -+static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); -+ int ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ return ret; -+ -+ extent_entry_drop(bkey_i_to_s(n), -+ (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); -+ return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); -+} -+ -+static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, -+ struct bpos work_pos, -+ struct btree_iter *extent_iter, -+ struct data_update_opts *data_opts) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c k; -+ -+ bch2_trans_iter_exit(trans, extent_iter); -+ bch2_trans_iter_init(trans, extent_iter, -+ work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink, -+ work_pos, -+ BTREE_ITER_ALL_SNAPSHOTS); -+ k = bch2_btree_iter_peek_slot(extent_iter); -+ if (bkey_err(k)) -+ return k; -+ -+ const struct bch_extent_rebalance *r = k.k ? bch2_bkey_rebalance_opts(k) : NULL; -+ if (!r) { -+ /* raced due to btree write buffer, nothing to do */ -+ return bkey_s_c_null; -+ } -+ -+ memset(data_opts, 0, sizeof(*data_opts)); -+ -+ data_opts->rewrite_ptrs = -+ bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); -+ data_opts->target = r->target; -+ -+ if (!data_opts->rewrite_ptrs) { -+ /* -+ * device we would want to write to offline? devices in target -+ * changed? 
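
The cookie written above behaves as a generation counter: every needs-scan
request bumps it, and bch2_clear_rebalance_needs_scan() deletes the key only
if the value the scan captured at its start is still current, so a request
that races with a finishing scan is never lost. The bare idea, outside the
btree (illustrative sketch only, not the on-disk mechanism):

	#include <stdint.h>

	static uint64_t cookie;			/* stands in for the btree key's value */

	static uint64_t request_scan(void)	/* cf. __bch2_set_rebalance_needs_scan() */
	{
		return ++cookie;		/* bump the generation */
	}

	static void scan_done(uint64_t seen)	/* cf. bch2_clear_rebalance_needs_scan() */
	{
		if (cookie == seen)		/* nobody asked again while we scanned */
			cookie = 0;		/* the real code deletes the key instead */
	}
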
-+ * -+ * We'll now need a full scan before this extent is picked up -+ * again: -+ */ -+ int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k); -+ if (ret) -+ return bkey_s_c_err(ret); -+ return bkey_s_c_null; -+ } -+ -+ return k; -+} -+ -+noinline_for_stack -+static int do_rebalance_extent(struct moving_context *ctxt, -+ struct bpos work_pos, -+ struct btree_iter *extent_iter) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct bch_fs_rebalance *r = &trans->c->rebalance; -+ struct data_update_opts data_opts; -+ struct bch_io_opts io_opts; -+ struct bkey_s_c k; -+ struct bkey_buf sk; -+ int ret; -+ -+ ctxt->stats = &r->work_stats; -+ r->state = BCH_REBALANCE_working; -+ -+ bch2_bkey_buf_init(&sk); -+ -+ ret = bkey_err(k = next_rebalance_extent(trans, work_pos, -+ extent_iter, &data_opts)); -+ if (ret || !k.k) -+ goto out; -+ -+ ret = bch2_move_get_io_opts_one(trans, &io_opts, k); -+ if (ret) -+ goto out; -+ -+ atomic64_add(k.k->size, &ctxt->stats->sectors_seen); -+ -+ /* -+ * The iterator gets unlocked by __bch2_read_extent - need to -+ * save a copy of @k elsewhere: -+ */ -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts); -+ if (ret) { -+ if (bch2_err_matches(ret, ENOMEM)) { -+ /* memory allocation failure, wait for some IO to finish */ -+ bch2_move_ctxt_wait_for_io(ctxt); -+ ret = -BCH_ERR_transaction_restart_nested; -+ } -+ -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto out; -+ -+ /* skip it and continue, XXX signal failure */ -+ ret = 0; -+ } -+out: -+ bch2_bkey_buf_exit(&sk, c); -+ return ret; -+} -+ -+static bool rebalance_pred(struct bch_fs *c, void *arg, -+ struct bkey_s_c k, -+ struct bch_io_opts *io_opts, -+ struct data_update_opts *data_opts) -+{ -+ unsigned target, compression; -+ -+ if (k.k->p.inode) { -+ target = io_opts->background_target; -+ compression = io_opts->background_compression ?: io_opts->compression; -+ } else { -+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); -+ -+ target = r ? r->target : io_opts->background_target; -+ compression = r ? 
r->compression : -+ (io_opts->background_compression ?: io_opts->compression); -+ } -+ -+ data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); -+ data_opts->target = target; -+ return data_opts->rewrite_ptrs != 0; -+} -+ -+static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs_rebalance *r = &trans->c->rebalance; -+ int ret; -+ -+ bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); -+ ctxt->stats = &r->scan_stats; -+ -+ if (!inum) { -+ r->scan_start = BBPOS_MIN; -+ r->scan_end = BBPOS_MAX; -+ } else { -+ r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0)); -+ r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX)); -+ } -+ -+ r->state = BCH_REBALANCE_scanning; -+ -+ ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?: -+ commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, -+ bch2_clear_rebalance_needs_scan(trans, inum, cookie)); -+ -+ bch2_move_stats_exit(&r->scan_stats, trans->c); -+ return ret; -+} -+ -+static void rebalance_wait(struct bch_fs *c) -+{ -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct io_clock *clock = &c->io_clock[WRITE]; -+ u64 now = atomic64_read(&clock->now); -+ u64 min_member_capacity = bch2_min_rw_member_capacity(c); -+ -+ if (min_member_capacity == U64_MAX) -+ min_member_capacity = 128 * 2048; -+ -+ r->wait_iotime_end = now + (min_member_capacity >> 6); -+ -+ if (r->state != BCH_REBALANCE_waiting) { -+ r->wait_iotime_start = now; -+ r->wait_wallclock_start = ktime_get_real_ns(); -+ r->state = BCH_REBALANCE_waiting; -+ } -+ -+ bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT); -+} -+ -+static int do_rebalance(struct moving_context *ctxt) -+{ -+ struct btree_trans *trans = ctxt->trans; -+ struct bch_fs *c = trans->c; -+ struct bch_fs_rebalance *r = &c->rebalance; -+ struct btree_iter rebalance_work_iter, extent_iter = { NULL }; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_move_stats_init(&r->work_stats, "rebalance_work"); -+ bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); -+ -+ bch2_trans_iter_init(trans, &rebalance_work_iter, -+ BTREE_ID_rebalance_work, POS_MIN, -+ BTREE_ITER_ALL_SNAPSHOTS); -+ -+ while (!bch2_move_ratelimit(ctxt) && -+ !kthread_wait_freezable(r->enabled)) { -+ bch2_trans_begin(trans); -+ -+ ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter)); -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ continue; -+ if (ret || !k.k) -+ break; -+ -+ ret = k.k->type == KEY_TYPE_cookie -+ ? 
do_rebalance_scan(ctxt, k.k->p.inode,
-+				    le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
-+			: do_rebalance_extent(ctxt, k.k->p, &extent_iter);
-+
-+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-+			continue;
-+		if (ret)
-+			break;
-+
-+		bch2_btree_iter_advance(&rebalance_work_iter);
-+	}
-+
-+	bch2_trans_iter_exit(trans, &extent_iter);
-+	bch2_trans_iter_exit(trans, &rebalance_work_iter);
-+	bch2_move_stats_exit(&r->scan_stats, c);
-+
-+	if (!ret &&
-+	    !kthread_should_stop() &&
-+	    !atomic64_read(&r->work_stats.sectors_seen) &&
-+	    !atomic64_read(&r->scan_stats.sectors_seen)) {
-+		bch2_trans_unlock_long(trans);
-+		rebalance_wait(c);
-+	}
-+
-+	if (!bch2_err_matches(ret, EROFS))
-+		bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+static int bch2_rebalance_thread(void *arg)
-+{
-+	struct bch_fs *c = arg;
-+	struct bch_fs_rebalance *r = &c->rebalance;
-+	struct moving_context ctxt;
-+	int ret;
-+
-+	set_freezable();
-+
-+	bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
-+			      writepoint_ptr(&c->rebalance_write_point),
-+			      true);
-+
-+	while (!kthread_should_stop() &&
-+	       !(ret = do_rebalance(&ctxt)))
-+		;
-+
-+	bch2_moving_ctxt_exit(&ctxt);
-+
-+	return 0;
-+}
-+
-+void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	struct bch_fs_rebalance *r = &c->rebalance;
-+
-+	prt_str(out, bch2_rebalance_state_strs[r->state]);
-+	prt_newline(out);
-+	printbuf_indent_add(out, 2);
-+
-+	switch (r->state) {
-+	case BCH_REBALANCE_waiting: {
-+		u64 now = atomic64_read(&c->io_clock[WRITE].now);
-+
-+		prt_str(out, "io wait duration: ");
-+		bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start);
-+		prt_newline(out);
-+
-+		prt_str(out, "io wait remaining: ");
-+		bch2_prt_human_readable_s64(out, r->wait_iotime_end - now);
-+		prt_newline(out);
-+
-+		prt_str(out, "duration waited: ");
-+		bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start);
-+		prt_newline(out);
-+		break;
-+	}
-+	case BCH_REBALANCE_working:
-+		bch2_move_stats_to_text(out, &r->work_stats);
-+		break;
-+	case BCH_REBALANCE_scanning:
-+		bch2_move_stats_to_text(out, &r->scan_stats);
-+		break;
-+	}
-+	prt_newline(out);
-+	printbuf_indent_sub(out, 2);
-+}
-+
-+void bch2_rebalance_stop(struct bch_fs *c)
-+{
-+	struct task_struct *p;
-+
-+	c->rebalance.pd.rate.rate	= UINT_MAX;
-+	bch2_ratelimit_reset(&c->rebalance.pd.rate);
-+
-+	p = rcu_dereference_protected(c->rebalance.thread, 1);
-+	c->rebalance.thread = NULL;
-+
-+	if (p) {
-+		/* for synchronizing with rebalance_wakeup() */
-+		synchronize_rcu();
-+
-+		kthread_stop(p);
-+		put_task_struct(p);
-+	}
-+}
-+
-+int bch2_rebalance_start(struct bch_fs *c)
-+{
-+	struct task_struct *p;
-+	int ret;
-+
-+	if (c->rebalance.thread)
-+		return 0;
-+
-+	if (c->opts.nochanges)
-+		return 0;
-+
-+	p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
-+	ret = PTR_ERR_OR_ZERO(p);
-+	if (ret) {
-+		bch_err_msg(c, ret, "creating rebalance thread");
-+		return ret;
-+	}
-+
-+	get_task_struct(p);
-+	rcu_assign_pointer(c->rebalance.thread, p);
-+	wake_up_process(p);
-+	return 0;
-+}
-+
-+void bch2_fs_rebalance_init(struct bch_fs *c)
-+{
-+	bch2_pd_controller_init(&c->rebalance.pd);
-+}
-diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
-new file mode 100644
-index 000000000000..28a52638f16c
---- /dev/null
-+++ b/fs/bcachefs/rebalance.h
-@@ -0,0 +1,27 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_REBALANCE_H
-+#define _BCACHEFS_REBALANCE_H
-+
-+#include "rebalance_types.h"
-+
-+int
bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); -+int bch2_set_fs_needs_rebalance(struct bch_fs *); -+ -+static inline void rebalance_wakeup(struct bch_fs *c) -+{ -+ struct task_struct *p; -+ -+ rcu_read_lock(); -+ p = rcu_dereference(c->rebalance.thread); -+ if (p) -+ wake_up_process(p); -+ rcu_read_unlock(); -+} -+ -+void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *); -+ -+void bch2_rebalance_stop(struct bch_fs *); -+int bch2_rebalance_start(struct bch_fs *); -+void bch2_fs_rebalance_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REBALANCE_H */ -diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h -new file mode 100644 -index 000000000000..0fffb536c1d0 ---- /dev/null -+++ b/fs/bcachefs/rebalance_types.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REBALANCE_TYPES_H -+#define _BCACHEFS_REBALANCE_TYPES_H -+ -+#include "bbpos_types.h" -+#include "move_types.h" -+ -+#define BCH_REBALANCE_STATES() \ -+ x(waiting) \ -+ x(working) \ -+ x(scanning) -+ -+enum bch_rebalance_states { -+#define x(t) BCH_REBALANCE_##t, -+ BCH_REBALANCE_STATES() -+#undef x -+}; -+ -+struct bch_fs_rebalance { -+ struct task_struct __rcu *thread; -+ struct bch_pd_controller pd; -+ -+ enum bch_rebalance_states state; -+ u64 wait_iotime_start; -+ u64 wait_iotime_end; -+ u64 wait_wallclock_start; -+ -+ struct bch_move_stats work_stats; -+ -+ struct bbpos scan_start; -+ struct bbpos scan_end; -+ struct bch_move_stats scan_stats; -+ -+ unsigned enabled:1; -+}; -+ -+#endif /* _BCACHEFS_REBALANCE_TYPES_H */ -diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c -new file mode 100644 -index 000000000000..9c30500ce920 ---- /dev/null -+++ b/fs/bcachefs/recovery.c -@@ -0,0 +1,1057 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "backpointers.h" -+#include "bkey_buf.h" -+#include "alloc_background.h" -+#include "btree_gc.h" -+#include "btree_journal_iter.h" -+#include "btree_update.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "buckets.h" -+#include "dirent.h" -+#include "ec.h" -+#include "errcode.h" -+#include "error.h" -+#include "fs-common.h" -+#include "fsck.h" -+#include "journal_io.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "lru.h" -+#include "logged_ops.h" -+#include "move.h" -+#include "quota.h" -+#include "rebalance.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "sb-clean.h" -+#include "snapshot.h" -+#include "subvolume.h" -+#include "super-io.h" -+ -+#include -+#include -+ -+#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -+ -+static bool btree_id_is_alloc(enum btree_id id) -+{ -+ switch (id) { -+ case BTREE_ID_alloc: -+ case BTREE_ID_backpointers: -+ case BTREE_ID_need_discard: -+ case BTREE_ID_freespace: -+ case BTREE_ID_bucket_gens: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+/* for -o reconstruct_alloc: */ -+static void drop_alloc_keys(struct journal_keys *keys) -+{ -+ size_t src, dst; -+ -+ for (src = 0, dst = 0; src < keys->nr; src++) -+ if (!btree_id_is_alloc(keys->d[src].btree_id)) -+ keys->d[dst++] = keys->d[src]; -+ -+ keys->nr = dst; -+} -+ -+/* -+ * Btree node pointers have a field to stack a pointer to the in memory btree -+ * node; we need to zero out this field when reading in btree nodes, or when -+ * reading in keys from the journal: -+ */ -+static void zero_out_btree_mem_ptr(struct journal_keys *keys) -+{ -+ struct journal_key *i; -+ -+ for (i = keys->d; i < keys->d + keys->nr; 
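
drop_alloc_keys() above is the classic two-index in-place filter: src visits
every element, dst counts how many were kept, and the array is truncated to
dst afterwards. The same idiom in standalone form (illustrative sketch only;
treating the value 4 as the one to drop):

	#include <stdio.h>

	int main(void)
	{
		int keys[] = { 1, 4, 2, 4, 3 };
		unsigned nr = 5, src, dst = 0;

		for (src = 0; src < nr; src++)
			if (keys[src] != 4)		/* keep everything but "alloc" keys */
				keys[dst++] = keys[src];
		nr = dst;				/* 3 survivors: 1 2 3 */

		for (src = 0; src < nr; src++)
			printf("%d ", keys[src]);
		printf("\n");
		return 0;
	}
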
i++) -+ if (i->k->k.type == KEY_TYPE_btree_ptr_v2) -+ bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; -+} -+ -+/* journal replay: */ -+ -+static void replay_now_at(struct journal *j, u64 seq) -+{ -+ BUG_ON(seq < j->replay_journal_seq); -+ -+ seq = min(seq, j->replay_journal_seq_end); -+ -+ while (j->replay_journal_seq < seq) -+ bch2_journal_pin_put(j, j->replay_journal_seq++); -+} -+ -+static int bch2_journal_replay_key(struct btree_trans *trans, -+ struct journal_key *k) -+{ -+ struct btree_iter iter; -+ unsigned iter_flags = -+ BTREE_ITER_INTENT| -+ BTREE_ITER_NOT_EXTENTS; -+ unsigned update_flags = BTREE_TRIGGER_NORUN; -+ int ret; -+ -+ /* -+ * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to -+ * keep the key cache coherent with the underlying btree. Nothing -+ * besides the allocator is doing updates yet so we don't need key cache -+ * coherency for non-alloc btrees, and key cache fills for snapshots -+ * btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until -+ * the snapshots recovery pass runs. -+ */ -+ if (!k->level && k->btree_id == BTREE_ID_alloc) -+ iter_flags |= BTREE_ITER_CACHED; -+ else -+ update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM; -+ -+ bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, -+ BTREE_MAX_DEPTH, k->level, -+ iter_flags); -+ ret = bch2_btree_iter_traverse(&iter); -+ if (ret) -+ goto out; -+ -+ /* Must be checked with btree locked: */ -+ if (k->overwritten) -+ goto out; -+ -+ ret = bch2_trans_update(trans, &iter, k->k, update_flags); -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int journal_sort_seq_cmp(const void *_l, const void *_r) -+{ -+ const struct journal_key *l = *((const struct journal_key **)_l); -+ const struct journal_key *r = *((const struct journal_key **)_r); -+ -+ return cmp_int(l->journal_seq, r->journal_seq); -+} -+ -+static int bch2_journal_replay(struct bch_fs *c) -+{ -+ struct journal_keys *keys = &c->journal_keys; -+ struct journal_key **keys_sorted, *k; -+ struct journal *j = &c->journal; -+ u64 start_seq = c->journal_replay_seq_start; -+ u64 end_seq = c->journal_replay_seq_start; -+ size_t i; -+ int ret; -+ -+ move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); -+ keys->gap = keys->nr; -+ -+ keys_sorted = kvmalloc_array(keys->nr, sizeof(*keys_sorted), GFP_KERNEL); -+ if (!keys_sorted) -+ return -BCH_ERR_ENOMEM_journal_replay; -+ -+ for (i = 0; i < keys->nr; i++) -+ keys_sorted[i] = &keys->d[i]; -+ -+ sort(keys_sorted, keys->nr, -+ sizeof(keys_sorted[0]), -+ journal_sort_seq_cmp, NULL); -+ -+ if (keys->nr) { -+ ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", -+ keys->nr, start_seq, end_seq); -+ if (ret) -+ goto err; -+ } -+ -+ for (i = 0; i < keys->nr; i++) { -+ k = keys_sorted[i]; -+ -+ cond_resched(); -+ -+ replay_now_at(j, k->journal_seq); -+ -+ ret = bch2_trans_do(c, NULL, NULL, -+ BTREE_INSERT_LAZY_RW| -+ BTREE_INSERT_NOFAIL| -+ (!k->allocated -+ ? 
BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim -+ : 0), -+ bch2_journal_replay_key(trans, k)); -+ if (ret) { -+ bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", -+ bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret)); -+ goto err; -+ } -+ } -+ -+ replay_now_at(j, j->replay_journal_seq_end); -+ j->replay_journal_seq = 0; -+ -+ bch2_journal_set_replay_done(j); -+ bch2_journal_flush_all_pins(j); -+ ret = bch2_journal_error(j); -+ -+ if (keys->nr && !ret) -+ bch2_journal_log_msg(c, "journal replay finished"); -+err: -+ kvfree(keys_sorted); -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* journal replay early: */ -+ -+static int journal_replay_entry_early(struct bch_fs *c, -+ struct jset_entry *entry) -+{ -+ int ret = 0; -+ -+ switch (entry->type) { -+ case BCH_JSET_ENTRY_btree_root: { -+ struct btree_root *r; -+ -+ while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) { -+ ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }); -+ if (ret) -+ return ret; -+ } -+ -+ r = bch2_btree_id_root(c, entry->btree_id); -+ -+ if (entry->u64s) { -+ r->level = entry->level; -+ bkey_copy(&r->key, (struct bkey_i *) entry->start); -+ r->error = 0; -+ } else { -+ r->error = -EIO; -+ } -+ r->alive = true; -+ break; -+ } -+ case BCH_JSET_ENTRY_usage: { -+ struct jset_entry_usage *u = -+ container_of(entry, struct jset_entry_usage, entry); -+ -+ switch (entry->btree_id) { -+ case BCH_FS_USAGE_reserved: -+ if (entry->level < BCH_REPLICAS_MAX) -+ c->usage_base->persistent_reserved[entry->level] = -+ le64_to_cpu(u->v); -+ break; -+ case BCH_FS_USAGE_inodes: -+ c->usage_base->nr_inodes = le64_to_cpu(u->v); -+ break; -+ case BCH_FS_USAGE_key_version: -+ atomic64_set(&c->key_version, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ -+ break; -+ } -+ case BCH_JSET_ENTRY_data_usage: { -+ struct jset_entry_data_usage *u = -+ container_of(entry, struct jset_entry_data_usage, entry); -+ -+ ret = bch2_replicas_set_usage(c, &u->r, -+ le64_to_cpu(u->v)); -+ break; -+ } -+ case BCH_JSET_ENTRY_dev_usage: { -+ struct jset_entry_dev_usage *u = -+ container_of(entry, struct jset_entry_dev_usage, entry); -+ struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev)); -+ unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); -+ -+ ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec); -+ -+ for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) { -+ ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets); -+ ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors); -+ ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented); -+ } -+ -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist: { -+ struct jset_entry_blacklist *bl_entry = -+ container_of(entry, struct jset_entry_blacklist, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->seq), -+ le64_to_cpu(bl_entry->seq) + 1); -+ break; -+ } -+ case BCH_JSET_ENTRY_blacklist_v2: { -+ struct jset_entry_blacklist_v2 *bl_entry = -+ container_of(entry, struct jset_entry_blacklist_v2, entry); -+ -+ ret = bch2_journal_seq_blacklist_add(c, -+ le64_to_cpu(bl_entry->start), -+ le64_to_cpu(bl_entry->end) + 1); -+ break; -+ } -+ case BCH_JSET_ENTRY_clock: { -+ struct jset_entry_clock *clock = -+ container_of(entry, struct jset_entry_clock, entry); -+ -+ atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time)); -+ } -+ } -+ -+ return ret; -+} -+ -+static int journal_replay_early(struct bch_fs *c, -+ struct bch_sb_field_clean *clean) -+{ -+ struct jset_entry 
*entry; -+ int ret; -+ -+ if (clean) { -+ for (entry = clean->start; -+ entry != vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } else { -+ struct genradix_iter iter; -+ struct journal_replay *i, **_i; -+ -+ genradix_for_each(&c->journal_entries, iter, _i) { -+ i = *_i; -+ -+ if (!i || i->ignore) -+ continue; -+ -+ vstruct_for_each(&i->j, entry) { -+ ret = journal_replay_entry_early(c, entry); -+ if (ret) -+ return ret; -+ } -+ } -+ } -+ -+ bch2_fs_usage_initialize(c); -+ -+ return 0; -+} -+ -+/* sb clean section: */ -+ -+static int read_btree_roots(struct bch_fs *c) -+{ -+ unsigned i; -+ int ret = 0; -+ -+ for (i = 0; i < btree_id_nr_alive(c); i++) { -+ struct btree_root *r = bch2_btree_id_root(c, i); -+ -+ if (!r->alive) -+ continue; -+ -+ if (btree_id_is_alloc(i) && -+ c->opts.reconstruct_alloc) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ continue; -+ } -+ -+ if (r->error) { -+ __fsck_err(c, -+ btree_id_is_alloc(i) -+ ? FSCK_CAN_IGNORE : 0, -+ btree_root_bkey_invalid, -+ "invalid btree root %s", -+ bch2_btree_id_str(i)); -+ if (i == BTREE_ID_alloc) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ } -+ -+ ret = bch2_btree_root_read(c, i, &r->key, r->level); -+ if (ret) { -+ fsck_err(c, -+ btree_root_read_error, -+ "error reading btree root %s", -+ bch2_btree_id_str(i)); -+ if (btree_id_is_alloc(i)) -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ ret = 0; -+ } -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct btree_root *r = bch2_btree_id_root(c, i); -+ -+ if (!r->b) { -+ r->alive = false; -+ r->level = 0; -+ bch2_btree_root_alloc(c, i); -+ } -+ } -+fsck_err: -+ return ret; -+} -+ -+static int bch2_initialize_subvolumes(struct bch_fs *c) -+{ -+ struct bkey_i_snapshot_tree root_tree; -+ struct bkey_i_snapshot root_snapshot; -+ struct bkey_i_subvolume root_volume; -+ int ret; -+ -+ bkey_snapshot_tree_init(&root_tree.k_i); -+ root_tree.k.p.offset = 1; -+ root_tree.v.master_subvol = cpu_to_le32(1); -+ root_tree.v.root_snapshot = cpu_to_le32(U32_MAX); -+ -+ bkey_snapshot_init(&root_snapshot.k_i); -+ root_snapshot.k.p.offset = U32_MAX; -+ root_snapshot.v.flags = 0; -+ root_snapshot.v.parent = 0; -+ root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL); -+ root_snapshot.v.tree = cpu_to_le32(1); -+ SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true); -+ -+ bkey_subvolume_init(&root_volume.k_i); -+ root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; -+ root_volume.v.flags = 0; -+ root_volume.v.snapshot = cpu_to_le32(U32_MAX); -+ root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); -+ -+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: -+ bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: -+ bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bch_inode_unpacked inode; -+ int ret; -+ -+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, -+ SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); -+ ret = bkey_err(k); -+ if (ret) -+ return ret; -+ -+ if (!bkey_is_inode(k.k)) { -+ bch_err(trans->c, "root inode not found"); -+ ret = -BCH_ERR_ENOENT_inode; -+ goto err; -+ } -+ -+ ret = bch2_inode_unpack(k, &inode); -+ BUG_ON(ret); -+ -+ inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; -+ -+ ret = bch2_inode_write(trans, &iter, &inode); 
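
The recovery-pass tables coming up (bch2_recovery_passes[] and
recovery_pass_fns[]), like BCH_REBALANCE_STATES() earlier in this patch, are
generated from a single x-macro list, so the enum, the name table and the
handler table cannot drift out of sync. A minimal standalone sketch of the
pattern (names invented for this note):

	#include <stdio.h>

	#define MY_PASSES()		\
		x(alloc_read)		\
		x(journal_replay)

	enum my_pass {
	#define x(n)	PASS_##n,
		MY_PASSES()
	#undef x
	};

	static const char * const my_pass_strs[] = {
	#define x(n)	#n,
		MY_PASSES()
	#undef x
		NULL
	};

	int main(void)
	{
		printf("%s\n", my_pass_strs[PASS_journal_replay]);	/* journal_replay */
		return 0;
	}
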
-+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* set bi_subvol on root inode */ -+noinline_for_stack -+static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) -+{ -+ int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, -+ __bch2_fs_upgrade_for_subvolumes(trans)); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+const char * const bch2_recovery_passes[] = { -+#define x(_fn, _when) #_fn, -+ BCH_RECOVERY_PASSES() -+#undef x -+ NULL -+}; -+ -+static int bch2_check_allocations(struct bch_fs *c) -+{ -+ return bch2_gc(c, true, c->opts.norecovery); -+} -+ -+static int bch2_set_may_go_rw(struct bch_fs *c) -+{ -+ set_bit(BCH_FS_MAY_GO_RW, &c->flags); -+ return 0; -+} -+ -+struct recovery_pass_fn { -+ int (*fn)(struct bch_fs *); -+ unsigned when; -+}; -+ -+static struct recovery_pass_fn recovery_pass_fns[] = { -+#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when }, -+ BCH_RECOVERY_PASSES() -+#undef x -+}; -+ -+static void check_version_upgrade(struct bch_fs *c) -+{ -+ unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); -+ unsigned latest_version = bcachefs_metadata_version_current; -+ unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; -+ unsigned new_version = 0; -+ u64 recovery_passes; -+ -+ if (old_version < bcachefs_metadata_required_upgrade_below) { -+ if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || -+ latest_compatible < bcachefs_metadata_required_upgrade_below) -+ new_version = latest_version; -+ else -+ new_version = latest_compatible; -+ } else { -+ switch (c->opts.version_upgrade) { -+ case BCH_VERSION_UPGRADE_compatible: -+ new_version = latest_compatible; -+ break; -+ case BCH_VERSION_UPGRADE_incompatible: -+ new_version = latest_version; -+ break; -+ case BCH_VERSION_UPGRADE_none: -+ new_version = old_version; -+ break; -+ } -+ } -+ -+ if (new_version > old_version) { -+ struct printbuf buf = PRINTBUF; -+ -+ if (old_version < bcachefs_metadata_required_upgrade_below) -+ prt_str(&buf, "Version upgrade required:\n"); -+ -+ if (old_version != c->sb.version) { -+ prt_str(&buf, "Version upgrade from "); -+ bch2_version_to_text(&buf, c->sb.version_upgrade_complete); -+ prt_str(&buf, " to "); -+ bch2_version_to_text(&buf, c->sb.version); -+ prt_str(&buf, " incomplete\n"); -+ } -+ -+ prt_printf(&buf, "Doing %s version upgrade from ", -+ BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) -+ ? 
"incompatible" : "compatible"); -+ bch2_version_to_text(&buf, old_version); -+ prt_str(&buf, " to "); -+ bch2_version_to_text(&buf, new_version); -+ prt_newline(&buf); -+ -+ recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); -+ if (recovery_passes) { -+ if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK) -+ prt_str(&buf, "fsck required"); -+ else { -+ prt_str(&buf, "running recovery passes: "); -+ prt_bitflags(&buf, bch2_recovery_passes, recovery_passes); -+ } -+ -+ c->recovery_passes_explicit |= recovery_passes; -+ c->opts.fix_errors = FSCK_FIX_yes; -+ } -+ -+ bch_info(c, "%s", buf.buf); -+ -+ mutex_lock(&c->sb_lock); -+ bch2_sb_upgrade(c, new_version); -+ mutex_unlock(&c->sb_lock); -+ -+ printbuf_exit(&buf); -+ } -+} -+ -+u64 bch2_fsck_recovery_passes(void) -+{ -+ u64 ret = 0; -+ -+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) -+ if (recovery_pass_fns[i].when & PASS_FSCK) -+ ret |= BIT_ULL(i); -+ return ret; -+} -+ -+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) -+{ -+ struct recovery_pass_fn *p = recovery_pass_fns + c->curr_recovery_pass; -+ -+ if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) -+ return false; -+ if (c->recovery_passes_explicit & BIT_ULL(pass)) -+ return true; -+ if ((p->when & PASS_FSCK) && c->opts.fsck) -+ return true; -+ if ((p->when & PASS_UNCLEAN) && !c->sb.clean) -+ return true; -+ if (p->when & PASS_ALWAYS) -+ return true; -+ return false; -+} -+ -+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) -+{ -+ int ret; -+ -+ c->curr_recovery_pass = pass; -+ -+ if (should_run_recovery_pass(c, pass)) { -+ struct recovery_pass_fn *p = recovery_pass_fns + pass; -+ -+ if (!(p->when & PASS_SILENT)) -+ printk(KERN_INFO bch2_log_msg(c, "%s..."), -+ bch2_recovery_passes[pass]); -+ ret = p->fn(c); -+ if (ret) -+ return ret; -+ if (!(p->when & PASS_SILENT)) -+ printk(KERN_CONT " done\n"); -+ -+ c->recovery_passes_complete |= BIT_ULL(pass); -+ } -+ -+ return 0; -+} -+ -+static int bch2_run_recovery_passes(struct bch_fs *c) -+{ -+ int ret = 0; -+ -+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { -+ ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); -+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) -+ continue; -+ if (ret) -+ break; -+ c->curr_recovery_pass++; -+ } -+ -+ return ret; -+} -+ -+int bch2_fs_recovery(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *clean = NULL; -+ struct jset *last_journal_entry = NULL; -+ u64 last_seq = 0, blacklist_seq, journal_seq; -+ bool write_sb = false; -+ int ret = 0; -+ -+ if (c->sb.clean) { -+ clean = bch2_read_superblock_clean(c); -+ ret = PTR_ERR_OR_ZERO(clean); -+ if (ret) -+ goto err; -+ -+ bch_info(c, "recovering from clean shutdown, journal seq %llu", -+ le64_to_cpu(clean->journal_seq)); -+ } else { -+ bch_info(c, "recovering from unclean shutdown"); -+ } -+ -+ if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) { -+ bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (!c->sb.clean && -+ !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { -+ bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery)) -+ check_version_upgrade(c); -+ -+ if (c->opts.fsck && c->opts.norecovery) { -+ bch_err(c, "cannot 
select both norecovery and fsck"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = bch2_blacklist_table_initialize(c); -+ if (ret) { -+ bch_err(c, "error initializing blacklist table"); -+ goto err; -+ } -+ -+ if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { -+ struct genradix_iter iter; -+ struct journal_replay **i; -+ -+ bch_verbose(c, "starting journal read"); -+ ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq); -+ if (ret) -+ goto err; -+ -+ /* -+ * note: cmd_list_journal needs the blacklist table fully up to date so -+ * it can asterisk ignored journal entries: -+ */ -+ if (c->opts.read_journal_only) -+ goto out; -+ -+ genradix_for_each_reverse(&c->journal_entries, iter, i) -+ if (*i && !(*i)->ignore) { -+ last_journal_entry = &(*i)->j; -+ break; -+ } -+ -+ if (mustfix_fsck_err_on(c->sb.clean && -+ last_journal_entry && -+ !journal_entry_empty(last_journal_entry), c, -+ clean_but_journal_not_empty, -+ "filesystem marked clean but journal not empty")) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ } -+ -+ if (!last_journal_entry) { -+ fsck_err_on(!c->sb.clean, c, -+ dirty_but_no_journal_entries, -+ "no journal entries found"); -+ if (clean) -+ goto use_clean; -+ -+ genradix_for_each_reverse(&c->journal_entries, iter, i) -+ if (*i) { -+ last_journal_entry = &(*i)->j; -+ (*i)->ignore = false; -+ /* -+ * This was probably a NO_FLUSH entry, -+ * so last_seq was garbage - but we know -+ * we're only using a single journal -+ * entry, set it here: -+ */ -+ (*i)->j.last_seq = (*i)->j.seq; -+ break; -+ } -+ } -+ -+ ret = bch2_journal_keys_sort(c); -+ if (ret) -+ goto err; -+ -+ if (c->sb.clean && last_journal_entry) { -+ ret = bch2_verify_superblock_clean(c, &clean, -+ last_journal_entry); -+ if (ret) -+ goto err; -+ } -+ } else { -+use_clean: -+ if (!clean) { -+ bch_err(c, "no superblock clean section found"); -+ ret = -BCH_ERR_fsck_repair_impossible; -+ goto err; -+ -+ } -+ blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1; -+ } -+ -+ c->journal_replay_seq_start = last_seq; -+ c->journal_replay_seq_end = blacklist_seq - 1; -+ -+ if (c->opts.reconstruct_alloc) { -+ c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); -+ drop_alloc_keys(&c->journal_keys); -+ } -+ -+ zero_out_btree_mem_ptr(&c->journal_keys); -+ -+ ret = journal_replay_early(c, clean); -+ if (ret) -+ goto err; -+ -+ /* -+ * After an unclean shutdown, skip then next few journal sequence -+ * numbers as they may have been referenced by btree writes that -+ * happened before their corresponding journal writes - those btree -+ * writes need to be ignored, by skipping and blacklisting the next few -+ * journal sequence numbers: -+ */ -+ if (!c->sb.clean) -+ journal_seq += 8; -+ -+ if (blacklist_seq != journal_seq) { -+ ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu", -+ blacklist_seq, journal_seq) ?: -+ bch2_journal_seq_blacklist_add(c, -+ blacklist_seq, journal_seq); -+ if (ret) { -+ bch_err(c, "error creating new journal seq blacklist entry"); -+ goto err; -+ } -+ } -+ -+ ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", -+ journal_seq, last_seq, blacklist_seq - 1) ?: -+ bch2_fs_journal_start(&c->journal, journal_seq); -+ if (ret) -+ goto err; -+ -+ if (c->opts.reconstruct_alloc) -+ bch2_journal_log_msg(c, "dropping alloc info"); -+ -+ /* -+ * Skip past versions that might have possibly been used (as nonces), -+ * but hadn't had their pointers written: -+ */ -+ if 
(c->sb.encryption_type && !c->sb.clean) -+ atomic64_add(1 << 16, &c->key_version); -+ -+ ret = read_btree_roots(c); -+ if (ret) -+ goto err; -+ -+ if (c->opts.fsck && -+ (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || -+ BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))) -+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); -+ -+ ret = bch2_run_recovery_passes(c); -+ if (ret) -+ goto err; -+ -+ /* If we fixed errors, verify that fs is actually clean now: */ -+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && -+ test_bit(BCH_FS_ERRORS_FIXED, &c->flags) && -+ !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags) && -+ !test_bit(BCH_FS_ERROR, &c->flags)) { -+ bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); -+ clear_bit(BCH_FS_ERRORS_FIXED, &c->flags); -+ -+ c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; -+ -+ ret = bch2_run_recovery_passes(c); -+ if (ret) -+ goto err; -+ -+ if (test_bit(BCH_FS_ERRORS_FIXED, &c->flags) || -+ test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { -+ bch_err(c, "Second fsck run was not clean"); -+ set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags); -+ } -+ -+ set_bit(BCH_FS_ERRORS_FIXED, &c->flags); -+ } -+ -+ if (enabled_qtypes(c)) { -+ bch_verbose(c, "reading quotas"); -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "quotas done"); -+ } -+ -+ mutex_lock(&c->sb_lock); -+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) { -+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version); -+ write_sb = true; -+ } -+ -+ if (!test_bit(BCH_FS_ERROR, &c->flags)) { -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); -+ write_sb = true; -+ } -+ -+ if (c->opts.fsck && -+ !test_bit(BCH_FS_ERROR, &c->flags) && -+ !test_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags)) { -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); -+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0); -+ write_sb = true; -+ } -+ -+ if (write_sb) -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) || -+ c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) { -+ struct bch_move_stats stats; -+ -+ bch2_move_stats_init(&stats, "recovery"); -+ -+ bch_info(c, "scanning for old btree nodes"); -+ ret = bch2_fs_read_write(c) ?: -+ bch2_scan_old_btree_nodes(c, &stats); -+ if (ret) -+ goto err; -+ bch_info(c, "scanning for old btree nodes done"); -+ } -+ -+ if (c->journal_seq_blacklist_table && -+ c->journal_seq_blacklist_table->nr > 128) -+ queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); -+ -+ ret = 0; -+out: -+ set_bit(BCH_FS_FSCK_DONE, &c->flags); -+ bch2_flush_fsck_errs(c); -+ -+ if (!c->opts.keep_journal && -+ test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) { -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(c); -+ } -+ kfree(clean); -+ -+ if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { -+ bch2_fs_read_write_early(c); -+ bch2_delete_dead_snapshots_async(c); -+ } -+ -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+err: -+fsck_err: -+ bch2_fs_emergency_read_only(c); -+ goto out; -+} -+ -+int bch2_fs_initialize(struct bch_fs *c) -+{ -+ struct bch_inode_unpacked root_inode, lostfound_inode; -+ struct bkey_inode_buf packed_inode; -+ struct qstr lostfound = QSTR("lost+found"); -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ bch_notice(c, "initializing new filesystem"); -+ -+ mutex_lock(&c->sb_lock); -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << 
BCH_COMPAT_extents_above_btree_updates_done); -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); -+ -+ bch2_sb_maybe_downgrade(c); -+ -+ if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { -+ bch2_sb_upgrade(c, bcachefs_metadata_version_current); -+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ -+ c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); -+ set_bit(BCH_FS_MAY_GO_RW, &c->flags); -+ set_bit(BCH_FS_FSCK_DONE, &c->flags); -+ -+ for (i = 0; i < BTREE_ID_NR; i++) -+ bch2_btree_root_alloc(c, i); -+ -+ for_each_member_device(ca, c, i) -+ bch2_dev_usage_init(ca); -+ -+ ret = bch2_fs_journal_alloc(c); -+ if (ret) -+ goto err; -+ -+ /* -+ * journal_res_get() will crash if called before this has -+ * set up the journal.pin FIFO and journal.cur pointer: -+ */ -+ bch2_fs_journal_start(&c->journal, 1); -+ bch2_journal_set_replay_done(&c->journal); -+ -+ ret = bch2_fs_read_write_early(c); -+ if (ret) -+ goto err; -+ -+ /* -+ * Write out the superblock and journal buckets, now that we can do -+ * btree updates -+ */ -+ bch_verbose(c, "marking superblocks"); -+ ret = bch2_trans_mark_dev_sbs(c); -+ bch_err_msg(c, ret, "marking superblocks"); -+ if (ret) -+ goto err; -+ -+ for_each_online_member(ca, c, i) -+ ca->new_fs_bucket_idx = 0; -+ -+ ret = bch2_fs_freespace_init(c); -+ if (ret) -+ goto err; -+ -+ ret = bch2_initialize_subvolumes(c); -+ if (ret) -+ goto err; -+ -+ bch_verbose(c, "reading snapshots table"); -+ ret = bch2_snapshots_read(c); -+ if (ret) -+ goto err; -+ bch_verbose(c, "reading snapshots done"); -+ -+ bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); -+ root_inode.bi_inum = BCACHEFS_ROOT_INO; -+ root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; -+ bch2_inode_pack(&packed_inode, &root_inode); -+ packed_inode.inode.k.p.snapshot = U32_MAX; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0); -+ if (ret) { -+ bch_err_msg(c, ret, "creating root directory"); -+ goto err; -+ } -+ -+ bch2_inode_init_early(c, &lostfound_inode); -+ -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_create_trans(trans, -+ BCACHEFS_ROOT_SUBVOL_INUM, -+ &root_inode, &lostfound_inode, -+ &lostfound, -+ 0, 0, S_IFDIR|0700, 0, -+ NULL, NULL, (subvol_inum) { 0 }, 0)); -+ if (ret) { -+ bch_err_msg(c, ret, "creating lost+found"); -+ goto err; -+ } -+ -+ if (enabled_qtypes(c)) { -+ ret = bch2_fs_quota_read(c); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_journal_flush(&c->journal); -+ if (ret) { -+ bch_err_msg(c, ret, "writing first journal entry"); -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+err: -+ bch_err_fn(ca, ret); -+ return ret; -+} -diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h -new file mode 100644 -index 000000000000..852d30567da9 ---- /dev/null -+++ b/fs/bcachefs/recovery.h -@@ -0,0 +1,33 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_RECOVERY_H -+#define _BCACHEFS_RECOVERY_H -+ -+extern const char * const bch2_recovery_passes[]; -+ -+/* -+ * For when we need to rewind recovery passes and run a pass we skipped: -+ */ -+static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, -+ enum bch_recovery_pass pass) -+{ -+ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", -+ 
		 bch2_recovery_passes[pass], pass,
-+		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
-+
-+	c->recovery_passes_explicit |= BIT_ULL(pass);
-+
-+	if (c->curr_recovery_pass >= pass) {
-+		c->curr_recovery_pass = pass;
-+		c->recovery_passes_complete &= (1ULL << pass) >> 1;
-+		return -BCH_ERR_restart_recovery;
-+	} else {
-+		return 0;
-+	}
-+}
-+
-+u64 bch2_fsck_recovery_passes(void);
-+
-+int bch2_fs_recovery(struct bch_fs *);
-+int bch2_fs_initialize(struct bch_fs *);
-+
-+#endif /* _BCACHEFS_RECOVERY_H */
-diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h
-new file mode 100644
-index 000000000000..515e3d62c2ac
---- /dev/null
-+++ b/fs/bcachefs/recovery_types.h
-@@ -0,0 +1,53 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_RECOVERY_TYPES_H
-+#define _BCACHEFS_RECOVERY_TYPES_H
-+
-+#define PASS_SILENT		BIT(0)
-+#define PASS_FSCK		BIT(1)
-+#define PASS_UNCLEAN		BIT(2)
-+#define PASS_ALWAYS		BIT(3)
-+
-+#define BCH_RECOVERY_PASSES()						\
-+	x(alloc_read,			PASS_ALWAYS)			\
-+	x(stripes_read,			PASS_ALWAYS)			\
-+	x(initialize_subvolumes,	0)				\
-+	x(snapshots_read,		PASS_ALWAYS)			\
-+	x(check_topology,		0)				\
-+	x(check_allocations,		PASS_FSCK)			\
-+	x(trans_mark_dev_sbs,		PASS_ALWAYS|PASS_SILENT)	\
-+	x(fs_journal_alloc,		PASS_ALWAYS|PASS_SILENT)	\
-+	x(set_may_go_rw,		PASS_ALWAYS|PASS_SILENT)	\
-+	x(journal_replay,		PASS_ALWAYS)			\
-+	x(check_alloc_info,		PASS_FSCK)			\
-+	x(check_lrus,			PASS_FSCK)			\
-+	x(check_btree_backpointers,	PASS_FSCK)			\
-+	x(check_backpointers_to_extents,PASS_FSCK)			\
-+	x(check_extents_to_backpointers,PASS_FSCK)			\
-+	x(check_alloc_to_lru_refs,	PASS_FSCK)			\
-+	x(fs_freespace_init,		PASS_ALWAYS|PASS_SILENT)	\
-+	x(bucket_gens_init,		0)				\
-+	x(check_snapshot_trees,		PASS_FSCK)			\
-+	x(check_snapshots,		PASS_FSCK)			\
-+	x(check_subvols,		PASS_FSCK)			\
-+	x(delete_dead_snapshots,	PASS_FSCK)			\
-+	x(fs_upgrade_for_subvolumes,	0)				\
-+	x(resume_logged_ops,		PASS_ALWAYS)			\
-+	x(check_inodes,			PASS_FSCK)			\
-+	x(check_extents,		PASS_FSCK)			\
-+	x(check_indirect_extents,	PASS_FSCK)			\
-+	x(check_dirents,		PASS_FSCK)			\
-+	x(check_xattrs,			PASS_FSCK)			\
-+	x(check_root,			PASS_FSCK)			\
-+	x(check_directory_structure,	PASS_FSCK)			\
-+	x(check_nlinks,			PASS_FSCK)			\
-+	x(delete_dead_inodes,		PASS_FSCK|PASS_UNCLEAN)		\
-+	x(fix_reflink_p,		0)				\
-+	x(set_fs_needs_rebalance,	0)				\
-+
-+enum bch_recovery_pass {
-+#define x(n, when)	BCH_RECOVERY_PASS_##n,
-+	BCH_RECOVERY_PASSES()
-+#undef x
-+};
-+
-+#endif /* _BCACHEFS_RECOVERY_TYPES_H */
-diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
-new file mode 100644
-index 000000000000..6e1bfe9feb59
---- /dev/null
-+++ b/fs/bcachefs/reflink.c
-@@ -0,0 +1,406 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "bkey_buf.h"
-+#include "btree_update.h"
-+#include "buckets.h"
-+#include "extents.h"
-+#include "inode.h"
-+#include "io_misc.h"
-+#include "io_write.h"
-+#include "rebalance.h"
-+#include "reflink.h"
-+#include "subvolume.h"
-+#include "super-io.h"
-+
-+#include <linux/sched/signal.h>
-+
-+static inline unsigned bkey_type_to_indirect(const struct bkey *k)
-+{
-+	switch (k->type) {
-+	case KEY_TYPE_extent:
-+		return KEY_TYPE_reflink_v;
-+	case KEY_TYPE_inline_data:
-+		return KEY_TYPE_indirect_inline_data;
-+	default:
-+		return 0;
-+	}
-+}
-+
-+/* reflink pointers */
-+
-+int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k,
-+			   enum bkey_invalid_flags flags,
-+			   struct printbuf *err)
-+{
-+	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-+
-+	if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
-+	    
le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) { -+ prt_printf(err, "idx < front_pad (%llu < %u)", -+ le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); -+ -+ prt_printf(out, "idx %llu front_pad %u back_pad %u", -+ le64_to_cpu(p.v->idx), -+ le32_to_cpu(p.v->front_pad), -+ le32_to_cpu(p.v->back_pad)); -+} -+ -+bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l); -+ struct bkey_s_c_reflink_p r = bkey_s_c_to_reflink_p(_r); -+ -+ /* -+ * Disabled for now, the triggers code needs to be reworked for merging -+ * of reflink pointers to work: -+ */ -+ return false; -+ -+ if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) -+ return false; -+ -+ bch2_key_resize(l.k, l.k->size + r.k->size); -+ return true; -+} -+ -+/* indirect extents */ -+ -+int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ return bch2_bkey_ptrs_invalid(c, k, flags, err); -+} -+ -+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); -+ -+ prt_printf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); -+ -+ bch2_bkey_ptrs_to_text(out, c, k); -+} -+ -+#if 0 -+Currently disabled, needs to be debugged: -+ -+bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); -+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(_r); -+ -+ return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); -+} -+#endif -+ -+static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) -+{ -+ if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { -+ new->k.type = KEY_TYPE_deleted; -+ new->k.size = 0; -+ set_bkey_val_u64s(&new->k, 0);; -+ *flags &= ~BTREE_TRIGGER_INSERT; -+ } -+} -+ -+int bch2_trans_mark_reflink_v(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_i *new, -+ unsigned flags) -+{ -+ check_indirect_extent_deleting(new, &flags); -+ -+ return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); -+} -+ -+/* indirect inline data */ -+ -+int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ return 0; -+} -+ -+void bch2_indirect_inline_data_to_text(struct printbuf *out, -+ struct bch_fs *c, struct bkey_s_c k) -+{ -+ struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); -+ unsigned datalen = bkey_inline_data_bytes(k.k); -+ -+ prt_printf(out, "refcount %llu datalen %u: %*phN", -+ le64_to_cpu(d.v->refcount), datalen, -+ min(datalen, 32U), d.v->data); -+} -+ -+int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, -+ enum btree_id btree_id, unsigned level, -+ struct bkey_s_c old, struct bkey_i *new, -+ unsigned flags) -+{ -+ check_indirect_extent_deleting(new, &flags); -+ -+ return 0; -+} -+ -+static int bch2_make_extent_indirect(struct btree_trans *trans, -+ struct btree_iter *extent_iter, -+ struct bkey_i *orig) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter reflink_iter = { NULL }; -+ struct bkey_s_c k; -+ struct bkey_i *r_v; -+ struct 
bkey_i_reflink_p *r_p; -+ __le64 *refcount; -+ int ret; -+ -+ if (orig->k.type == KEY_TYPE_inline_data) -+ bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); -+ -+ bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX, -+ BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_prev(&reflink_iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); -+ ret = PTR_ERR_OR_ZERO(r_v); -+ if (ret) -+ goto err; -+ -+ bkey_init(&r_v->k); -+ r_v->k.type = bkey_type_to_indirect(&orig->k); -+ r_v->k.p = reflink_iter.pos; -+ bch2_key_resize(&r_v->k, orig->k.size); -+ r_v->k.version = orig->k.version; -+ -+ set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); -+ -+ refcount = bkey_refcount(r_v); -+ *refcount = 0; -+ memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); -+ -+ ret = bch2_trans_update(trans, &reflink_iter, r_v, 0); -+ if (ret) -+ goto err; -+ -+ /* -+ * orig is in a bkey_buf which statically allocates 5 64s for the val, -+ * so we know it will be big enough: -+ */ -+ orig->k.type = KEY_TYPE_reflink_p; -+ r_p = bkey_i_to_reflink_p(orig); -+ set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); -+ -+ /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */ -+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) -+ __underlying_memset(&r_p->v, 0, sizeof(r_p->v)); -+#else -+ memset(&r_p->v, 0, sizeof(r_p->v)); -+#endif -+ -+ r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); -+ -+ ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+err: -+ bch2_trans_iter_exit(trans, &reflink_iter); -+ -+ return ret; -+} -+ -+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) -+{ -+ struct bkey_s_c k; -+ int ret; -+ -+ for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) { -+ if (bkey_extent_is_unwritten(k)) -+ continue; -+ -+ if (bkey_extent_is_data(k.k)) -+ return k; -+ } -+ -+ if (bkey_ge(iter->pos, end)) -+ bch2_btree_iter_set_pos(iter, end); -+ return ret ? 
bkey_s_c_err(ret) : bkey_s_c_null; -+} -+ -+s64 bch2_remap_range(struct bch_fs *c, -+ subvol_inum dst_inum, u64 dst_offset, -+ subvol_inum src_inum, u64 src_offset, -+ u64 remap_sectors, -+ u64 new_i_size, s64 *i_sectors_delta) -+{ -+ struct btree_trans *trans; -+ struct btree_iter dst_iter, src_iter; -+ struct bkey_s_c src_k; -+ struct bkey_buf new_dst, new_src; -+ struct bpos dst_start = POS(dst_inum.inum, dst_offset); -+ struct bpos src_start = POS(src_inum.inum, src_offset); -+ struct bpos dst_end = dst_start, src_end = src_start; -+ struct bch_io_opts opts; -+ struct bpos src_want; -+ u64 dst_done = 0; -+ u32 dst_snapshot, src_snapshot; -+ int ret = 0, ret2 = 0; -+ -+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink)) -+ return -BCH_ERR_erofs_no_writes; -+ -+ bch2_check_set_feature(c, BCH_FEATURE_reflink); -+ -+ dst_end.offset += remap_sectors; -+ src_end.offset += remap_sectors; -+ -+ bch2_bkey_buf_init(&new_dst); -+ bch2_bkey_buf_init(&new_src); -+ trans = bch2_trans_get(c); -+ -+ ret = bch2_inum_opts_get(trans, src_inum, &opts); -+ if (ret) -+ goto err; -+ -+ bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, -+ BTREE_ITER_INTENT); -+ bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, -+ BTREE_ITER_INTENT); -+ -+ while ((ret == 0 || -+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) && -+ bkey_lt(dst_iter.pos, dst_end)) { -+ struct disk_reservation disk_res = { 0 }; -+ -+ bch2_trans_begin(trans); -+ -+ if (fatal_signal_pending(current)) { -+ ret = -EINTR; -+ break; -+ } -+ -+ ret = bch2_subvolume_get_snapshot(trans, src_inum.subvol, -+ &src_snapshot); -+ if (ret) -+ continue; -+ -+ bch2_btree_iter_set_snapshot(&src_iter, src_snapshot); -+ -+ ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol, -+ &dst_snapshot); -+ if (ret) -+ continue; -+ -+ bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot); -+ -+ dst_done = dst_iter.pos.offset - dst_start.offset; -+ src_want = POS(src_start.inode, src_start.offset + dst_done); -+ bch2_btree_iter_set_pos(&src_iter, src_want); -+ -+ src_k = get_next_src(&src_iter, src_end); -+ ret = bkey_err(src_k); -+ if (ret) -+ continue; -+ -+ if (bkey_lt(src_want, src_iter.pos)) { -+ ret = bch2_fpunch_at(trans, &dst_iter, dst_inum, -+ min(dst_end.offset, -+ dst_iter.pos.offset + -+ src_iter.pos.offset - src_want.offset), -+ i_sectors_delta); -+ continue; -+ } -+ -+ if (src_k.k->type != KEY_TYPE_reflink_p) { -+ bch2_btree_iter_set_pos_to_extent_start(&src_iter); -+ -+ bch2_bkey_buf_reassemble(&new_src, c, src_k); -+ src_k = bkey_i_to_s_c(new_src.k); -+ -+ ret = bch2_make_extent_indirect(trans, &src_iter, -+ new_src.k); -+ if (ret) -+ continue; -+ -+ BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); -+ } -+ -+ if (src_k.k->type == KEY_TYPE_reflink_p) { -+ struct bkey_s_c_reflink_p src_p = -+ bkey_s_c_to_reflink_p(src_k); -+ struct bkey_i_reflink_p *dst_p = -+ bkey_reflink_p_init(new_dst.k); -+ -+ u64 offset = le64_to_cpu(src_p.v->idx) + -+ (src_want.offset - -+ bkey_start_offset(src_k.k)); -+ -+ dst_p->v.idx = cpu_to_le64(offset); -+ } else { -+ BUG(); -+ } -+ -+ new_dst.k->k.p = dst_iter.pos; -+ bch2_key_resize(&new_dst.k->k, -+ min(src_k.k->p.offset - src_want.offset, -+ dst_end.offset - dst_iter.pos.offset)); -+ -+ ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, -+ opts.background_target, -+ opts.background_compression) ?: -+ bch2_extent_update(trans, dst_inum, &dst_iter, -+ new_dst.k, &disk_res, -+ new_i_size, i_sectors_delta, -+ true); -+ bch2_disk_reservation_put(c, &disk_res); -+ } -+ 
bch2_trans_iter_exit(trans, &dst_iter); -+ bch2_trans_iter_exit(trans, &src_iter); -+ -+ BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end)); -+ BUG_ON(bkey_gt(dst_iter.pos, dst_end)); -+ -+ dst_done = dst_iter.pos.offset - dst_start.offset; -+ new_i_size = min(dst_iter.pos.offset << 9, new_i_size); -+ -+ do { -+ struct bch_inode_unpacked inode_u; -+ struct btree_iter inode_iter = { NULL }; -+ -+ bch2_trans_begin(trans); -+ -+ ret2 = bch2_inode_peek(trans, &inode_iter, &inode_u, -+ dst_inum, BTREE_ITER_INTENT); -+ -+ if (!ret2 && -+ inode_u.bi_size < new_i_size) { -+ inode_u.bi_size = new_i_size; -+ ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?: -+ bch2_trans_commit(trans, NULL, NULL, -+ BTREE_INSERT_NOFAIL); -+ } -+ -+ bch2_trans_iter_exit(trans, &inode_iter); -+ } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); -+err: -+ bch2_trans_put(trans); -+ bch2_bkey_buf_exit(&new_src, c); -+ bch2_bkey_buf_exit(&new_dst, c); -+ -+ bch2_write_ref_put(c, BCH_WRITE_REF_reflink); -+ -+ return dst_done ?: ret ?: ret2; -+} -diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h -new file mode 100644 -index 000000000000..8ccf3f9c4939 ---- /dev/null -+++ b/fs/bcachefs/reflink.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REFLINK_H -+#define _BCACHEFS_REFLINK_H -+ -+enum bkey_invalid_flags; -+ -+int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -+ -+#define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_p_invalid, \ -+ .val_to_text = bch2_reflink_p_to_text, \ -+ .key_merge = bch2_reflink_p_merge, \ -+ .trans_trigger = bch2_trans_mark_reflink_p, \ -+ .atomic_trigger = bch2_mark_reflink_p, \ -+ .min_val_size = 16, \ -+}) -+ -+int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, -+ struct bkey_s_c); -+int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_i *, unsigned); -+ -+#define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ -+ .key_invalid = bch2_reflink_v_invalid, \ -+ .val_to_text = bch2_reflink_v_to_text, \ -+ .swab = bch2_ptr_swab, \ -+ .trans_trigger = bch2_trans_mark_reflink_v, \ -+ .atomic_trigger = bch2_mark_extent, \ -+ .min_val_size = 8, \ -+}) -+ -+int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_indirect_inline_data_to_text(struct printbuf *, -+ struct bch_fs *, struct bkey_s_c); -+int bch2_trans_mark_indirect_inline_data(struct btree_trans *, -+ enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_i *, -+ unsigned); -+ -+#define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ -+ .key_invalid = bch2_indirect_inline_data_invalid, \ -+ .val_to_text = bch2_indirect_inline_data_to_text, \ -+ .trans_trigger = bch2_trans_mark_indirect_inline_data, \ -+ .min_val_size = 8, \ -+}) -+ -+static inline const __le64 *bkey_refcount_c(struct bkey_s_c k) -+{ -+ switch (k.k->type) { -+ case KEY_TYPE_reflink_v: -+ return &bkey_s_c_to_reflink_v(k).v->refcount; -+ case KEY_TYPE_indirect_inline_data: -+ return &bkey_s_c_to_indirect_inline_data(k).v->refcount; -+ default: -+ return NULL; -+ } -+} -+ -+static inline __le64 
*bkey_refcount(struct bkey_i *k) -+{ -+ switch (k->k.type) { -+ case KEY_TYPE_reflink_v: -+ return &bkey_i_to_reflink_v(k)->v.refcount; -+ case KEY_TYPE_indirect_inline_data: -+ return &bkey_i_to_indirect_inline_data(k)->v.refcount; -+ default: -+ return NULL; -+ } -+} -+ -+s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, -+ subvol_inum, u64, u64, u64, s64 *); -+ -+#endif /* _BCACHEFS_REFLINK_H */ -diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c -new file mode 100644 -index 000000000000..1c3ae13bfced ---- /dev/null -+++ b/fs/bcachefs/replicas.c -@@ -0,0 +1,1050 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "buckets.h" -+#include "journal.h" -+#include "replicas.h" -+#include "super-io.h" -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, -+ struct bch_replicas_cpu *); -+ -+/* Replicas tracking - in memory: */ -+ -+static void verify_replicas_entry(struct bch_replicas_entry *e) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ unsigned i; -+ -+ BUG_ON(e->data_type >= BCH_DATA_NR); -+ BUG_ON(!e->nr_devs); -+ BUG_ON(e->nr_required > 1 && -+ e->nr_required >= e->nr_devs); -+ -+ for (i = 0; i + 1 < e->nr_devs; i++) -+ BUG_ON(e->devs[i] >= e->devs[i + 1]); -+#endif -+} -+ -+void bch2_replicas_entry_sort(struct bch_replicas_entry *e) -+{ -+ bubble_sort(e->devs, e->nr_devs, u8_cmp); -+} -+ -+static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) -+{ -+ eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); -+} -+ -+static void bch2_replicas_entry_v0_to_text(struct printbuf *out, -+ struct bch_replicas_entry_v0 *e) -+{ -+ unsigned i; -+ -+ if (e->data_type < BCH_DATA_NR) -+ prt_printf(out, "%s", bch2_data_types[e->data_type]); -+ else -+ prt_printf(out, "(invalid data type %u)", e->data_type); -+ -+ prt_printf(out, ": %u [", e->nr_devs); -+ for (i = 0; i < e->nr_devs; i++) -+ prt_printf(out, i ? " %u" : "%u", e->devs[i]); -+ prt_printf(out, "]"); -+} -+ -+void bch2_replicas_entry_to_text(struct printbuf *out, -+ struct bch_replicas_entry *e) -+{ -+ unsigned i; -+ -+ if (e->data_type < BCH_DATA_NR) -+ prt_printf(out, "%s", bch2_data_types[e->data_type]); -+ else -+ prt_printf(out, "(invalid data type %u)", e->data_type); -+ -+ prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs); -+ for (i = 0; i < e->nr_devs; i++) -+ prt_printf(out, i ? 
" %u" : "%u", e->devs[i]); -+ prt_printf(out, "]"); -+} -+ -+void bch2_cpu_replicas_to_text(struct printbuf *out, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_cpu_replicas_entry(r, e) { -+ if (!first) -+ prt_printf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+} -+ -+static void extent_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); -+ const union bch_extent_entry *entry; -+ struct extent_ptr_decoded p; -+ -+ r->nr_required = 1; -+ -+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { -+ if (p.ptr.cached) -+ continue; -+ -+ if (!p.has_ec) -+ r->devs[r->nr_devs++] = p.ptr.dev; -+ else -+ r->nr_required = 0; -+ } -+} -+ -+static void stripe_to_replicas(struct bkey_s_c k, -+ struct bch_replicas_entry *r) -+{ -+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); -+ const struct bch_extent_ptr *ptr; -+ -+ r->nr_required = s.v->nr_blocks - s.v->nr_redundant; -+ -+ for (ptr = s.v->ptrs; -+ ptr < s.v->ptrs + s.v->nr_blocks; -+ ptr++) -+ r->devs[r->nr_devs++] = ptr->dev; -+} -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *e, -+ struct bkey_s_c k) -+{ -+ e->nr_devs = 0; -+ -+ switch (k.k->type) { -+ case KEY_TYPE_btree_ptr: -+ case KEY_TYPE_btree_ptr_v2: -+ e->data_type = BCH_DATA_btree; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_extent: -+ case KEY_TYPE_reflink_v: -+ e->data_type = BCH_DATA_user; -+ extent_to_replicas(k, e); -+ break; -+ case KEY_TYPE_stripe: -+ e->data_type = BCH_DATA_parity; -+ stripe_to_replicas(k, e); -+ break; -+ } -+ -+ bch2_replicas_entry_sort(e); -+} -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *e, -+ enum bch_data_type data_type, -+ struct bch_devs_list devs) -+{ -+ unsigned i; -+ -+ BUG_ON(!data_type || -+ data_type == BCH_DATA_sb || -+ data_type >= BCH_DATA_NR); -+ -+ e->data_type = data_type; -+ e->nr_devs = 0; -+ e->nr_required = 1; -+ -+ for (i = 0; i < devs.nr; i++) -+ e->devs[e->nr_devs++] = devs.devs[i]; -+ -+ bch2_replicas_entry_sort(e); -+} -+ -+static struct bch_replicas_cpu -+cpu_replicas_add_entry(struct bch_replicas_cpu *old, -+ struct bch_replicas_entry *new_entry) -+{ -+ unsigned i; -+ struct bch_replicas_cpu new = { -+ .nr = old->nr + 1, -+ .entry_size = max_t(unsigned, old->entry_size, -+ replicas_entry_bytes(new_entry)), -+ }; -+ -+ BUG_ON(!new_entry->data_type); -+ verify_replicas_entry(new_entry); -+ -+ new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); -+ if (!new.entries) -+ return new; -+ -+ for (i = 0; i < old->nr; i++) -+ memcpy(cpu_replicas_entry(&new, i), -+ cpu_replicas_entry(old, i), -+ old->entry_size); -+ -+ memcpy(cpu_replicas_entry(&new, old->nr), -+ new_entry, -+ replicas_entry_bytes(new_entry)); -+ -+ bch2_cpu_replicas_sort(&new); -+ return new; -+} -+ -+static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ int idx, entry_size = replicas_entry_bytes(search); -+ -+ if (unlikely(entry_size > r->entry_size)) -+ return -1; -+ -+ verify_replicas_entry(search); -+ -+#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size) -+ idx = eytzinger0_find(r->entries, r->nr, r->entry_size, -+ entry_cmp, search); -+#undef entry_cmp -+ -+ return idx < r->nr ? 
idx : -1; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ bch2_replicas_entry_sort(search); -+ -+ return __replicas_entry_idx(&c->replicas, search); -+} -+ -+static bool __replicas_has_entry(struct bch_replicas_cpu *r, -+ struct bch_replicas_entry *search) -+{ -+ return __replicas_entry_idx(r, search) >= 0; -+} -+ -+bool bch2_replicas_marked(struct bch_fs *c, -+ struct bch_replicas_entry *search) -+{ -+ bool marked; -+ -+ if (!search->nr_devs) -+ return true; -+ -+ verify_replicas_entry(search); -+ -+ percpu_down_read(&c->mark_lock); -+ marked = __replicas_has_entry(&c->replicas, search) && -+ (likely((!c->replicas_gc.entries)) || -+ __replicas_has_entry(&c->replicas_gc, search)); -+ percpu_up_read(&c->mark_lock); -+ -+ return marked; -+} -+ -+static void __replicas_table_update(struct bch_fs_usage *dst, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage *src, -+ struct bch_replicas_cpu *src_r) -+{ -+ int src_idx, dst_idx; -+ -+ *dst = *src; -+ -+ for (src_idx = 0; src_idx < src_r->nr; src_idx++) { -+ if (!src->replicas[src_idx]) -+ continue; -+ -+ dst_idx = __replicas_entry_idx(dst_r, -+ cpu_replicas_entry(src_r, src_idx)); -+ BUG_ON(dst_idx < 0); -+ -+ dst->replicas[dst_idx] = src->replicas[src_idx]; -+ } -+} -+ -+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p, -+ struct bch_replicas_cpu *dst_r, -+ struct bch_fs_usage __percpu *src_p, -+ struct bch_replicas_cpu *src_r) -+{ -+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr; -+ struct bch_fs_usage *dst, *src = (void *) -+ bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr); -+ -+ preempt_disable(); -+ dst = this_cpu_ptr(dst_p); -+ preempt_enable(); -+ -+ __replicas_table_update(dst, dst_r, src, src_r); -+} -+ -+/* -+ * Resize filesystem accounting: -+ */ -+static int replicas_table_update(struct bch_fs *c, -+ struct bch_replicas_cpu *new_r) -+{ -+ struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR]; -+ struct bch_fs_usage_online *new_scratch = NULL; -+ struct bch_fs_usage __percpu *new_gc = NULL; -+ struct bch_fs_usage *new_base = NULL; -+ unsigned i, bytes = sizeof(struct bch_fs_usage) + -+ sizeof(u64) * new_r->nr; -+ unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) + -+ sizeof(u64) * new_r->nr; -+ int ret = 0; -+ -+ memset(new_usage, 0, sizeof(new_usage)); -+ -+ for (i = 0; i < ARRAY_SIZE(new_usage); i++) -+ if (!(new_usage[i] = __alloc_percpu_gfp(bytes, -+ sizeof(u64), GFP_KERNEL))) -+ goto err; -+ -+ if (!(new_base = kzalloc(bytes, GFP_KERNEL)) || -+ !(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) || -+ (c->usage_gc && -+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL)))) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(new_usage); i++) -+ if (c->usage[i]) -+ __replicas_table_update_pcpu(new_usage[i], new_r, -+ c->usage[i], &c->replicas); -+ if (c->usage_base) -+ __replicas_table_update(new_base, new_r, -+ c->usage_base, &c->replicas); -+ if (c->usage_gc) -+ __replicas_table_update_pcpu(new_gc, new_r, -+ c->usage_gc, &c->replicas); -+ -+ for (i = 0; i < ARRAY_SIZE(new_usage); i++) -+ swap(c->usage[i], new_usage[i]); -+ swap(c->usage_base, new_base); -+ swap(c->usage_scratch, new_scratch); -+ swap(c->usage_gc, new_gc); -+ swap(c->replicas, *new_r); -+out: -+ free_percpu(new_gc); -+ kfree(new_scratch); -+ for (i = 0; i < ARRAY_SIZE(new_usage); i++) -+ free_percpu(new_usage[i]); -+ kfree(new_base); -+ return ret; -+err: -+ bch_err(c, "error updating replicas table: memory allocation failure"); -+ 
ret = -BCH_ERR_ENOMEM_replicas_table; -+ goto out; -+} -+ -+static unsigned reserve_journal_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_replicas_entry *e; -+ unsigned journal_res_u64s = 0; -+ -+ /* nr_inodes: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* key_version: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)); -+ -+ /* persistent_reserved: */ -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) * -+ BCH_REPLICAS_MAX; -+ -+ for_each_cpu_replicas_entry(r, e) -+ journal_res_u64s += -+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + -+ e->nr_devs, sizeof(u64)); -+ return journal_res_u64s; -+} -+ -+noinline -+static int bch2_mark_replicas_slowpath(struct bch_fs *c, -+ struct bch_replicas_entry *new_entry) -+{ -+ struct bch_replicas_cpu new_r, new_gc; -+ int ret = 0; -+ -+ verify_replicas_entry(new_entry); -+ -+ memset(&new_r, 0, sizeof(new_r)); -+ memset(&new_gc, 0, sizeof(new_gc)); -+ -+ mutex_lock(&c->sb_lock); -+ -+ if (c->replicas_gc.entries && -+ !__replicas_has_entry(&c->replicas_gc, new_entry)) { -+ new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); -+ if (!new_gc.entries) { -+ ret = -BCH_ERR_ENOMEM_cpu_replicas; -+ goto err; -+ } -+ } -+ -+ if (!__replicas_has_entry(&c->replicas, new_entry)) { -+ new_r = cpu_replicas_add_entry(&c->replicas, new_entry); -+ if (!new_r.entries) { -+ ret = -BCH_ERR_ENOMEM_cpu_replicas; -+ goto err; -+ } -+ -+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); -+ if (ret) -+ goto err; -+ -+ bch2_journal_entry_res_resize(&c->journal, -+ &c->replicas_journal_res, -+ reserve_journal_replicas(c, &new_r)); -+ } -+ -+ if (!new_r.entries && -+ !new_gc.entries) -+ goto out; -+ -+ /* allocations done, now commit: */ -+ -+ if (new_r.entries) -+ bch2_write_super(c); -+ -+ /* don't update in memory replicas until changes are persistent */ -+ percpu_down_write(&c->mark_lock); -+ if (new_r.entries) -+ ret = replicas_table_update(c, &new_r); -+ if (new_gc.entries) -+ swap(new_gc, c->replicas_gc); -+ percpu_up_write(&c->mark_lock); -+out: -+ mutex_unlock(&c->sb_lock); -+ -+ kfree(new_r.entries); -+ kfree(new_gc.entries); -+ -+ return ret; -+err: -+ bch_err_msg(c, ret, "adding replicas entry"); -+ goto out; -+} -+ -+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r) -+{ -+ return likely(bch2_replicas_marked(c, r)) -+ ? 
0 : bch2_mark_replicas_slowpath(c, r); -+} -+ -+/* replicas delta list: */ -+ -+int bch2_replicas_delta_list_mark(struct bch_fs *c, -+ struct replicas_delta_list *r) -+{ -+ struct replicas_delta *d = r->d; -+ struct replicas_delta *top = (void *) r->d + r->used; -+ int ret = 0; -+ -+ for (d = r->d; !ret && d != top; d = replicas_delta_next(d)) -+ ret = bch2_mark_replicas(c, &d->r); -+ return ret; -+} -+ -+/* -+ * Old replicas_gc mechanism: only used for journal replicas entries now, should -+ * die at some point: -+ */ -+ -+int bch2_replicas_gc_end(struct bch_fs *c, int ret) -+{ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ ret = ret ?: -+ bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?: -+ replicas_table_update(c, &c->replicas_gc); -+ -+ kfree(c->replicas_gc.entries); -+ c->replicas_gc.entries = NULL; -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) -+{ -+ struct bch_replicas_entry *e; -+ unsigned i = 0; -+ -+ lockdep_assert_held(&c->replicas_gc_lock); -+ -+ mutex_lock(&c->sb_lock); -+ BUG_ON(c->replicas_gc.entries); -+ -+ c->replicas_gc.nr = 0; -+ c->replicas_gc.entry_size = 0; -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) { -+ c->replicas_gc.nr++; -+ c->replicas_gc.entry_size = -+ max_t(unsigned, c->replicas_gc.entry_size, -+ replicas_entry_bytes(e)); -+ } -+ -+ c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, -+ c->replicas_gc.entry_size, -+ GFP_KERNEL); -+ if (!c->replicas_gc.entries) { -+ mutex_unlock(&c->sb_lock); -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -BCH_ERR_ENOMEM_replicas_gc; -+ } -+ -+ for_each_cpu_replicas_entry(&c->replicas, e) -+ if (!((1 << e->data_type) & typemask)) -+ memcpy(cpu_replicas_entry(&c->replicas_gc, i++), -+ e, c->replicas_gc.entry_size); -+ -+ bch2_cpu_replicas_sort(&c->replicas_gc); -+ mutex_unlock(&c->sb_lock); -+ -+ return 0; -+} -+ -+/* -+ * New much simpler mechanism for clearing out unneeded replicas entries - drop -+ * replicas entries that have 0 sectors used. -+ * -+ * However, we don't track sector counts for journal usage, so this doesn't drop -+ * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism -+ * is retained for that. 
-+ */ -+int bch2_replicas_gc2(struct bch_fs *c) -+{ -+ struct bch_replicas_cpu new = { 0 }; -+ unsigned i, nr; -+ int ret = 0; -+ -+ bch2_journal_meta(&c->journal); -+retry: -+ nr = READ_ONCE(c->replicas.nr); -+ new.entry_size = READ_ONCE(c->replicas.entry_size); -+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); -+ if (!new.entries) { -+ bch_err(c, "error allocating c->replicas_gc"); -+ return -BCH_ERR_ENOMEM_replicas_gc; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ percpu_down_write(&c->mark_lock); -+ -+ if (nr != c->replicas.nr || -+ new.entry_size != c->replicas.entry_size) { -+ percpu_up_write(&c->mark_lock); -+ mutex_unlock(&c->sb_lock); -+ kfree(new.entries); -+ goto retry; -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ -+ if (e->data_type == BCH_DATA_journal || -+ c->usage_base->replicas[i] || -+ percpu_u64_get(&c->usage[0]->replicas[i]) || -+ percpu_u64_get(&c->usage[1]->replicas[i]) || -+ percpu_u64_get(&c->usage[2]->replicas[i]) || -+ percpu_u64_get(&c->usage[3]->replicas[i])) -+ memcpy(cpu_replicas_entry(&new, new.nr++), -+ e, new.entry_size); -+ } -+ -+ bch2_cpu_replicas_sort(&new); -+ -+ ret = bch2_cpu_replicas_to_sb_replicas(c, &new) ?: -+ replicas_table_update(c, &new); -+ -+ kfree(new.entries); -+ -+ percpu_up_write(&c->mark_lock); -+ -+ if (!ret) -+ bch2_write_super(c); -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+int bch2_replicas_set_usage(struct bch_fs *c, -+ struct bch_replicas_entry *r, -+ u64 sectors) -+{ -+ int ret, idx = bch2_replicas_entry_idx(c, r); -+ -+ if (idx < 0) { -+ struct bch_replicas_cpu n; -+ -+ n = cpu_replicas_add_entry(&c->replicas, r); -+ if (!n.entries) -+ return -BCH_ERR_ENOMEM_cpu_replicas; -+ -+ ret = replicas_table_update(c, &n); -+ if (ret) -+ return ret; -+ -+ kfree(n.entries); -+ -+ idx = bch2_replicas_entry_idx(c, r); -+ BUG_ON(ret < 0); -+ } -+ -+ c->usage_base->replicas[idx] = sectors; -+ -+ return 0; -+} -+ -+/* Replicas tracking - superblock: */ -+ -+static int -+__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry *e, *dst; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); -+ if (!cpu_r->entries) -+ return -BCH_ERR_ENOMEM_cpu_replicas; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ dst = cpu_replicas_entry(cpu_r, idx++); -+ memcpy(dst, e, replicas_entry_bytes(e)); -+ bch2_replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+static int -+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, -+ struct bch_replicas_cpu *cpu_r) -+{ -+ struct bch_replicas_entry_v0 *e; -+ unsigned nr = 0, entry_size = 0, idx = 0; -+ -+ for_each_replicas_entry(sb_r, e) { -+ entry_size = max_t(unsigned, entry_size, -+ replicas_entry_bytes(e)); -+ nr++; -+ } -+ -+ entry_size += sizeof(struct bch_replicas_entry) - -+ sizeof(struct bch_replicas_entry_v0); -+ -+ cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); -+ if (!cpu_r->entries) -+ return -BCH_ERR_ENOMEM_cpu_replicas; -+ -+ cpu_r->nr = nr; -+ cpu_r->entry_size = entry_size; -+ -+ for_each_replicas_entry(sb_r, e) { -+ struct bch_replicas_entry *dst = -+ cpu_replicas_entry(cpu_r, idx++); -+ -+ dst->data_type = e->data_type; -+ dst->nr_devs = e->nr_devs; -+ 
dst->nr_required = 1; -+ memcpy(dst->devs, e->devs, e->nr_devs); -+ bch2_replicas_entry_sort(dst); -+ } -+ -+ return 0; -+} -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) -+{ -+ struct bch_sb_field_replicas *sb_v1; -+ struct bch_sb_field_replicas_v0 *sb_v0; -+ struct bch_replicas_cpu new_r = { 0, 0, NULL }; -+ int ret = 0; -+ -+ if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas))) -+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); -+ else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0))) -+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); -+ if (ret) -+ return ret; -+ -+ bch2_cpu_replicas_sort(&new_r); -+ -+ percpu_down_write(&c->mark_lock); -+ -+ ret = replicas_table_update(c, &new_r); -+ percpu_up_write(&c->mark_lock); -+ -+ kfree(new_r.entries); -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r; -+ struct bch_replicas_entry_v0 *dst; -+ struct bch_replicas_entry *src; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) -+ bytes += replicas_entry_bytes(src) - 1; -+ -+ sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -BCH_ERR_ENOSPC_sb_replicas; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); -+ sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ dst->data_type = src->data_type; -+ dst->nr_devs = src->nr_devs; -+ memcpy(dst->devs, src->devs, src->nr_devs); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, -+ struct bch_replicas_cpu *r) -+{ -+ struct bch_sb_field_replicas *sb_r; -+ struct bch_replicas_entry *dst, *src; -+ bool need_v1 = false; -+ size_t bytes; -+ -+ bytes = sizeof(struct bch_sb_field_replicas); -+ -+ for_each_cpu_replicas_entry(r, src) { -+ bytes += replicas_entry_bytes(src); -+ if (src->nr_required != 1) -+ need_v1 = true; -+ } -+ -+ if (!need_v1) -+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r); -+ -+ sb_r = bch2_sb_field_resize(&c->disk_sb, replicas, -+ DIV_ROUND_UP(bytes, sizeof(u64))); -+ if (!sb_r) -+ return -BCH_ERR_ENOSPC_sb_replicas; -+ -+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); -+ sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas); -+ -+ memset(&sb_r->entries, 0, -+ vstruct_end(&sb_r->field) - -+ (void *) &sb_r->entries); -+ -+ dst = sb_r->entries; -+ for_each_cpu_replicas_entry(r, src) { -+ memcpy(dst, src, replicas_entry_bytes(src)); -+ -+ dst = replicas_entry_next(dst); -+ -+ BUG_ON((void *) dst > vstruct_end(&sb_r->field)); -+ } -+ -+ return 0; -+} -+ -+static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, -+ struct bch_sb *sb, -+ struct printbuf *err) -+{ -+ unsigned i, j; -+ -+ sort_cmp_size(cpu_r->entries, -+ cpu_r->nr, -+ cpu_r->entry_size, -+ memcmp, NULL); -+ -+ for (i = 0; i < cpu_r->nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(cpu_r, i); -+ -+ if (e->data_type >= BCH_DATA_NR) { -+ prt_printf(err, "invalid data type in entry "); -+ bch2_replicas_entry_to_text(err, e); -+ return -BCH_ERR_invalid_sb_replicas; -+ } -+ -+ if (!e->nr_devs) { -+ prt_printf(err, "no devices in entry "); -+ 
bch2_replicas_entry_to_text(err, e); -+ return -BCH_ERR_invalid_sb_replicas; -+ } -+ -+ if (e->nr_required > 1 && -+ e->nr_required >= e->nr_devs) { -+ prt_printf(err, "bad nr_required in entry "); -+ bch2_replicas_entry_to_text(err, e); -+ return -BCH_ERR_invalid_sb_replicas; -+ } -+ -+ for (j = 0; j < e->nr_devs; j++) -+ if (!bch2_dev_exists(sb, e->devs[j])) { -+ prt_printf(err, "invalid device %u in entry ", e->devs[j]); -+ bch2_replicas_entry_to_text(err, e); -+ return -BCH_ERR_invalid_sb_replicas; -+ } -+ -+ if (i + 1 < cpu_r->nr) { -+ struct bch_replicas_entry *n = -+ cpu_replicas_entry(cpu_r, i + 1); -+ -+ BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0); -+ -+ if (!memcmp(e, n, cpu_r->entry_size)) { -+ prt_printf(err, "duplicate replicas entry "); -+ bch2_replicas_entry_to_text(err, e); -+ return -BCH_ERR_invalid_sb_replicas; -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); -+ struct bch_replicas_cpu cpu_r; -+ int ret; -+ -+ ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r); -+ if (ret) -+ return ret; -+ -+ ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); -+ kfree(cpu_r.entries); -+ return ret; -+} -+ -+static void bch2_sb_replicas_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas *r = field_to_type(f, replicas); -+ struct bch_replicas_entry *e; -+ bool first = true; -+ -+ for_each_replicas_entry(r, e) { -+ if (!first) -+ prt_printf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_to_text(out, e); -+ } -+ prt_newline(out); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas = { -+ .validate = bch2_sb_replicas_validate, -+ .to_text = bch2_sb_replicas_to_text, -+}; -+ -+static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); -+ struct bch_replicas_cpu cpu_r; -+ int ret; -+ -+ ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r); -+ if (ret) -+ return ret; -+ -+ ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); -+ kfree(cpu_r.entries); -+ return ret; -+} -+ -+static void bch2_sb_replicas_v0_to_text(struct printbuf *out, -+ struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); -+ struct bch_replicas_entry_v0 *e; -+ bool first = true; -+ -+ for_each_replicas_entry(sb_r, e) { -+ if (!first) -+ prt_printf(out, " "); -+ first = false; -+ -+ bch2_replicas_entry_v0_to_text(out, e); -+ } -+ prt_newline(out); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { -+ .validate = bch2_sb_replicas_v0_validate, -+ .to_text = bch2_sb_replicas_v0_to_text, -+}; -+ -+/* Query replicas: */ -+ -+bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, -+ unsigned flags, bool print) -+{ -+ struct bch_replicas_entry *e; -+ bool ret = true; -+ -+ percpu_down_read(&c->mark_lock); -+ for_each_cpu_replicas_entry(&c->replicas, e) { -+ unsigned i, nr_online = 0, nr_failed = 0, dflags = 0; -+ bool metadata = e->data_type < BCH_DATA_user; -+ -+ if (e->data_type == BCH_DATA_cached) -+ continue; -+ -+ for (i = 0; i < e->nr_devs; i++) { -+ struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]); -+ -+ nr_online += test_bit(e->devs[i], devs.d); -+ nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed; -+ } -+ -+ if (nr_failed == 
e->nr_devs) -+ continue; -+ -+ if (nr_online < e->nr_required) -+ dflags |= metadata -+ ? BCH_FORCE_IF_METADATA_LOST -+ : BCH_FORCE_IF_DATA_LOST; -+ -+ if (nr_online < e->nr_devs) -+ dflags |= metadata -+ ? BCH_FORCE_IF_METADATA_DEGRADED -+ : BCH_FORCE_IF_DATA_DEGRADED; -+ -+ if (dflags & ~flags) { -+ if (print) { -+ struct printbuf buf = PRINTBUF; -+ -+ bch2_replicas_entry_to_text(&buf, e); -+ bch_err(c, "insufficient devices online (%u) for replicas entry %s", -+ nr_online, buf.buf); -+ printbuf_exit(&buf); -+ } -+ ret = false; -+ break; -+ } -+ -+ } -+ percpu_up_read(&c->mark_lock); -+ -+ return ret; -+} -+ -+unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) -+{ -+ struct bch_sb_field_replicas *replicas; -+ struct bch_sb_field_replicas_v0 *replicas_v0; -+ unsigned i, data_has = 0; -+ -+ replicas = bch2_sb_field_get(sb, replicas); -+ replicas_v0 = bch2_sb_field_get(sb, replicas_v0); -+ -+ if (replicas) { -+ struct bch_replicas_entry *r; -+ -+ for_each_replicas_entry(replicas, r) -+ for (i = 0; i < r->nr_devs; i++) -+ if (r->devs[i] == dev) -+ data_has |= 1 << r->data_type; -+ } else if (replicas_v0) { -+ struct bch_replicas_entry_v0 *r; -+ -+ for_each_replicas_entry_v0(replicas_v0, r) -+ for (i = 0; i < r->nr_devs; i++) -+ if (r->devs[i] == dev) -+ data_has |= 1 << r->data_type; -+ } -+ -+ -+ return data_has; -+} -+ -+unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) -+{ -+ unsigned ret; -+ -+ mutex_lock(&c->sb_lock); -+ ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+void bch2_fs_replicas_exit(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ kfree(c->usage_scratch); -+ for (i = 0; i < ARRAY_SIZE(c->usage); i++) -+ free_percpu(c->usage[i]); -+ kfree(c->usage_base); -+ kfree(c->replicas.entries); -+ kfree(c->replicas_gc.entries); -+ -+ mempool_exit(&c->replicas_delta_pool); -+} -+ -+int bch2_fs_replicas_init(struct bch_fs *c) -+{ -+ bch2_journal_entry_res_resize(&c->journal, -+ &c->replicas_journal_res, -+ reserve_journal_replicas(c, &c->replicas)); -+ -+ return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1, -+ REPLICAS_DELTA_LIST_MAX) ?: -+ replicas_table_update(c, &c->replicas); -+} -diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h -new file mode 100644 -index 000000000000..4887675a86f0 ---- /dev/null -+++ b/fs/bcachefs/replicas.h -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REPLICAS_H -+#define _BCACHEFS_REPLICAS_H -+ -+#include "bkey.h" -+#include "eytzinger.h" -+#include "replicas_types.h" -+ -+void bch2_replicas_entry_sort(struct bch_replicas_entry *); -+void bch2_replicas_entry_to_text(struct printbuf *, -+ struct bch_replicas_entry *); -+void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); -+ -+static inline struct bch_replicas_entry * -+cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i) -+{ -+ return (void *) r->entries + r->entry_size * i; -+} -+ -+int bch2_replicas_entry_idx(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+void bch2_devlist_to_replicas(struct bch_replicas_entry *, -+ enum bch_data_type, -+ struct bch_devs_list); -+bool bch2_replicas_marked(struct bch_fs *, struct bch_replicas_entry *); -+int bch2_mark_replicas(struct bch_fs *, -+ struct bch_replicas_entry *); -+ -+static inline struct replicas_delta * -+replicas_delta_next(struct replicas_delta *d) -+{ -+ return (void *) d + replicas_entry_bytes(&d->r) + 8; -+} -+ -+int bch2_replicas_delta_list_mark(struct bch_fs *, struct 
replicas_delta_list *); -+ -+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c); -+ -+static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e, -+ unsigned dev) -+{ -+ e->data_type = BCH_DATA_cached; -+ e->nr_devs = 1; -+ e->nr_required = 1; -+ e->devs[0] = dev; -+} -+ -+bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask, -+ unsigned, bool); -+ -+unsigned bch2_sb_dev_has_data(struct bch_sb *, unsigned); -+unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); -+ -+int bch2_replicas_gc_end(struct bch_fs *, int); -+int bch2_replicas_gc_start(struct bch_fs *, unsigned); -+int bch2_replicas_gc2(struct bch_fs *); -+ -+int bch2_replicas_set_usage(struct bch_fs *, -+ struct bch_replicas_entry *, -+ u64); -+ -+#define for_each_cpu_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\ -+ _i = (void *) (_i) + (_r)->entry_size) -+ -+/* iterate over superblock replicas - used by userspace tools: */ -+ -+#define replicas_entry_next(_i) \ -+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i))) -+ -+#define for_each_replicas_entry(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+#define for_each_replicas_entry_v0(_r, _i) \ -+ for (_i = (_r)->entries; \ -+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\ -+ (_i) = replicas_entry_next(_i)) -+ -+int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas; -+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0; -+ -+void bch2_fs_replicas_exit(struct bch_fs *); -+int bch2_fs_replicas_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_REPLICAS_H */ -diff --git a/fs/bcachefs/replicas_types.h b/fs/bcachefs/replicas_types.h -new file mode 100644 -index 000000000000..5cfff489bbc3 ---- /dev/null -+++ b/fs/bcachefs/replicas_types.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_REPLICAS_TYPES_H -+#define _BCACHEFS_REPLICAS_TYPES_H -+ -+struct bch_replicas_cpu { -+ unsigned nr; -+ unsigned entry_size; -+ struct bch_replicas_entry *entries; -+}; -+ -+struct replicas_delta { -+ s64 delta; -+ struct bch_replicas_entry r; -+} __packed; -+ -+struct replicas_delta_list { -+ unsigned size; -+ unsigned used; -+ -+ struct {} memset_start; -+ u64 nr_inodes; -+ u64 persistent_reserved[BCH_REPLICAS_MAX]; -+ struct {} memset_end; -+ struct replicas_delta d[0]; -+}; -+ -+#endif /* _BCACHEFS_REPLICAS_TYPES_H */ -diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c -new file mode 100644 -index 000000000000..e151ada1c8bd ---- /dev/null -+++ b/fs/bcachefs/sb-clean.c -@@ -0,0 +1,398 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_update_interior.h" -+#include "buckets.h" -+#include "error.h" -+#include "journal_io.h" -+#include "replicas.h" -+#include "sb-clean.h" -+#include "super-io.h" -+ -+/* -+ * BCH_SB_FIELD_clean: -+ * -+ * Btree roots, and a few other things, are recovered from the journal after an -+ * unclean shutdown - but after a clean shutdown, to avoid having to read the -+ * journal, we can store them in the superblock. 
-+ * -+ * bch_sb_field_clean simply contains a list of journal entries, stored exactly -+ * as they would be in the journal: -+ */ -+ -+int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean, -+ int write) -+{ -+ struct jset_entry *entry; -+ int ret; -+ -+ for (entry = clean->start; -+ entry < (struct jset_entry *) vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) { -+ ret = bch2_journal_entry_validate(c, NULL, entry, -+ le16_to_cpu(c->disk_sb.sb->version), -+ BCH_SB_BIG_ENDIAN(c->disk_sb.sb), -+ write); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static struct bkey_i *btree_root_find(struct bch_fs *c, -+ struct bch_sb_field_clean *clean, -+ struct jset *j, -+ enum btree_id id, unsigned *level) -+{ -+ struct bkey_i *k; -+ struct jset_entry *entry, *start, *end; -+ -+ if (clean) { -+ start = clean->start; -+ end = vstruct_end(&clean->field); -+ } else { -+ start = j->start; -+ end = vstruct_last(j); -+ } -+ -+ for (entry = start; entry < end; entry = vstruct_next(entry)) -+ if (entry->type == BCH_JSET_ENTRY_btree_root && -+ entry->btree_id == id) -+ goto found; -+ -+ return NULL; -+found: -+ if (!entry->u64s) -+ return ERR_PTR(-EINVAL); -+ -+ k = entry->start; -+ *level = entry->level; -+ return k; -+} -+ -+int bch2_verify_superblock_clean(struct bch_fs *c, -+ struct bch_sb_field_clean **cleanp, -+ struct jset *j) -+{ -+ unsigned i; -+ struct bch_sb_field_clean *clean = *cleanp; -+ struct printbuf buf1 = PRINTBUF; -+ struct printbuf buf2 = PRINTBUF; -+ int ret = 0; -+ -+ if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, -+ sb_clean_journal_seq_mismatch, -+ "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", -+ le64_to_cpu(clean->journal_seq), -+ le64_to_cpu(j->seq))) { -+ kfree(clean); -+ *cleanp = NULL; -+ return 0; -+ } -+ -+ for (i = 0; i < BTREE_ID_NR; i++) { -+ struct bkey_i *k1, *k2; -+ unsigned l1 = 0, l2 = 0; -+ -+ k1 = btree_root_find(c, clean, NULL, i, &l1); -+ k2 = btree_root_find(c, NULL, j, i, &l2); -+ -+ if (!k1 && !k2) -+ continue; -+ -+ printbuf_reset(&buf1); -+ printbuf_reset(&buf2); -+ -+ if (k1) -+ bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1)); -+ else -+ prt_printf(&buf1, "(none)"); -+ -+ if (k2) -+ bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2)); -+ else -+ prt_printf(&buf2, "(none)"); -+ -+ mustfix_fsck_err_on(!k1 || !k2 || -+ IS_ERR(k1) || -+ IS_ERR(k2) || -+ k1->k.u64s != k2->k.u64s || -+ memcmp(k1, k2, bkey_bytes(&k1->k)) || -+ l1 != l2, c, -+ sb_clean_btree_root_mismatch, -+ "superblock btree root %u doesn't match journal after clean shutdown\n" -+ "sb: l=%u %s\n" -+ "journal: l=%u %s\n", i, -+ l1, buf1.buf, -+ l2, buf2.buf); -+ } -+fsck_err: -+ printbuf_exit(&buf2); -+ printbuf_exit(&buf1); -+ return ret; -+} -+ -+struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *clean, *sb_clean; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); -+ -+ if (fsck_err_on(!sb_clean, c, -+ sb_clean_missing, -+ "superblock marked clean but clean section not present")) { -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ c->sb.clean = false; -+ mutex_unlock(&c->sb_lock); -+ return NULL; -+ } -+ -+ clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), -+ GFP_KERNEL); -+ if (!clean) { -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean); -+ } -+ -+ ret = bch2_sb_clean_validate_late(c, clean, READ); -+ if (ret) { -+ mutex_unlock(&c->sb_lock); -+ return 
ERR_PTR(ret); -+ } -+ -+ mutex_unlock(&c->sb_lock); -+ -+ return clean; -+fsck_err: -+ mutex_unlock(&c->sb_lock); -+ return ERR_PTR(ret); -+} -+ -+static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size) -+{ -+ struct jset_entry *entry = *end; -+ unsigned u64s = DIV_ROUND_UP(size, sizeof(u64)); -+ -+ memset(entry, 0, u64s * sizeof(u64)); -+ /* -+ * The u64s field counts from the start of data, ignoring the shared -+ * fields. -+ */ -+ entry->u64s = cpu_to_le16(u64s - 1); -+ -+ *end = vstruct_next(*end); -+ return entry; -+} -+ -+void bch2_journal_super_entries_add_common(struct bch_fs *c, -+ struct jset_entry **end, -+ u64 journal_seq) -+{ -+ struct bch_dev *ca; -+ unsigned i, dev; -+ -+ percpu_down_read(&c->mark_lock); -+ -+ if (!journal_seq) { -+ for (i = 0; i < ARRAY_SIZE(c->usage); i++) -+ bch2_fs_usage_acc_to_base(c, i); -+ } else { -+ bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(jset_entry_init(end, sizeof(*u)), -+ struct jset_entry_usage, entry); -+ -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = BCH_FS_USAGE_inodes; -+ u->v = cpu_to_le64(c->usage_base->nr_inodes); -+ } -+ -+ { -+ struct jset_entry_usage *u = -+ container_of(jset_entry_init(end, sizeof(*u)), -+ struct jset_entry_usage, entry); -+ -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = BCH_FS_USAGE_key_version; -+ u->v = cpu_to_le64(atomic64_read(&c->key_version)); -+ } -+ -+ for (i = 0; i < BCH_REPLICAS_MAX; i++) { -+ struct jset_entry_usage *u = -+ container_of(jset_entry_init(end, sizeof(*u)), -+ struct jset_entry_usage, entry); -+ -+ u->entry.type = BCH_JSET_ENTRY_usage; -+ u->entry.btree_id = BCH_FS_USAGE_reserved; -+ u->entry.level = i; -+ u->v = cpu_to_le64(c->usage_base->persistent_reserved[i]); -+ } -+ -+ for (i = 0; i < c->replicas.nr; i++) { -+ struct bch_replicas_entry *e = -+ cpu_replicas_entry(&c->replicas, i); -+ struct jset_entry_data_usage *u = -+ container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs), -+ struct jset_entry_data_usage, entry); -+ -+ u->entry.type = BCH_JSET_ENTRY_data_usage; -+ u->v = cpu_to_le64(c->usage_base->replicas[i]); -+ unsafe_memcpy(&u->r, e, replicas_entry_bytes(e), -+ "embedded variable length struct"); -+ } -+ -+ for_each_member_device(ca, c, dev) { -+ unsigned b = sizeof(struct jset_entry_dev_usage) + -+ sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR; -+ struct jset_entry_dev_usage *u = -+ container_of(jset_entry_init(end, b), -+ struct jset_entry_dev_usage, entry); -+ -+ u->entry.type = BCH_JSET_ENTRY_dev_usage; -+ u->dev = cpu_to_le32(dev); -+ u->buckets_ec = cpu_to_le64(ca->usage_base->buckets_ec); -+ -+ for (i = 0; i < BCH_DATA_NR; i++) { -+ u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets); -+ u->d[i].sectors = cpu_to_le64(ca->usage_base->d[i].sectors); -+ u->d[i].fragmented = cpu_to_le64(ca->usage_base->d[i].fragmented); -+ } -+ } -+ -+ percpu_up_read(&c->mark_lock); -+ -+ for (i = 0; i < 2; i++) { -+ struct jset_entry_clock *clock = -+ container_of(jset_entry_init(end, sizeof(*clock)), -+ struct jset_entry_clock, entry); -+ -+ clock->entry.type = BCH_JSET_ENTRY_clock; -+ clock->rw = i; -+ clock->time = cpu_to_le64(atomic64_read(&c->io_clock[i].now)); -+ } -+} -+ -+static int bch2_sb_clean_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_clean *clean = field_to_type(f, clean); -+ -+ if (vstruct_bytes(&clean->field) < sizeof(*clean)) { -+ prt_printf(err, "wrong size 
(got %zu should be %zu)", -+ vstruct_bytes(&clean->field), sizeof(*clean)); -+ return -BCH_ERR_invalid_sb_clean; -+ } -+ -+ return 0; -+} -+ -+static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_clean *clean = field_to_type(f, clean); -+ struct jset_entry *entry; -+ -+ prt_printf(out, "flags: %x", le32_to_cpu(clean->flags)); -+ prt_newline(out); -+ prt_printf(out, "journal_seq: %llu", le64_to_cpu(clean->journal_seq)); -+ prt_newline(out); -+ -+ for (entry = clean->start; -+ entry != vstruct_end(&clean->field); -+ entry = vstruct_next(entry)) { -+ if (entry->type == BCH_JSET_ENTRY_btree_keys && -+ !entry->u64s) -+ continue; -+ -+ bch2_journal_entry_to_text(out, NULL, entry); -+ prt_newline(out); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_clean = { -+ .validate = bch2_sb_clean_validate, -+ .to_text = bch2_sb_clean_to_text, -+}; -+ -+int bch2_fs_mark_dirty(struct bch_fs *c) -+{ -+ int ret; -+ -+ /* -+ * Unconditionally write superblock, to verify it hasn't changed before -+ * we go rw: -+ */ -+ -+ mutex_lock(&c->sb_lock); -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false); -+ -+ bch2_sb_maybe_downgrade(c); -+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); -+ -+ ret = bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ return ret; -+} -+ -+void bch2_fs_mark_clean(struct bch_fs *c) -+{ -+ struct bch_sb_field_clean *sb_clean; -+ struct jset_entry *entry; -+ unsigned u64s; -+ int ret; -+ -+ mutex_lock(&c->sb_lock); -+ if (BCH_SB_CLEAN(c->disk_sb.sb)) -+ goto out; -+ -+ SET_BCH_SB_CLEAN(c->disk_sb.sb, true); -+ -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); -+ c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata); -+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates)); -+ c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled)); -+ -+ u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; -+ -+ sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s); -+ if (!sb_clean) { -+ bch_err(c, "error resizing superblock while setting filesystem clean"); -+ goto out; -+ } -+ -+ sb_clean->flags = 0; -+ sb_clean->journal_seq = cpu_to_le64(atomic64_read(&c->journal.seq)); -+ -+ /* Trying to catch outstanding bug: */ -+ BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX); -+ -+ entry = sb_clean->start; -+ bch2_journal_super_entries_add_common(c, &entry, 0); -+ entry = bch2_btree_roots_to_journal_entries(c, entry, 0); -+ BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); -+ -+ memset(entry, 0, -+ vstruct_end(&sb_clean->field) - (void *) entry); -+ -+ /* -+ * this should be in the write path, and we should be validating every -+ * superblock section: -+ */ -+ ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE); -+ if (ret) { -+ bch_err(c, "error writing marking filesystem clean: validate error"); -+ goto out; -+ } -+ -+ bch2_write_super(c); -+out: -+ mutex_unlock(&c->sb_lock); -+} -diff --git a/fs/bcachefs/sb-clean.h b/fs/bcachefs/sb-clean.h -new file mode 100644 -index 000000000000..71caef281239 ---- /dev/null -+++ b/fs/bcachefs/sb-clean.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SB_CLEAN_H -+#define _BCACHEFS_SB_CLEAN_H -+ -+int bch2_sb_clean_validate_late(struct bch_fs *, struct bch_sb_field_clean *, int); -+int bch2_verify_superblock_clean(struct bch_fs *, struct bch_sb_field_clean **, -+ struct jset *); -+struct 
bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *); -+void bch2_journal_super_entries_add_common(struct bch_fs *, struct jset_entry **, u64); -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_clean; -+ -+int bch2_fs_mark_dirty(struct bch_fs *); -+void bch2_fs_mark_clean(struct bch_fs *); -+ -+#endif /* _BCACHEFS_SB_CLEAN_H */ -diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c -new file mode 100644 -index 000000000000..f0930ab7f036 ---- /dev/null -+++ b/fs/bcachefs/sb-errors.c -@@ -0,0 +1,172 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "sb-errors.h" -+#include "super-io.h" -+ -+static const char * const bch2_sb_error_strs[] = { -+#define x(t, n, ...) [n] = #t, -+ BCH_SB_ERRS() -+ NULL -+}; -+ -+static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id) -+{ -+ if (id < BCH_SB_ERR_MAX) -+ prt_str(out, bch2_sb_error_strs[id]); -+ else -+ prt_printf(out, "(unknown error %u)", id); -+} -+ -+static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) -+{ -+ return e -+ ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) -+ : 0; -+} -+ -+static inline unsigned bch2_sb_field_errors_u64s(unsigned nr) -+{ -+ return (sizeof(struct bch_sb_field_errors) + -+ sizeof(struct bch_sb_field_error_entry) * nr) / sizeof(u64); -+} -+ -+static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_errors *e = field_to_type(f, errors); -+ unsigned i, nr = bch2_sb_field_errors_nr_entries(e); -+ -+ for (i = 0; i < nr; i++) { -+ if (!BCH_SB_ERROR_ENTRY_NR(&e->entries[i])) { -+ prt_printf(err, "entry with count 0 (id "); -+ bch2_sb_error_id_to_text(err, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); -+ prt_printf(err, ")"); -+ return -BCH_ERR_invalid_sb_errors; -+ } -+ -+ if (i + 1 < nr && -+ BCH_SB_ERROR_ENTRY_ID(&e->entries[i]) >= -+ BCH_SB_ERROR_ENTRY_ID(&e->entries[i + 1])) { -+ prt_printf(err, "entries out of order"); -+ return -BCH_ERR_invalid_sb_errors; -+ } -+ } -+ -+ return 0; -+} -+ -+static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_errors *e = field_to_type(f, errors); -+ unsigned i, nr = bch2_sb_field_errors_nr_entries(e); -+ -+ if (out->nr_tabstops <= 1) -+ printbuf_tabstop_push(out, 16); -+ -+ for (i = 0; i < nr; i++) { -+ bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); -+ prt_tab(out); -+ prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); -+ prt_tab(out); -+ bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time)); -+ prt_newline(out); -+ } -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_errors = { -+ .validate = bch2_sb_errors_validate, -+ .to_text = bch2_sb_errors_to_text, -+}; -+ -+void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) -+{ -+ bch_sb_errors_cpu *e = &c->fsck_error_counts; -+ struct bch_sb_error_entry_cpu n = { -+ .id = err, -+ .nr = 1, -+ .last_error_time = ktime_get_real_seconds() -+ }; -+ unsigned i; -+ -+ mutex_lock(&c->fsck_error_counts_lock); -+ for (i = 0; i < e->nr; i++) { -+ if (err == e->data[i].id) { -+ e->data[i].nr++; -+ e->data[i].last_error_time = n.last_error_time; -+ goto out; -+ } -+ if (err < e->data[i].id) -+ break; -+ } -+ -+ if (darray_make_room(e, 1)) -+ goto out; -+ -+ darray_insert_item(e, i, n); -+out: -+ mutex_unlock(&c->fsck_error_counts_lock); -+} -+ -+void bch2_sb_errors_from_cpu(struct bch_fs *c) -+{ -+ bch_sb_errors_cpu 
*src = &c->fsck_error_counts; -+ struct bch_sb_field_errors *dst = -+ bch2_sb_field_resize(&c->disk_sb, errors, -+ bch2_sb_field_errors_u64s(src->nr)); -+ unsigned i; -+ -+ if (!dst) -+ return; -+ -+ for (i = 0; i < src->nr; i++) { -+ SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); -+ SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); -+ dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); -+ } -+} -+ -+static int bch2_sb_errors_to_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); -+ bch_sb_errors_cpu *dst = &c->fsck_error_counts; -+ unsigned i, nr = bch2_sb_field_errors_nr_entries(src); -+ int ret; -+ -+ if (!nr) -+ return 0; -+ -+ mutex_lock(&c->fsck_error_counts_lock); -+ ret = darray_make_room(dst, nr); -+ if (ret) -+ goto err; -+ -+ dst->nr = nr; -+ -+ for (i = 0; i < nr; i++) { -+ dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); -+ dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); -+ dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); -+ } -+err: -+ mutex_unlock(&c->fsck_error_counts_lock); -+ -+ return ret; -+} -+ -+void bch2_fs_sb_errors_exit(struct bch_fs *c) -+{ -+ darray_exit(&c->fsck_error_counts); -+} -+ -+void bch2_fs_sb_errors_init_early(struct bch_fs *c) -+{ -+ mutex_init(&c->fsck_error_counts_lock); -+ darray_init(&c->fsck_error_counts); -+} -+ -+int bch2_fs_sb_errors_init(struct bch_fs *c) -+{ -+ return bch2_sb_errors_to_cpu(c); -+} -diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h -new file mode 100644 -index 000000000000..5a09a53966be ---- /dev/null -+++ b/fs/bcachefs/sb-errors.h -@@ -0,0 +1,270 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SB_ERRORS_H -+#define _BCACHEFS_SB_ERRORS_H -+ -+#include "sb-errors_types.h" -+ -+#define BCH_SB_ERRS() \ -+ x(clean_but_journal_not_empty, 0) \ -+ x(dirty_but_no_journal_entries, 1) \ -+ x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ -+ x(sb_clean_journal_seq_mismatch, 3) \ -+ x(sb_clean_btree_root_mismatch, 4) \ -+ x(sb_clean_missing, 5) \ -+ x(jset_unsupported_version, 6) \ -+ x(jset_unknown_csum, 7) \ -+ x(jset_last_seq_newer_than_seq, 8) \ -+ x(jset_past_bucket_end, 9) \ -+ x(jset_seq_blacklisted, 10) \ -+ x(journal_entries_missing, 11) \ -+ x(journal_entry_replicas_not_marked, 12) \ -+ x(journal_entry_past_jset_end, 13) \ -+ x(journal_entry_replicas_data_mismatch, 14) \ -+ x(journal_entry_bkey_u64s_0, 15) \ -+ x(journal_entry_bkey_past_end, 16) \ -+ x(journal_entry_bkey_bad_format, 17) \ -+ x(journal_entry_bkey_invalid, 18) \ -+ x(journal_entry_btree_root_bad_size, 19) \ -+ x(journal_entry_blacklist_bad_size, 20) \ -+ x(journal_entry_blacklist_v2_bad_size, 21) \ -+ x(journal_entry_blacklist_v2_start_past_end, 22) \ -+ x(journal_entry_usage_bad_size, 23) \ -+ x(journal_entry_data_usage_bad_size, 24) \ -+ x(journal_entry_clock_bad_size, 25) \ -+ x(journal_entry_clock_bad_rw, 26) \ -+ x(journal_entry_dev_usage_bad_size, 27) \ -+ x(journal_entry_dev_usage_bad_dev, 28) \ -+ x(journal_entry_dev_usage_bad_pad, 29) \ -+ x(btree_node_unreadable, 30) \ -+ x(btree_node_fault_injected, 31) \ -+ x(btree_node_bad_magic, 32) \ -+ x(btree_node_bad_seq, 33) \ -+ x(btree_node_unsupported_version, 34) \ -+ x(btree_node_bset_older_than_sb_min, 35) \ -+ x(btree_node_bset_newer_than_sb, 36) \ -+ x(btree_node_data_missing, 37) \ -+ x(btree_node_bset_after_end, 38) \ -+ x(btree_node_replicas_sectors_written_mismatch, 39) \ -+ x(btree_node_replicas_data_mismatch, 
40) \
-+	x(bset_unknown_csum, 41) \
-+	x(bset_bad_csum, 42) \
-+	x(bset_past_end_of_btree_node, 43) \
-+	x(bset_wrong_sector_offset, 44) \
-+	x(bset_empty, 45) \
-+	x(bset_bad_seq, 46) \
-+	x(bset_blacklisted_journal_seq, 47) \
-+	x(first_bset_blacklisted_journal_seq, 48) \
-+	x(btree_node_bad_btree, 49) \
-+	x(btree_node_bad_level, 50) \
-+	x(btree_node_bad_min_key, 51) \
-+	x(btree_node_bad_max_key, 52) \
-+	x(btree_node_bad_format, 53) \
-+	x(btree_node_bkey_past_bset_end, 54) \
-+	x(btree_node_bkey_bad_format, 55) \
-+	x(btree_node_bad_bkey, 56) \
-+	x(btree_node_bkey_out_of_order, 57) \
-+	x(btree_root_bkey_invalid, 58) \
-+	x(btree_root_read_error, 59) \
-+	x(btree_root_bad_min_key, 60) \
-+	x(btree_root_bad_max_key, 61) \
-+	x(btree_node_read_error, 62) \
-+	x(btree_node_topology_bad_min_key, 63) \
-+	x(btree_node_topology_bad_max_key, 64) \
-+	x(btree_node_topology_overwritten_by_prev_node, 65) \
-+	x(btree_node_topology_overwritten_by_next_node, 66) \
-+	x(btree_node_topology_interior_node_empty, 67) \
-+	x(fs_usage_hidden_wrong, 68) \
-+	x(fs_usage_btree_wrong, 69) \
-+	x(fs_usage_data_wrong, 70) \
-+	x(fs_usage_cached_wrong, 71) \
-+	x(fs_usage_reserved_wrong, 72) \
-+	x(fs_usage_persistent_reserved_wrong, 73) \
-+	x(fs_usage_nr_inodes_wrong, 74) \
-+	x(fs_usage_replicas_wrong, 75) \
-+	x(dev_usage_buckets_wrong, 76) \
-+	x(dev_usage_sectors_wrong, 77) \
-+	x(dev_usage_fragmented_wrong, 78) \
-+	x(dev_usage_buckets_ec_wrong, 79) \
-+	x(bkey_version_in_future, 80) \
-+	x(bkey_u64s_too_small, 81) \
-+	x(bkey_invalid_type_for_btree, 82) \
-+	x(bkey_extent_size_zero, 83) \
-+	x(bkey_extent_size_greater_than_offset, 84) \
-+	x(bkey_size_nonzero, 85) \
-+	x(bkey_snapshot_nonzero, 86) \
-+	x(bkey_snapshot_zero, 87) \
-+	x(bkey_at_pos_max, 88) \
-+	x(bkey_before_start_of_btree_node, 89) \
-+	x(bkey_after_end_of_btree_node, 90) \
-+	x(bkey_val_size_nonzero, 91) \
-+	x(bkey_val_size_too_small, 92) \
-+	x(alloc_v1_val_size_bad, 93) \
-+	x(alloc_v2_unpack_error, 94) \
-+	x(alloc_v3_unpack_error, 95) \
-+	x(alloc_v4_val_size_bad, 96) \
-+	x(alloc_v4_backpointers_start_bad, 97) \
-+	x(alloc_key_data_type_bad, 98) \
-+	x(alloc_key_empty_but_have_data, 99) \
-+	x(alloc_key_dirty_sectors_0, 100) \
-+	x(alloc_key_data_type_inconsistency, 101) \
-+	x(alloc_key_to_missing_dev_bucket, 102) \
-+	x(alloc_key_cached_inconsistency, 103) \
-+	x(alloc_key_cached_but_read_time_zero, 104) \
-+	x(alloc_key_to_missing_lru_entry, 105) \
-+	x(alloc_key_data_type_wrong, 106) \
-+	x(alloc_key_gen_wrong, 107) \
-+	x(alloc_key_dirty_sectors_wrong, 108) \
-+	x(alloc_key_cached_sectors_wrong, 109) \
-+	x(alloc_key_stripe_wrong, 110) \
-+	x(alloc_key_stripe_redundancy_wrong, 111) \
-+	x(bucket_sector_count_overflow, 112) \
-+	x(bucket_metadata_type_mismatch, 113) \
-+	x(need_discard_key_wrong, 114) \
-+	x(freespace_key_wrong, 115) \
-+	x(freespace_hole_missing, 116) \
-+	x(bucket_gens_val_size_bad, 117) \
-+	x(bucket_gens_key_wrong, 118) \
-+	x(bucket_gens_hole_wrong, 119) \
-+	x(bucket_gens_to_invalid_dev, 120) \
-+	x(bucket_gens_to_invalid_buckets, 121) \
-+	x(bucket_gens_nonzero_for_invalid_buckets, 122) \
-+	x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
-+	x(need_discard_freespace_key_bad, 124) \
-+	x(backpointer_pos_wrong, 125) \
-+	x(backpointer_to_missing_device, 126) \
-+	x(backpointer_to_missing_alloc, 127) \
-+	x(backpointer_to_missing_ptr, 128) \
-+	x(lru_entry_at_time_0, 129) \
-+	x(lru_entry_to_invalid_bucket, 130) \
-+	x(lru_entry_bad, 131) \
-+	x(btree_ptr_val_too_big, 132) \
-+	
x(btree_ptr_v2_val_too_big, 133) \ -+ x(btree_ptr_has_non_ptr, 134) \ -+ x(extent_ptrs_invalid_entry, 135) \ -+ x(extent_ptrs_no_ptrs, 136) \ -+ x(extent_ptrs_too_many_ptrs, 137) \ -+ x(extent_ptrs_redundant_crc, 138) \ -+ x(extent_ptrs_redundant_stripe, 139) \ -+ x(extent_ptrs_unwritten, 140) \ -+ x(extent_ptrs_written_and_unwritten, 141) \ -+ x(ptr_to_invalid_device, 142) \ -+ x(ptr_to_duplicate_device, 143) \ -+ x(ptr_after_last_bucket, 144) \ -+ x(ptr_before_first_bucket, 145) \ -+ x(ptr_spans_multiple_buckets, 146) \ -+ x(ptr_to_missing_backpointer, 147) \ -+ x(ptr_to_missing_alloc_key, 148) \ -+ x(ptr_to_missing_replicas_entry, 149) \ -+ x(ptr_to_missing_stripe, 150) \ -+ x(ptr_to_incorrect_stripe, 151) \ -+ x(ptr_gen_newer_than_bucket_gen, 152) \ -+ x(ptr_too_stale, 153) \ -+ x(stale_dirty_ptr, 154) \ -+ x(ptr_bucket_data_type_mismatch, 155) \ -+ x(ptr_cached_and_erasure_coded, 156) \ -+ x(ptr_crc_uncompressed_size_too_small, 157) \ -+ x(ptr_crc_csum_type_unknown, 158) \ -+ x(ptr_crc_compression_type_unknown, 159) \ -+ x(ptr_crc_redundant, 160) \ -+ x(ptr_crc_uncompressed_size_too_big, 161) \ -+ x(ptr_crc_nonce_mismatch, 162) \ -+ x(ptr_stripe_redundant, 163) \ -+ x(reservation_key_nr_replicas_invalid, 164) \ -+ x(reflink_v_refcount_wrong, 165) \ -+ x(reflink_p_to_missing_reflink_v, 166) \ -+ x(stripe_pos_bad, 167) \ -+ x(stripe_val_size_bad, 168) \ -+ x(stripe_sector_count_wrong, 169) \ -+ x(snapshot_tree_pos_bad, 170) \ -+ x(snapshot_tree_to_missing_snapshot, 171) \ -+ x(snapshot_tree_to_missing_subvol, 172) \ -+ x(snapshot_tree_to_wrong_subvol, 173) \ -+ x(snapshot_tree_to_snapshot_subvol, 174) \ -+ x(snapshot_pos_bad, 175) \ -+ x(snapshot_parent_bad, 176) \ -+ x(snapshot_children_not_normalized, 177) \ -+ x(snapshot_child_duplicate, 178) \ -+ x(snapshot_child_bad, 179) \ -+ x(snapshot_skiplist_not_normalized, 180) \ -+ x(snapshot_skiplist_bad, 181) \ -+ x(snapshot_should_not_have_subvol, 182) \ -+ x(snapshot_to_bad_snapshot_tree, 183) \ -+ x(snapshot_bad_depth, 184) \ -+ x(snapshot_bad_skiplist, 185) \ -+ x(subvol_pos_bad, 186) \ -+ x(subvol_not_master_and_not_snapshot, 187) \ -+ x(subvol_to_missing_root, 188) \ -+ x(subvol_root_wrong_bi_subvol, 189) \ -+ x(bkey_in_missing_snapshot, 190) \ -+ x(inode_pos_inode_nonzero, 191) \ -+ x(inode_pos_blockdev_range, 192) \ -+ x(inode_unpack_error, 193) \ -+ x(inode_str_hash_invalid, 194) \ -+ x(inode_v3_fields_start_bad, 195) \ -+ x(inode_snapshot_mismatch, 196) \ -+ x(inode_unlinked_but_clean, 197) \ -+ x(inode_unlinked_but_nlink_nonzero, 198) \ -+ x(inode_checksum_type_invalid, 199) \ -+ x(inode_compression_type_invalid, 200) \ -+ x(inode_subvol_root_but_not_dir, 201) \ -+ x(inode_i_size_dirty_but_clean, 202) \ -+ x(inode_i_sectors_dirty_but_clean, 203) \ -+ x(inode_i_sectors_wrong, 204) \ -+ x(inode_dir_wrong_nlink, 205) \ -+ x(inode_dir_multiple_links, 206) \ -+ x(inode_multiple_links_but_nlink_0, 207) \ -+ x(inode_wrong_backpointer, 208) \ -+ x(inode_wrong_nlink, 209) \ -+ x(inode_unreachable, 210) \ -+ x(deleted_inode_but_clean, 211) \ -+ x(deleted_inode_missing, 212) \ -+ x(deleted_inode_is_dir, 213) \ -+ x(deleted_inode_not_unlinked, 214) \ -+ x(extent_overlapping, 215) \ -+ x(extent_in_missing_inode, 216) \ -+ x(extent_in_non_reg_inode, 217) \ -+ x(extent_past_end_of_inode, 218) \ -+ x(dirent_empty_name, 219) \ -+ x(dirent_val_too_big, 220) \ -+ x(dirent_name_too_long, 221) \ -+ x(dirent_name_embedded_nul, 222) \ -+ x(dirent_name_dot_or_dotdot, 223) \ -+ x(dirent_name_has_slash, 224) \ -+ x(dirent_d_type_wrong, 225) \ -+ 
x(dirent_d_parent_subvol_wrong, 226) \ -+ x(dirent_in_missing_dir_inode, 227) \ -+ x(dirent_in_non_dir_inode, 228) \ -+ x(dirent_to_missing_inode, 229) \ -+ x(dirent_to_missing_subvol, 230) \ -+ x(dirent_to_itself, 231) \ -+ x(quota_type_invalid, 232) \ -+ x(xattr_val_size_too_small, 233) \ -+ x(xattr_val_size_too_big, 234) \ -+ x(xattr_invalid_type, 235) \ -+ x(xattr_name_invalid_chars, 236) \ -+ x(xattr_in_missing_inode, 237) \ -+ x(root_subvol_missing, 238) \ -+ x(root_dir_missing, 239) \ -+ x(root_inode_not_dir, 240) \ -+ x(dir_loop, 241) \ -+ x(hash_table_key_duplicate, 242) \ -+ x(hash_table_key_wrong_offset, 243) -+ -+enum bch_sb_error_id { -+#define x(t, n) BCH_FSCK_ERR_##t = n, -+ BCH_SB_ERRS() -+#undef x -+ BCH_SB_ERR_MAX -+}; -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_errors; -+ -+void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); -+ -+void bch2_sb_errors_from_cpu(struct bch_fs *); -+ -+void bch2_fs_sb_errors_exit(struct bch_fs *); -+void bch2_fs_sb_errors_init_early(struct bch_fs *); -+int bch2_fs_sb_errors_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_SB_ERRORS_H */ -diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h -new file mode 100644 -index 000000000000..b1c099843a39 ---- /dev/null -+++ b/fs/bcachefs/sb-errors_types.h -@@ -0,0 +1,16 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SB_ERRORS_TYPES_H -+#define _BCACHEFS_SB_ERRORS_TYPES_H -+ -+#include "darray.h" -+ -+struct bch_sb_error_entry_cpu { -+ u64 id:16, -+ nr:48; -+ u64 last_error_time; -+}; -+ -+typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu; -+ -+#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */ -+ -diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c -new file mode 100644 -index 000000000000..bed0f857fe5b ---- /dev/null -+++ b/fs/bcachefs/sb-members.c -@@ -0,0 +1,420 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "disk_groups.h" -+#include "opts.h" -+#include "replicas.h" -+#include "sb-members.h" -+#include "super-io.h" -+ -+#define x(t, n, ...) 
[n] = #t, -+static const char * const bch2_iops_measurements[] = { -+ BCH_IOPS_MEASUREMENTS() -+ NULL -+}; -+ -+char * const bch2_member_error_strs[] = { -+ BCH_MEMBER_ERROR_TYPES() -+ NULL -+}; -+#undef x -+ -+/* Code for bch_sb_field_members_v1: */ -+ -+struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) -+{ -+ return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); -+} -+ -+static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) -+{ -+ struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); -+ memset(&ret, 0, sizeof(ret)); -+ memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); -+ return ret; -+} -+ -+static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) -+{ -+ return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); -+} -+ -+static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) -+{ -+ struct bch_member ret, *p = members_v1_get_mut(mi, i); -+ memset(&ret, 0, sizeof(ret)); -+ memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); -+ return ret; -+} -+ -+struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) -+{ -+ struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); -+ if (mi2) -+ return members_v2_get(mi2, i); -+ struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); -+ return members_v1_get(mi1, i); -+} -+ -+static int sb_members_v2_resize_entries(struct bch_fs *c) -+{ -+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); -+ -+ if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) { -+ unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) * -+ c->disk_sb.sb->nr_devices), 8); -+ -+ mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); -+ if (!mi) -+ return -BCH_ERR_ENOSPC_sb_members_v2; -+ -+ for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { -+ void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); -+ memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); -+ memset(dst + le16_to_cpu(mi->member_bytes), -+ 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); -+ } -+ mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); -+ } -+ return 0; -+} -+ -+int bch2_sb_members_v2_init(struct bch_fs *c) -+{ -+ struct bch_sb_field_members_v1 *mi1; -+ struct bch_sb_field_members_v2 *mi2; -+ -+ if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) { -+ mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2, -+ DIV_ROUND_UP(sizeof(*mi2) + -+ sizeof(struct bch_member) * c->sb.nr_devices, -+ sizeof(u64))); -+ mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1); -+ memcpy(&mi2->_members[0], &mi1->_members[0], -+ BCH_MEMBER_V1_BYTES * c->sb.nr_devices); -+ memset(&mi2->pad[0], 0, sizeof(mi2->pad)); -+ mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); -+ } -+ -+ return sb_members_v2_resize_entries(c); -+} -+ -+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) -+{ -+ struct bch_sb_field_members_v1 *mi1; -+ struct bch_sb_field_members_v2 *mi2; -+ -+ mi1 = bch2_sb_field_resize(disk_sb, members_v1, -+ DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * -+ disk_sb->sb->nr_devices, sizeof(u64))); -+ if (!mi1) -+ return -BCH_ERR_ENOSPC_sb_members; -+ -+ mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); -+ -+ for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) -+ memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); -+ -+ return 0; -+} -+ 
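-+/*
-+ * Editor's note: the helper below is an illustrative sketch, not part of the
-+ * original patch. It shows the intended calling convention for the accessors
-+ * above: bch2_sb_member_get() copies the entry out of members_v2 when that
-+ * section is present and falls back to members_v1 otherwise, so callers never
-+ * have to inspect the superblock sections directly. The function name is
-+ * hypothetical.
-+ */
-+static inline bool bch2_sb_member_is_rw_sketch(struct bch_sb *sb, int i)
-+{
-+	struct bch_member m = bch2_sb_member_get(sb, i);
-+
-+	/* a zeroed (absent) member entry can never report as rw */
-+	return bch2_member_exists(&m) &&
-+		BCH_MEMBER_STATE(&m) == BCH_MEMBER_STATE_rw;
-+}
-+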
-+static int validate_member(struct printbuf *err, -+ struct bch_member m, -+ struct bch_sb *sb, -+ int i) -+{ -+ if (le64_to_cpu(m.nbuckets) > LONG_MAX) { -+ prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", -+ i, le64_to_cpu(m.nbuckets), LONG_MAX); -+ return -BCH_ERR_invalid_sb_members; -+ } -+ -+ if (le64_to_cpu(m.nbuckets) - -+ le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { -+ prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", -+ i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); -+ return -BCH_ERR_invalid_sb_members; -+ } -+ -+ if (le16_to_cpu(m.bucket_size) < -+ le16_to_cpu(sb->block_size)) { -+ prt_printf(err, "device %u: bucket size %u smaller than block size %u", -+ i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); -+ return -BCH_ERR_invalid_sb_members; -+ } -+ -+ if (le16_to_cpu(m.bucket_size) < -+ BCH_SB_BTREE_NODE_SIZE(sb)) { -+ prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", -+ i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); -+ return -BCH_ERR_invalid_sb_members; -+ } -+ -+ return 0; -+} -+ -+static void member_to_text(struct printbuf *out, -+ struct bch_member m, -+ struct bch_sb_field_disk_groups *gi, -+ struct bch_sb *sb, -+ int i) -+{ -+ unsigned data_have = bch2_sb_dev_has_data(sb, i); -+ u64 bucket_size = le16_to_cpu(m.bucket_size); -+ u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; -+ -+ if (!bch2_member_exists(&m)) -+ return; -+ -+ prt_printf(out, "Device:"); -+ prt_tab(out); -+ prt_printf(out, "%u", i); -+ prt_newline(out); -+ -+ printbuf_indent_add(out, 2); -+ -+ prt_printf(out, "Label:"); -+ prt_tab(out); -+ if (BCH_MEMBER_GROUP(&m)) { -+ unsigned idx = BCH_MEMBER_GROUP(&m) - 1; -+ -+ if (idx < disk_groups_nr(gi)) -+ prt_printf(out, "%s (%u)", -+ gi->entries[idx].label, idx); -+ else -+ prt_printf(out, "(bad disk labels section)"); -+ } else { -+ prt_printf(out, "(none)"); -+ } -+ prt_newline(out); -+ -+ prt_printf(out, "UUID:"); -+ prt_tab(out); -+ pr_uuid(out, m.uuid.b); -+ prt_newline(out); -+ -+ prt_printf(out, "Size:"); -+ prt_tab(out); -+ prt_units_u64(out, device_size << 9); -+ prt_newline(out); -+ -+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { -+ prt_printf(out, "%s errors:", bch2_member_error_strs[i]); -+ prt_tab(out); -+ prt_u64(out, le64_to_cpu(m.errors[i])); -+ prt_newline(out); -+ } -+ -+ for (unsigned i = 0; i < BCH_IOPS_NR; i++) { -+ prt_printf(out, "%s iops:", bch2_iops_measurements[i]); -+ prt_tab(out); -+ prt_printf(out, "%u", le32_to_cpu(m.iops[i])); -+ prt_newline(out); -+ } -+ -+ prt_printf(out, "Bucket size:"); -+ prt_tab(out); -+ prt_units_u64(out, bucket_size << 9); -+ prt_newline(out); -+ -+ prt_printf(out, "First bucket:"); -+ prt_tab(out); -+ prt_printf(out, "%u", le16_to_cpu(m.first_bucket)); -+ prt_newline(out); -+ -+ prt_printf(out, "Buckets:"); -+ prt_tab(out); -+ prt_printf(out, "%llu", le64_to_cpu(m.nbuckets)); -+ prt_newline(out); -+ -+ prt_printf(out, "Last mount:"); -+ prt_tab(out); -+ if (m.last_mount) -+ bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); -+ else -+ prt_printf(out, "(never)"); -+ prt_newline(out); -+ -+ prt_printf(out, "State:"); -+ prt_tab(out); -+ prt_printf(out, "%s", -+ BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR -+ ? 
bch2_member_states[BCH_MEMBER_STATE(&m)] -+ : "unknown"); -+ prt_newline(out); -+ -+ prt_printf(out, "Data allowed:"); -+ prt_tab(out); -+ if (BCH_MEMBER_DATA_ALLOWED(&m)) -+ prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); -+ else -+ prt_printf(out, "(none)"); -+ prt_newline(out); -+ -+ prt_printf(out, "Has data:"); -+ prt_tab(out); -+ if (data_have) -+ prt_bitflags(out, bch2_data_types, data_have); -+ else -+ prt_printf(out, "(none)"); -+ prt_newline(out); -+ -+ prt_printf(out, "Discard:"); -+ prt_tab(out); -+ prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m)); -+ prt_newline(out); -+ -+ prt_printf(out, "Freespace initialized:"); -+ prt_tab(out); -+ prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); -+ prt_newline(out); -+ -+ printbuf_indent_sub(out, 2); -+} -+ -+static int bch2_sb_members_v1_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); -+ unsigned i; -+ -+ if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { -+ prt_printf(err, "too many devices for section size"); -+ return -BCH_ERR_invalid_sb_members; -+ } -+ -+ for (i = 0; i < sb->nr_devices; i++) { -+ struct bch_member m = members_v1_get(mi, i); -+ -+ int ret = validate_member(err, m, sb, i); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); -+ struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); -+ unsigned i; -+ -+ for (i = 0; i < sb->nr_devices; i++) -+ member_to_text(out, members_v1_get(mi, i), gi, sb, i); -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { -+ .validate = bch2_sb_members_v1_validate, -+ .to_text = bch2_sb_members_v1_to_text, -+}; -+ -+static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); -+ struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); -+ unsigned i; -+ -+ for (i = 0; i < sb->nr_devices; i++) -+ member_to_text(out, members_v2_get(mi, i), gi, sb, i); -+} -+ -+static int bch2_sb_members_v2_validate(struct bch_sb *sb, -+ struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); -+ size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - -+ (void *) mi; -+ -+ if (mi_bytes > vstruct_bytes(&mi->field)) { -+ prt_printf(err, "section too small (%zu > %zu)", -+ mi_bytes, vstruct_bytes(&mi->field)); -+ return -BCH_ERR_invalid_sb_members; -+ } -+ -+ for (unsigned i = 0; i < sb->nr_devices; i++) { -+ int ret = validate_member(err, members_v2_get(mi, i), sb, i); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { -+ .validate = bch2_sb_members_v2_validate, -+ .to_text = bch2_sb_members_v2_to_text, -+}; -+ -+void bch2_sb_members_from_cpu(struct bch_fs *c) -+{ -+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); -+ struct bch_dev *ca; -+ unsigned i, e; -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, NULL) { -+ struct bch_member *m = __bch2_members_v2_get_mut(mi, i); -+ -+ for (e = 0; e < BCH_MEMBER_ERROR_NR; e++) -+ m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); -+ } -+ 
rcu_read_unlock(); -+} -+ -+void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bch_member m; -+ -+ mutex_lock(&ca->fs->sb_lock); -+ m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); -+ mutex_unlock(&ca->fs->sb_lock); -+ -+ printbuf_tabstop_push(out, 12); -+ -+ prt_str(out, "IO errors since filesystem creation"); -+ prt_newline(out); -+ -+ printbuf_indent_add(out, 2); -+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { -+ prt_printf(out, "%s:", bch2_member_error_strs[i]); -+ prt_tab(out); -+ prt_u64(out, atomic64_read(&ca->errors[i])); -+ prt_newline(out); -+ } -+ printbuf_indent_sub(out, 2); -+ -+ prt_str(out, "IO errors since "); -+ bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); -+ prt_str(out, " ago"); -+ prt_newline(out); -+ -+ printbuf_indent_add(out, 2); -+ for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { -+ prt_printf(out, "%s:", bch2_member_error_strs[i]); -+ prt_tab(out); -+ prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); -+ prt_newline(out); -+ } -+ printbuf_indent_sub(out, 2); -+} -+ -+void bch2_dev_errors_reset(struct bch_dev *ca) -+{ -+ struct bch_fs *c = ca->fs; -+ struct bch_member *m; -+ -+ mutex_lock(&c->sb_lock); -+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) -+ m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); -+ m->errors_reset_time = ktime_get_real_seconds(); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+} -diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h -new file mode 100644 -index 000000000000..03613e3eb8e3 ---- /dev/null -+++ b/fs/bcachefs/sb-members.h -@@ -0,0 +1,227 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SB_MEMBERS_H -+#define _BCACHEFS_SB_MEMBERS_H -+ -+extern char * const bch2_member_error_strs[]; -+ -+static inline struct bch_member * -+__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i) -+{ -+ return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); -+} -+ -+int bch2_sb_members_v2_init(struct bch_fs *c); -+int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); -+struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); -+struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); -+ -+static inline bool bch2_dev_is_online(struct bch_dev *ca) -+{ -+ return !percpu_ref_is_zero(&ca->io_ref); -+} -+ -+static inline bool bch2_dev_is_readable(struct bch_dev *ca) -+{ -+ return bch2_dev_is_online(ca) && -+ ca->mi.state != BCH_MEMBER_STATE_failed; -+} -+ -+static inline bool bch2_dev_get_ioref(struct bch_dev *ca, int rw) -+{ -+ if (!percpu_ref_tryget(&ca->io_ref)) -+ return false; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_rw || -+ (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ)) -+ return true; -+ -+ percpu_ref_put(&ca->io_ref); -+ return false; -+} -+ -+static inline unsigned dev_mask_nr(const struct bch_devs_mask *devs) -+{ -+ return bitmap_weight(devs->d, BCH_SB_MEMBERS_MAX); -+} -+ -+static inline bool bch2_dev_list_has_dev(struct bch_devs_list devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs.nr; i++) -+ if (devs.devs[i] == dev) -+ return true; -+ -+ return false; -+} -+ -+static inline void bch2_dev_list_drop_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ unsigned i; -+ -+ for (i = 0; i < devs->nr; i++) -+ if (devs->devs[i] == dev) { -+ array_remove_item(devs->devs, 
devs->nr, i); -+ return; -+ } -+} -+ -+static inline void bch2_dev_list_add_dev(struct bch_devs_list *devs, -+ unsigned dev) -+{ -+ if (!bch2_dev_list_has_dev(*devs, dev)) { -+ BUG_ON(devs->nr >= ARRAY_SIZE(devs->devs)); -+ devs->devs[devs->nr++] = dev; -+ } -+} -+ -+static inline struct bch_devs_list bch2_dev_list_single(unsigned dev) -+{ -+ return (struct bch_devs_list) { .nr = 1, .devs[0] = dev }; -+} -+ -+static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter, -+ const struct bch_devs_mask *mask) -+{ -+ struct bch_dev *ca = NULL; -+ -+ while ((*iter = mask -+ ? find_next_bit(mask->d, c->sb.nr_devices, *iter) -+ : *iter) < c->sb.nr_devices && -+ !(ca = rcu_dereference_check(c->devs[*iter], -+ lockdep_is_held(&c->state_lock)))) -+ (*iter)++; -+ -+ return ca; -+} -+ -+#define for_each_member_device_rcu(ca, c, iter, mask) \ -+ for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter), mask)); (iter)++) -+ -+static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, unsigned *iter) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ if ((ca = __bch2_next_dev(c, iter, NULL))) -+ percpu_ref_get(&ca->ref); -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+/* -+ * If you break early, you must drop your ref on the current device -+ */ -+#define for_each_member_device(ca, c, iter) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_dev(c, &(iter))); \ -+ percpu_ref_put(&ca->ref), (iter)++) -+ -+static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c, -+ unsigned *iter, -+ int state_mask) -+{ -+ struct bch_dev *ca; -+ -+ rcu_read_lock(); -+ while ((ca = __bch2_next_dev(c, iter, NULL)) && -+ (!((1 << ca->mi.state) & state_mask) || -+ !percpu_ref_tryget(&ca->io_ref))) -+ (*iter)++; -+ rcu_read_unlock(); -+ -+ return ca; -+} -+ -+#define __for_each_online_member(ca, c, iter, state_mask) \ -+ for ((iter) = 0; \ -+ (ca = bch2_get_next_online_dev(c, &(iter), state_mask)); \ -+ percpu_ref_put(&ca->io_ref), (iter)++) -+ -+#define for_each_online_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, ~0) -+ -+#define for_each_rw_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, 1 << BCH_MEMBER_STATE_rw) -+ -+#define for_each_readable_member(ca, c, iter) \ -+ __for_each_online_member(ca, c, iter, \ -+ (1 << BCH_MEMBER_STATE_rw)|(1 << BCH_MEMBER_STATE_ro)) -+ -+/* -+ * If a key exists that references a device, the device won't be going away and -+ * we can omit rcu_read_lock(): -+ */ -+static inline struct bch_dev *bch_dev_bkey_exists(const struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_check(c->devs[idx], 1); -+} -+ -+static inline struct bch_dev *bch_dev_locked(struct bch_fs *c, unsigned idx) -+{ -+ EBUG_ON(idx >= c->sb.nr_devices || !c->devs[idx]); -+ -+ return rcu_dereference_protected(c->devs[idx], -+ lockdep_is_held(&c->sb_lock) || -+ lockdep_is_held(&c->state_lock)); -+} -+ -+/* XXX kill, move to struct bch_fs */ -+static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) -+{ -+ struct bch_devs_mask devs; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ memset(&devs, 0, sizeof(devs)); -+ for_each_online_member(ca, c, i) -+ __set_bit(ca->dev_idx, devs.d); -+ return devs; -+} -+ -+extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1; -+extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2; -+ -+static inline bool bch2_member_exists(struct bch_member *m) -+{ -+ return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); -+} -+ -+static inline bool 
bch2_dev_exists(struct bch_sb *sb, unsigned dev) -+{ -+ if (dev < sb->nr_devices) { -+ struct bch_member m = bch2_sb_member_get(sb, dev); -+ return bch2_member_exists(&m); -+ } -+ return false; -+} -+ -+static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) -+{ -+ return (struct bch_member_cpu) { -+ .nbuckets = le64_to_cpu(mi->nbuckets), -+ .first_bucket = le16_to_cpu(mi->first_bucket), -+ .bucket_size = le16_to_cpu(mi->bucket_size), -+ .group = BCH_MEMBER_GROUP(mi), -+ .state = BCH_MEMBER_STATE(mi), -+ .discard = BCH_MEMBER_DISCARD(mi), -+ .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), -+ .durability = BCH_MEMBER_DURABILITY(mi) -+ ? BCH_MEMBER_DURABILITY(mi) - 1 -+ : 1, -+ .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), -+ .valid = bch2_member_exists(mi), -+ }; -+} -+ -+void bch2_sb_members_from_cpu(struct bch_fs *); -+ -+void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); -+void bch2_dev_errors_reset(struct bch_dev *); -+ -+#endif /* _BCACHEFS_SB_MEMBERS_H */ -diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h -new file mode 100644 -index 000000000000..c1860d8163fb ---- /dev/null -+++ b/fs/bcachefs/seqmutex.h -@@ -0,0 +1,48 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SEQMUTEX_H -+#define _BCACHEFS_SEQMUTEX_H -+ -+#include -+ -+struct seqmutex { -+ struct mutex lock; -+ u32 seq; -+}; -+ -+#define seqmutex_init(_lock) mutex_init(&(_lock)->lock) -+ -+static inline bool seqmutex_trylock(struct seqmutex *lock) -+{ -+ return mutex_trylock(&lock->lock); -+} -+ -+static inline void seqmutex_lock(struct seqmutex *lock) -+{ -+ mutex_lock(&lock->lock); -+} -+ -+static inline void seqmutex_unlock(struct seqmutex *lock) -+{ -+ lock->seq++; -+ mutex_unlock(&lock->lock); -+} -+ -+static inline u32 seqmutex_seq(struct seqmutex *lock) -+{ -+ return lock->seq; -+} -+ -+static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq) -+{ -+ if (lock->seq != seq || !mutex_trylock(&lock->lock)) -+ return false; -+ -+ if (lock->seq != seq) { -+ mutex_unlock(&lock->lock); -+ return false; -+ } -+ -+ return true; -+} -+ -+#endif /* _BCACHEFS_SEQMUTEX_H */ -diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c -new file mode 100644 -index 000000000000..dc1a27cc31cd ---- /dev/null -+++ b/fs/bcachefs/siphash.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: BSD-3-Clause -+/* $OpenBSD: siphash.c,v 1.3 2015/02/20 11:51:03 tedu Exp $ */ -+ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ */ -+ -+/* -+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d -+ * are the number of compression rounds and the number of finalization rounds. -+ * A compression round is identical to a finalization round and this round -+ * function is called SipRound. Given a 128-bit key k and a (possibly empty) -+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m). -+ * -+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18, -+ * by Jean-Philippe Aumasson and Daniel J. Bernstein, -+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa -+ * https://131002.net/siphash/siphash.pdf -+ * https://131002.net/siphash/ -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "siphash.h" -+ -+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds) -+{ -+ while (rounds--) { -+ ctx->v[0] += ctx->v[1]; -+ ctx->v[2] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 13); -+ ctx->v[3] = rol64(ctx->v[3], 16); -+ -+ ctx->v[1] ^= ctx->v[0]; -+ ctx->v[3] ^= ctx->v[2]; -+ ctx->v[0] = rol64(ctx->v[0], 32); -+ -+ ctx->v[2] += ctx->v[1]; -+ ctx->v[0] += ctx->v[3]; -+ ctx->v[1] = rol64(ctx->v[1], 17); -+ ctx->v[3] = rol64(ctx->v[3], 21); -+ -+ ctx->v[1] ^= ctx->v[2]; -+ ctx->v[3] ^= ctx->v[0]; -+ ctx->v[2] = rol64(ctx->v[2], 32); -+ } -+} -+ -+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds) -+{ -+ u64 m = get_unaligned_le64(ptr); -+ -+ ctx->v[3] ^= m; -+ SipHash_Rounds(ctx, rounds); -+ ctx->v[0] ^= m; -+} -+ -+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key) -+{ -+ u64 k0, k1; -+ -+ k0 = le64_to_cpu(key->k0); -+ k1 = le64_to_cpu(key->k1); -+ -+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0; -+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1; -+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0; -+ ctx->v[3] = 0x7465646279746573ULL ^ k1; -+ -+ memset(ctx->buf, 0, sizeof(ctx->buf)); -+ ctx->bytes = 0; -+} -+ -+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, -+ const void *src, size_t len) -+{ -+ const u8 *ptr = src; -+ size_t left, used; -+ -+ if (len == 0) -+ return; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ ctx->bytes += len; -+ -+ if (used > 0) { -+ left = sizeof(ctx->buf) - used; -+ -+ if (len >= left) { -+ memcpy(&ctx->buf[used], ptr, left); -+ SipHash_CRounds(ctx, ctx->buf, rc); -+ len -= left; -+ ptr += left; -+ } else { -+ memcpy(&ctx->buf[used], ptr, len); -+ return; -+ } -+ } -+ -+ while (len >= sizeof(ctx->buf)) { -+ SipHash_CRounds(ctx, ptr, rc); -+ len -= sizeof(ctx->buf); -+ ptr += sizeof(ctx->buf); -+ } -+ -+ if (len > 0) -+ memcpy(&ctx->buf[used], ptr, len); -+} -+ -+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ -+ r = SipHash_End(ctx, rc, rf); -+ -+ *((__le64 *) dst) = cpu_to_le64(r); -+} -+ -+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) -+{ -+ u64 r; -+ size_t left, used; -+ -+ used = ctx->bytes % sizeof(ctx->buf); -+ left = sizeof(ctx->buf) - used; -+ memset(&ctx->buf[used], 0, left - 1); -+ ctx->buf[7] = ctx->bytes; -+ -+ 
SipHash_CRounds(ctx, ctx->buf, rc); -+ ctx->v[2] ^= 0xff; -+ SipHash_Rounds(ctx, rf); -+ -+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); -+ memset(ctx, 0, sizeof(*ctx)); -+ return r; -+} -+ -+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) -+{ -+ SIPHASH_CTX ctx; -+ -+ SipHash_Init(&ctx, key); -+ SipHash_Update(&ctx, rc, rf, src, len); -+ return SipHash_End(&ctx, rc, rf); -+} -diff --git a/fs/bcachefs/siphash.h b/fs/bcachefs/siphash.h -new file mode 100644 -index 000000000000..3dfaf34a43b2 ---- /dev/null -+++ b/fs/bcachefs/siphash.h -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* $OpenBSD: siphash.h,v 1.5 2015/02/20 11:51:03 tedu Exp $ */ -+/*- -+ * Copyright (c) 2013 Andre Oppermann -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * $FreeBSD$ -+ */ -+ -+/* -+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions) -+ * optimized for speed on short messages returning a 64bit hash/digest value. 
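-+ * (Naming note: in SipHash-c-d, c is the number of compression rounds per
-+ * 8-byte message block and d the number of finalization rounds; the SipHash
-+ * paper recommends SipHash-2-4 for general use.)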
-+ *
-+ * The number of rounds is defined during the initialization:
-+ *  SipHash24_Init() for the fast and reasonably strong version
-+ *  SipHash48_Init() for the strong version (half as fast)
-+ *
-+ * SIPHASH_CTX ctx;
-+ * SIPHASH_KEY key = { ... };	/* 16 bytes of key material */
-+ * SipHash24_Init(&ctx, &key);
-+ * SipHash24_Update(&ctx, pointer_to_string, length_of_string);
-+ * SipHash24_Final(output, &ctx);
-+ */
-+
-+#ifndef _SIPHASH_H_
-+#define _SIPHASH_H_
-+
-+#include
-+
-+#define SIPHASH_BLOCK_LENGTH 8
-+#define SIPHASH_KEY_LENGTH 16
-+#define SIPHASH_DIGEST_LENGTH 8
-+
-+typedef struct _SIPHASH_CTX {
-+	u64 v[4];
-+	u8 buf[SIPHASH_BLOCK_LENGTH];
-+	u32 bytes;
-+} SIPHASH_CTX;
-+
-+typedef struct {
-+	__le64 k0;
-+	__le64 k1;
-+} SIPHASH_KEY;
-+
-+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *);
-+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t);
-+u64 SipHash_End(SIPHASH_CTX *, int, int);
-+void SipHash_Final(void *, SIPHASH_CTX *, int, int);
-+u64 SipHash(const SIPHASH_KEY *, int, int, const void *, size_t);
-+
-+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k))
-+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l))
-+#define SipHash24_End(_d) SipHash_End((_d), 2, 4)
-+#define SipHash24_Final(_d, _c) SipHash_Final((_d), (_c), 2, 4)
-+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l))
-+
-+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k))
-+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l))
-+#define SipHash48_End(_d) SipHash_End((_d), 4, 8)
-+#define SipHash48_Final(_d, _c) SipHash_Final((_d), (_c), 4, 8)
-+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l))
-+
-+#endif /* _SIPHASH_H_ */
-diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
-new file mode 100644
-index 000000000000..b775cf0fb7cb
---- /dev/null
-+++ b/fs/bcachefs/six.c
-@@ -0,0 +1,917 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#include
-+
-+#include "six.h"
-+
-+#ifdef DEBUG
-+#define EBUG_ON(cond) BUG_ON(cond)
-+#else
-+#define EBUG_ON(cond) do {} while (0)
-+#endif
-+
-+#define six_acquire(l, t, r, ip) lock_acquire(l, 0, t, r, 1, NULL, ip)
-+#define six_release(l, ip) lock_release(l, ip)
-+
-+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
-+
-+#define SIX_LOCK_HELD_read_OFFSET 0
-+#define SIX_LOCK_HELD_read ~(~0U << 26)
-+#define SIX_LOCK_HELD_intent (1U << 26)
-+#define SIX_LOCK_HELD_write (1U << 27)
-+#define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read))
-+#define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write))
-+#define SIX_LOCK_NOSPIN (1U << 31)
-+
-+struct six_lock_vals {
-+	/* Value we add to the lock in order to take the lock: */
-+	u32 lock_val;
-+
-+	/* If the lock has this value (used as a mask), taking the lock fails: */
-+	u32 lock_fail;
-+
-+	/* Mask that indicates lock is held for this type: */
-+	u32 held_mask;
-+
-+	/* Waitlist we wakeup when releasing the lock: */
-+	enum six_lock_type unlock_wakeup;
-+};
-+
-+static const struct six_lock_vals l[] = {
-+	[SIX_LOCK_read] = {
-+		.lock_val = 1U << SIX_LOCK_HELD_read_OFFSET,
-+		.lock_fail = SIX_LOCK_HELD_write,
-+		.held_mask = SIX_LOCK_HELD_read,
-+		.unlock_wakeup = SIX_LOCK_write,
-+	},
-+	[SIX_LOCK_intent] = {
-+		.lock_val = SIX_LOCK_HELD_intent,
-+		.lock_fail = SIX_LOCK_HELD_intent,
-+		.held_mask = SIX_LOCK_HELD_intent,
-+		.unlock_wakeup = SIX_LOCK_intent,
-+	},
-+	[SIX_LOCK_write] = {
-+		.lock_val = SIX_LOCK_HELD_write,
-+		.lock_fail = SIX_LOCK_HELD_read,
-+		.held_mask = SIX_LOCK_HELD_write,
-+		.unlock_wakeup = SIX_LOCK_read,
-+	},
-+};
-+
-+static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
-+{
-+	if ((atomic_read(&lock->state) & mask) != mask)
-+		atomic_or(mask, &lock->state);
-+}
-+
-+static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
-+{
-+	if (atomic_read(&lock->state) & mask)
-+		atomic_and(~mask, &lock->state);
-+}
-+
-+static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
-+				 u32 old, struct task_struct *owner)
-+{
-+	if (type != SIX_LOCK_intent)
-+		return;
-+
-+	if (!(old & SIX_LOCK_HELD_intent)) {
-+		EBUG_ON(lock->owner);
-+		lock->owner = owner;
-+	} else {
-+		EBUG_ON(lock->owner != current);
-+	}
-+}
-+
-+static inline unsigned pcpu_read_count(struct six_lock *lock)
-+{
-+	unsigned read_count = 0;
-+	int cpu;
-+
-+	for_each_possible_cpu(cpu)
-+		read_count += *per_cpu_ptr(lock->readers, cpu);
-+	return read_count;
-+}
-+
-+/*
-+ * __do_six_trylock() - main trylock routine
-+ *
-+ * Returns 1 on success, 0 on failure
-+ *
-+ * In percpu reader mode, a failed trylock may cause a spurious trylock failure
-+ * for another thread taking the competing lock type, and we may have to do a
-+ * wakeup: when a wakeup is required, we return -1 - wakeup_type.
-+ */
-+static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
-+			    struct task_struct *task, bool try)
-+{
-+	int ret;
-+	u32 old;
-+
-+	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
-+	EBUG_ON(type == SIX_LOCK_write &&
-+		(try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
-+
-+	/*
-+	 * Percpu reader mode:
-+	 *
-+	 * The basic idea behind this algorithm is that you can implement a lock
-+	 * between two threads without any atomics, just memory barriers:
-+	 *
-+	 * For two threads you'll need two variables, one variable for "thread a
-+	 * has the lock" and another for "thread b has the lock".
-+	 *
-+	 * To take the lock, a thread sets its variable indicating that it holds
-+	 * the lock, then issues a full memory barrier, then reads from the
-+	 * other thread's variable to check if the other thread thinks it has
-+	 * the lock. If we raced, we back off and retry/sleep.
-+	 *
-+	 * Failure to take the lock may cause a spurious trylock failure in
-+	 * another thread, because we temporarily set the lock to indicate that
-+	 * we held it. This would be a problem for a thread in six_lock(), when
-+	 * it is calling trylock after adding itself to the waitlist and
-+	 * prior to sleeping.
-+	 *
-+	 * Therefore, if we fail to get the lock, and there were waiters of the
-+	 * type we conflict with, we will have to issue a wakeup.
-+	 *
-+	 * Since we may be called under wait_lock (and by the wakeup code
-+	 * itself), we return that the wakeup has to be done instead of doing it
-+	 * here.
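-+	 *
-+	 * An illustrative sketch of the two-thread scheme just described
-+	 * (pseudo-code only; a_held/b_held are invented for the example):
-+	 *
-+	 *	// thread A's trylock (thread B is symmetric):
-+	 *	WRITE_ONCE(a_held, true);
-+	 *	smp_mb();
-+	 *	if (!READ_ONCE(b_held))
-+	 *		return true;		// lock acquired
-+	 *	WRITE_ONCE(a_held, false);	// raced: back off
-+	 *	return false;			// caller retries/sleeps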
-+ */ -+ if (type == SIX_LOCK_read && lock->readers) { -+ preempt_disable(); -+ this_cpu_inc(*lock->readers); /* signal that we own lock */ -+ -+ smp_mb(); -+ -+ old = atomic_read(&lock->state); -+ ret = !(old & l[type].lock_fail); -+ -+ this_cpu_sub(*lock->readers, !ret); -+ preempt_enable(); -+ -+ if (!ret && (old & SIX_LOCK_WAITING_write)) -+ ret = -1 - SIX_LOCK_write; -+ } else if (type == SIX_LOCK_write && lock->readers) { -+ if (try) { -+ atomic_add(SIX_LOCK_HELD_write, &lock->state); -+ smp_mb__after_atomic(); -+ } -+ -+ ret = !pcpu_read_count(lock); -+ -+ if (try && !ret) { -+ old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state); -+ if (old & SIX_LOCK_WAITING_read) -+ ret = -1 - SIX_LOCK_read; -+ } -+ } else { -+ old = atomic_read(&lock->state); -+ do { -+ ret = !(old & l[type].lock_fail); -+ if (!ret || (type == SIX_LOCK_write && !try)) { -+ smp_mb(); -+ break; -+ } -+ } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val)); -+ -+ EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask)); -+ } -+ -+ if (ret > 0) -+ six_set_owner(lock, type, old, task); -+ -+ EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 && -+ (atomic_read(&lock->state) & SIX_LOCK_HELD_write)); -+ -+ return ret; -+} -+ -+static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type) -+{ -+ struct six_lock_waiter *w, *next; -+ struct task_struct *task; -+ bool saw_one; -+ int ret; -+again: -+ ret = 0; -+ saw_one = false; -+ raw_spin_lock(&lock->wait_lock); -+ -+ list_for_each_entry_safe(w, next, &lock->wait_list, list) { -+ if (w->lock_want != lock_type) -+ continue; -+ -+ if (saw_one && lock_type != SIX_LOCK_read) -+ goto unlock; -+ saw_one = true; -+ -+ ret = __do_six_trylock(lock, lock_type, w->task, false); -+ if (ret <= 0) -+ goto unlock; -+ -+ /* -+ * Similar to percpu_rwsem_wake_function(), we need to guard -+ * against the wakee noticing w->lock_acquired, returning, and -+ * then exiting before we do the wakeup: -+ */ -+ task = get_task_struct(w->task); -+ __list_del(w->list.prev, w->list.next); -+ /* -+ * The release barrier here ensures the ordering of the -+ * __list_del before setting w->lock_acquired; @w is on the -+ * stack of the thread doing the waiting and will be reused -+ * after it sees w->lock_acquired with no other locking: -+ * pairs with smp_load_acquire() in six_lock_slowpath() -+ */ -+ smp_store_release(&w->lock_acquired, true); -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+ -+ six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type); -+unlock: -+ raw_spin_unlock(&lock->wait_lock); -+ -+ if (ret < 0) { -+ lock_type = -ret - 1; -+ goto again; -+ } -+} -+ -+__always_inline -+static void six_lock_wakeup(struct six_lock *lock, u32 state, -+ enum six_lock_type lock_type) -+{ -+ if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read)) -+ return; -+ -+ if (!(state & (SIX_LOCK_WAITING_read << lock_type))) -+ return; -+ -+ __six_lock_wakeup(lock, lock_type); -+} -+ -+__always_inline -+static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try) -+{ -+ int ret; -+ -+ ret = __do_six_trylock(lock, type, current, try); -+ if (ret < 0) -+ __six_lock_wakeup(lock, -ret - 1); -+ -+ return ret > 0; -+} -+ -+/** -+ * six_trylock_ip - attempt to take a six lock without blocking -+ * @lock: lock to take -+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write -+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ -+ * -+ * Return: true on success, false on failure. 
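-+ *
-+ * Note that no lockdep annotation is taken for write locks: a write lock can
-+ * only be taken while already holding the intent lock, and the intent lock is
-+ * what lockdep tracks.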
-+ */ -+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip) -+{ -+ if (!do_six_trylock(lock, type, true)) -+ return false; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip); -+ return true; -+} -+EXPORT_SYMBOL_GPL(six_trylock_ip); -+ -+/** -+ * six_relock_ip - attempt to re-take a lock that was held previously -+ * @lock: lock to take -+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write -+ * @seq: lock sequence number obtained from six_lock_seq() while lock was -+ * held previously -+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ -+ * -+ * Return: true on success, false on failure. -+ */ -+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, -+ unsigned seq, unsigned long ip) -+{ -+ if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip)) -+ return false; -+ -+ if (six_lock_seq(lock) != seq) { -+ six_unlock_ip(lock, type, ip); -+ return false; -+ } -+ -+ return true; -+} -+EXPORT_SYMBOL_GPL(six_relock_ip); -+ -+#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER -+ -+static inline bool six_can_spin_on_owner(struct six_lock *lock) -+{ -+ struct task_struct *owner; -+ bool ret; -+ -+ if (need_resched()) -+ return false; -+ -+ rcu_read_lock(); -+ owner = READ_ONCE(lock->owner); -+ ret = !owner || owner_on_cpu(owner); -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static inline bool six_spin_on_owner(struct six_lock *lock, -+ struct task_struct *owner, -+ u64 end_time) -+{ -+ bool ret = true; -+ unsigned loop = 0; -+ -+ rcu_read_lock(); -+ while (lock->owner == owner) { -+ /* -+ * Ensure we emit the owner->on_cpu, dereference _after_ -+ * checking lock->owner still matches owner. If that fails, -+ * owner might point to freed memory. If it still matches, -+ * the rcu_read_lock() ensures the memory stays valid. -+ */ -+ barrier(); -+ -+ if (!owner_on_cpu(owner) || need_resched()) { -+ ret = false; -+ break; -+ } -+ -+ if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { -+ six_set_bitmask(lock, SIX_LOCK_NOSPIN); -+ ret = false; -+ break; -+ } -+ -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ struct task_struct *task = current; -+ u64 end_time; -+ -+ if (type == SIX_LOCK_write) -+ return false; -+ -+ preempt_disable(); -+ if (!six_can_spin_on_owner(lock)) -+ goto fail; -+ -+ if (!osq_lock(&lock->osq)) -+ goto fail; -+ -+ end_time = sched_clock() + 10 * NSEC_PER_USEC; -+ -+ while (1) { -+ struct task_struct *owner; -+ -+ /* -+ * If there's an owner, wait for it to either -+ * release the lock or go to sleep. -+ */ -+ owner = READ_ONCE(lock->owner); -+ if (owner && !six_spin_on_owner(lock, owner, end_time)) -+ break; -+ -+ if (do_six_trylock(lock, type, false)) { -+ osq_unlock(&lock->osq); -+ preempt_enable(); -+ return true; -+ } -+ -+ /* -+ * When there's no owner, we might have preempted between the -+ * owner acquiring the lock and setting the owner field. If -+ * we're an RT task that will live-lock because we won't let -+ * the owner complete. -+ */ -+ if (!owner && (need_resched() || rt_task(task))) -+ break; -+ -+ /* -+ * The cpu_relax() call is a compiler barrier which forces -+ * everything in this loop to be re-loaded. We don't need -+ * memory barriers as we'll eventually observe the right -+ * values at the cost of a few extra spins. 
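-+	 *
-+	 * (Like the kernel's mutexes and rwsems, this uses osq_lock() and
-+	 * spins only while the lock owner is running on a CPU.)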
-+ */ -+ cpu_relax(); -+ } -+ -+ osq_unlock(&lock->osq); -+fail: -+ preempt_enable(); -+ -+ /* -+ * If we fell out of the spin path because of need_resched(), -+ * reschedule now, before we try-lock again. This avoids getting -+ * scheduled out right after we obtained the lock. -+ */ -+ if (need_resched()) -+ schedule(); -+ -+ return false; -+} -+ -+#else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */ -+ -+static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) -+{ -+ return false; -+} -+ -+#endif -+ -+noinline -+static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, -+ struct six_lock_waiter *wait, -+ six_lock_should_sleep_fn should_sleep_fn, void *p, -+ unsigned long ip) -+{ -+ int ret = 0; -+ -+ if (type == SIX_LOCK_write) { -+ EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write); -+ atomic_add(SIX_LOCK_HELD_write, &lock->state); -+ smp_mb__after_atomic(); -+ } -+ -+ trace_contention_begin(lock, 0); -+ lock_contended(&lock->dep_map, ip); -+ -+ if (six_optimistic_spin(lock, type)) -+ goto out; -+ -+ wait->task = current; -+ wait->lock_want = type; -+ wait->lock_acquired = false; -+ -+ raw_spin_lock(&lock->wait_lock); -+ six_set_bitmask(lock, SIX_LOCK_WAITING_read << type); -+ /* -+ * Retry taking the lock after taking waitlist lock, in case we raced -+ * with an unlock: -+ */ -+ ret = __do_six_trylock(lock, type, current, false); -+ if (ret <= 0) { -+ wait->start_time = local_clock(); -+ -+ if (!list_empty(&lock->wait_list)) { -+ struct six_lock_waiter *last = -+ list_last_entry(&lock->wait_list, -+ struct six_lock_waiter, list); -+ -+ if (time_before_eq64(wait->start_time, last->start_time)) -+ wait->start_time = last->start_time + 1; -+ } -+ -+ list_add_tail(&wait->list, &lock->wait_list); -+ } -+ raw_spin_unlock(&lock->wait_lock); -+ -+ if (unlikely(ret > 0)) { -+ ret = 0; -+ goto out; -+ } -+ -+ if (unlikely(ret < 0)) { -+ __six_lock_wakeup(lock, -ret - 1); -+ ret = 0; -+ } -+ -+ while (1) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ -+ /* -+ * Ensures that writes to the waitlist entry happen after we see -+ * wait->lock_acquired: pairs with the smp_store_release in -+ * __six_lock_wakeup -+ */ -+ if (smp_load_acquire(&wait->lock_acquired)) -+ break; -+ -+ ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; -+ if (unlikely(ret)) { -+ bool acquired; -+ -+ /* -+ * If should_sleep_fn() returns an error, we are -+ * required to return that error even if we already -+ * acquired the lock - should_sleep_fn() might have -+ * modified external state (e.g. 
when the deadlock cycle -+ * detector in bcachefs issued a transaction restart) -+ */ -+ raw_spin_lock(&lock->wait_lock); -+ acquired = wait->lock_acquired; -+ if (!acquired) -+ list_del(&wait->list); -+ raw_spin_unlock(&lock->wait_lock); -+ -+ if (unlikely(acquired)) -+ do_six_unlock_type(lock, type); -+ break; -+ } -+ -+ schedule(); -+ } -+ -+ __set_current_state(TASK_RUNNING); -+out: -+ if (ret && type == SIX_LOCK_write) { -+ six_clear_bitmask(lock, SIX_LOCK_HELD_write); -+ six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); -+ } -+ trace_contention_end(lock, 0); -+ -+ return ret; -+} -+ -+/** -+ * six_lock_ip_waiter - take a lock, with full waitlist interface -+ * @lock: lock to take -+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write -+ * @wait: pointer to wait object, which will be added to lock's waitlist -+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior -+ * to scheduling -+ * @p: passed through to @should_sleep_fn -+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ -+ * -+ * This is the most general six_lock() variant, with parameters to support full -+ * cycle detection for deadlock avoidance. -+ * -+ * The code calling this function must implement tracking of held locks, and the -+ * @wait object should be embedded into the struct that tracks held locks - -+ * which must also be accessible in a thread-safe way. -+ * -+ * @should_sleep_fn should invoke the cycle detector; it should walk each -+ * lock's waiters, and for each waiter recursively walk their held locks. -+ * -+ * When this function must block, @wait will be added to @lock's waitlist before -+ * calling trylock, and before calling @should_sleep_fn, and @wait will not be -+ * removed from the lock waitlist until the lock has been successfully acquired, -+ * or we abort. -+ * -+ * @wait.start_time will be monotonically increasing for any given waitlist, and -+ * thus may be used as a loop cursor. -+ * -+ * Return: 0 on success, or the return code from @should_sleep_fn on failure. -+ */ -+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type, -+ struct six_lock_waiter *wait, -+ six_lock_should_sleep_fn should_sleep_fn, void *p, -+ unsigned long ip) -+{ -+ int ret; -+ -+ wait->start_time = 0; -+ -+ if (type != SIX_LOCK_write) -+ six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip); -+ -+ ret = do_six_trylock(lock, type, true) ? 
0
-+		: six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
-+
-+	if (ret && type != SIX_LOCK_write)
-+		six_release(&lock->dep_map, ip);
-+	if (!ret)
-+		lock_acquired(&lock->dep_map, ip);
-+
-+	return ret;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
-+
-+__always_inline
-+static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
-+{
-+	u32 state;
-+
-+	if (type == SIX_LOCK_intent)
-+		lock->owner = NULL;
-+
-+	if (type == SIX_LOCK_read &&
-+	    lock->readers) {
-+		smp_mb(); /* unlock barrier */
-+		this_cpu_dec(*lock->readers);
-+		smp_mb(); /* between unlocking and checking for waiters */
-+		state = atomic_read(&lock->state);
-+	} else {
-+		u32 v = l[type].lock_val;
-+
-+		if (type != SIX_LOCK_read)
-+			v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
-+
-+		EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
-+		state = atomic_sub_return_release(v, &lock->state);
-+	}
-+
-+	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
-+}
-+
-+/**
-+ * six_unlock_ip - drop a six lock
-+ * @lock: lock to unlock
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * When a lock is held multiple times (because six_lock_increment() was used),
-+ * this decrements the 'lock held' counter by one.
-+ *
-+ * For example:
-+ * six_lock_read(&foo->lock); read count 1
-+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
-+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 1
-+ * six_unlock_type(&foo->lock, SIX_LOCK_read); read count 0
-+ */
-+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
-+{
-+	EBUG_ON(type == SIX_LOCK_write &&
-+		!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
-+	EBUG_ON((type == SIX_LOCK_write ||
-+		 type == SIX_LOCK_intent) &&
-+		lock->owner != current);
-+
-+	if (type != SIX_LOCK_write)
-+		six_release(&lock->dep_map, ip);
-+	else
-+		lock->seq++;
-+
-+	if (type == SIX_LOCK_intent &&
-+	    lock->intent_lock_recurse) {
-+		--lock->intent_lock_recurse;
-+		return;
-+	}
-+
-+	do_six_unlock_type(lock, type);
-+}
-+EXPORT_SYMBOL_GPL(six_unlock_ip);
-+
-+/**
-+ * six_lock_downgrade - convert an intent lock to a read lock
-+ * @lock: lock to downgrade
-+ *
-+ * @lock will have read count incremented and intent count decremented
-+ */
-+void six_lock_downgrade(struct six_lock *lock)
-+{
-+	six_lock_increment(lock, SIX_LOCK_read);
-+	six_unlock_intent(lock);
-+}
-+EXPORT_SYMBOL_GPL(six_lock_downgrade);
-+
-+/**
-+ * six_lock_tryupgrade - attempt to convert read lock to an intent lock
-+ * @lock: lock to upgrade
-+ *
-+ * On success, @lock will have intent count incremented and read count
-+ * decremented
-+ *
-+ * Return: true on success, false on failure
-+ */
-+bool six_lock_tryupgrade(struct six_lock *lock)
-+{
-+	u32 old = atomic_read(&lock->state), new;
-+
-+	do {
-+		new = old;
-+
-+		if (new & SIX_LOCK_HELD_intent)
-+			return false;
-+
-+		if (!lock->readers) {
-+			EBUG_ON(!(new & SIX_LOCK_HELD_read));
-+			new -= l[SIX_LOCK_read].lock_val;
-+		}
-+
-+		new |= SIX_LOCK_HELD_intent;
-+	} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
-+
-+	if (lock->readers)
-+		this_cpu_dec(*lock->readers);
-+
-+	six_set_owner(lock, SIX_LOCK_intent, old, current);
-+
-+	return true;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
-+
-+/**
-+ * six_trylock_convert - attempt to convert a held lock from one type to another
-+ * @lock: lock to convert
-+ * @from: SIX_LOCK_read or SIX_LOCK_intent
-+ * @to: SIX_LOCK_read or SIX_LOCK_intent
-+ *
-+ * On success, @lock will be held in the @to state and no longer in the @from
-+ * state.
-+ *
-+ * Return: true on success, false on failure
-+ */
-+bool six_trylock_convert(struct six_lock *lock,
-+			 enum six_lock_type from,
-+			 enum six_lock_type to)
-+{
-+	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
-+
-+	if (to == from)
-+		return true;
-+
-+	if (to == SIX_LOCK_read) {
-+		six_lock_downgrade(lock);
-+		return true;
-+	} else {
-+		return six_lock_tryupgrade(lock);
-+	}
-+}
-+EXPORT_SYMBOL_GPL(six_trylock_convert);
-+
-+/**
-+ * six_lock_increment - increase held lock count on a lock that is already held
-+ * @lock: lock to increment
-+ * @type: SIX_LOCK_read or SIX_LOCK_intent
-+ *
-+ * @lock must already be held, with a lock type that is greater than or equal to
-+ * @type
-+ *
-+ * A corresponding six_unlock_type() call will be required for @lock to be fully
-+ * unlocked.
-+ */
-+void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
-+{
-+	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
-+
-+	/* XXX: assert already locked, and that we don't overflow: */
-+
-+	switch (type) {
-+	case SIX_LOCK_read:
-+		if (lock->readers) {
-+			this_cpu_inc(*lock->readers);
-+		} else {
-+			EBUG_ON(!(atomic_read(&lock->state) &
-+				  (SIX_LOCK_HELD_read|
-+				   SIX_LOCK_HELD_intent)));
-+			atomic_add(l[type].lock_val, &lock->state);
-+		}
-+		break;
-+	case SIX_LOCK_intent:
-+		EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
-+		lock->intent_lock_recurse++;
-+		break;
-+	case SIX_LOCK_write:
-+		BUG();
-+		break;
-+	}
-+}
-+EXPORT_SYMBOL_GPL(six_lock_increment);
-+
-+/**
-+ * six_lock_wakeup_all - wake up all waiters on @lock
-+ * @lock: lock to wake up waiters for
-+ *
-+ * Waking up waiters will cause them to re-run should_sleep_fn, which may then
-+ * abort the lock operation.
-+ *
-+ * This function is never needed in a bug-free program; it's only useful in
-+ * debug code, e.g. to determine if a cycle detector is at fault.
-+ */
-+void six_lock_wakeup_all(struct six_lock *lock)
-+{
-+	u32 state = atomic_read(&lock->state);
-+	struct six_lock_waiter *w;
-+
-+	six_lock_wakeup(lock, state, SIX_LOCK_read);
-+	six_lock_wakeup(lock, state, SIX_LOCK_intent);
-+	six_lock_wakeup(lock, state, SIX_LOCK_write);
-+
-+	raw_spin_lock(&lock->wait_lock);
-+	list_for_each_entry(w, &lock->wait_list, list)
-+		wake_up_process(w->task);
-+	raw_spin_unlock(&lock->wait_lock);
-+}
-+EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
-+
-+/**
-+ * six_lock_counts - return held lock counts, for each lock type
-+ * @lock: lock to return counters for
-+ *
-+ * Return: the number of times a lock is held for read, intent and write.
-+ */
-+struct six_lock_count six_lock_counts(struct six_lock *lock)
-+{
-+	struct six_lock_count ret;
-+
-+	ret.n[SIX_LOCK_read] = !lock->readers
-+		? atomic_read(&lock->state) & SIX_LOCK_HELD_read
-+		: pcpu_read_count(lock);
-+	ret.n[SIX_LOCK_intent] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
-+		lock->intent_lock_recurse;
-+	ret.n[SIX_LOCK_write] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
-+
-+	return ret;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_counts);
-+
-+/**
-+ * six_lock_readers_add - directly manipulate reader count of a lock
-+ * @lock: lock to add/subtract readers for
-+ * @nr: reader count to add/subtract
-+ *
-+ * When an upper layer is implementing lock reentrancy, we may have both read
-+ * and intent locks on the same lock.
-+ *
-+ * When we need to take a write lock, the read locks will cause self-deadlock,
-+ * because six locks themselves do not track which read locks are held by the
-+ * current thread and which are held by a different thread - they do no
-+ * per-thread tracking of held locks.
-+ *
-+ * The upper layer that is tracking held locks may however, if trylock() has
-+ * failed, count up its own read locks, subtract them, take the write lock, and
-+ * then re-add them.
-+ *
-+ * As in any other situation when taking a write lock, @lock must be held for
-+ * intent one (or more) times, so @lock will never be left unlocked.
-+ */
-+void six_lock_readers_add(struct six_lock *lock, int nr)
-+{
-+	if (lock->readers) {
-+		this_cpu_add(*lock->readers, nr);
-+	} else {
-+		EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
-+		/* reader count starts at bit 0 */
-+		atomic_add(nr, &lock->state);
-+	}
-+}
-+EXPORT_SYMBOL_GPL(six_lock_readers_add);
-+
-+/**
-+ * six_lock_exit - release resources held by a lock prior to freeing
-+ * @lock: lock to exit
-+ *
-+ * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
-+ * required to free the percpu read counts.
-+ */
-+void six_lock_exit(struct six_lock *lock)
-+{
-+	WARN_ON(lock->readers && pcpu_read_count(lock));
-+	WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
-+
-+	free_percpu(lock->readers);
-+	lock->readers = NULL;
-+}
-+EXPORT_SYMBOL_GPL(six_lock_exit);
-+
-+void __six_lock_init(struct six_lock *lock, const char *name,
-+		     struct lock_class_key *key, enum six_lock_init_flags flags)
-+{
-+	atomic_set(&lock->state, 0);
-+	raw_spin_lock_init(&lock->wait_lock);
-+	INIT_LIST_HEAD(&lock->wait_list);
-+#ifdef CONFIG_DEBUG_LOCK_ALLOC
-+	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
-+	lockdep_init_map(&lock->dep_map, name, key, 0);
-+#endif
-+
-+	/*
-+	 * Don't assume that we have real percpu variables available in
-+	 * userspace:
-+	 */
-+#ifdef __KERNEL__
-+	if (flags & SIX_LOCK_INIT_PCPU) {
-+		/*
-+		 * We don't return an error here on memory allocation failure
-+		 * since percpu is an optimization, and locks will work with the
-+		 * same semantics in non-percpu mode: callers can check for
-+		 * failure if they wish by checking lock->readers, but generally
-+		 * will not want to treat it as an error.
-+		 */
-+		lock->readers = alloc_percpu(unsigned);
-+	}
-+#endif
-+}
-+EXPORT_SYMBOL_GPL(__six_lock_init);
-diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h
-new file mode 100644
-index 000000000000..4c268b0b8316
---- /dev/null
-+++ b/fs/bcachefs/six.h
-@@ -0,0 +1,393 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+#ifndef _LINUX_SIX_H
-+#define _LINUX_SIX_H
-+
-+/**
-+ * DOC: SIX locks overview
-+ *
-+ * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
-+ * but with an additional state: read/shared, intent, exclusive/write
-+ *
-+ * The purpose of the intent state is to allow for greater concurrency on tree
-+ * structures without deadlocking. In general, a read can't be upgraded to a
-+ * write lock without deadlocking, so an operation that updates multiple nodes
-+ * will have to take write locks for the full duration of the operation.
-+ *
-+ * But by adding an intent state, which is exclusive with other intent locks but
-+ * not with readers, we can take intent locks at the start of the operation,
-+ * and then take write locks only for the actual update to each individual
-+ * node, without deadlocking.
-+ *
-+ * Example usage:
-+ * six_lock_read(&foo->lock);
-+ * six_unlock_read(&foo->lock);
-+ *
-+ * An intent lock must be held before taking a write lock:
-+ * six_lock_intent(&foo->lock);
-+ * six_lock_write(&foo->lock);
-+ * six_unlock_write(&foo->lock);
-+ * six_unlock_intent(&foo->lock);
-+ *
-+ * Other operations:
-+ * six_trylock_read()
-+ * six_trylock_intent()
-+ * six_trylock_write()
-+ *
-+ * six_lock_downgrade() convert from intent to read
-+ * six_lock_tryupgrade() attempt to convert from read to intent, may fail
-+ *
-+ * There are also interfaces that take the lock type as an enum:
-+ *
-+ * six_lock_type(&foo->lock, SIX_LOCK_read);
-+ * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
-+ * six_lock_type(&foo->lock, SIX_LOCK_write);
-+ * six_unlock_type(&foo->lock, SIX_LOCK_write);
-+ * six_unlock_type(&foo->lock, SIX_LOCK_intent);
-+ *
-+ * Lock sequence numbers - unlock(), relock():
-+ *
-+ * Locks embed sequence numbers, which are incremented on write lock/unlock.
-+ * This allows locks to be dropped and then retaken iff the state they protect
-+ * hasn't changed; this makes it much easier to avoid holding locks while e.g.
-+ * doing IO or allocating memory.
-+ *
-+ * Example usage:
-+ * six_lock_read(&foo->lock);
-+ * u32 seq = six_lock_seq(&foo->lock);
-+ * six_unlock_read(&foo->lock);
-+ *
-+ * some_operation_that_may_block();
-+ *
-+ * if (six_relock_read(&foo->lock, seq)) { ... }
-+ *
-+ * If the relock operation succeeds, it is as if the lock was never unlocked.
-+ *
-+ * Reentrancy:
-+ *
-+ * Six locks are not by themselves reentrant, but have counters for both the
-+ * read and intent states that can be used to provide reentrancy by an upper
-+ * layer that tracks held locks. If a lock is known to already be held in the
-+ * read or intent state, six_lock_increment() can be used to bump the "lock
-+ * held in this state" counter, increasing the number of unlock calls that
-+ * will be required to fully unlock it.
-+ *
-+ * Example usage:
-+ * six_lock_read(&foo->lock);
-+ * six_lock_increment(&foo->lock, SIX_LOCK_read);
-+ * six_unlock_read(&foo->lock);
-+ * six_unlock_read(&foo->lock);
-+ * foo->lock is now fully unlocked.
-+ *
-+ * Since the intent state supersedes read, it's legal to increment the read
-+ * counter when holding an intent lock, but not the reverse.
-+ *
-+ * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
-+ * is not legal.
-+ *
-+ * should_sleep_fn:
-+ *
-+ * There is a six_lock() variant that takes a function pointer that is called
-+ * immediately prior to schedule() when blocking, and may return an error to
-+ * abort.
-+ *
-+ * One possible use for this feature is when objects being locked are part of
-+ * a cache and may be reused, and lock ordering is based on a property of the
-+ * object that will change when the object is reused - i.e. logical key order.
-+ *
-+ * If looking up an object in the cache may race with object reuse, and lock
-+ * ordering is required to prevent deadlock, object reuse may change the
-+ * correct lock order for that object and cause a deadlock. should_sleep_fn
-+ * can be used to check if the object is still the object we want and avoid
-+ * this deadlock.
-+ *
-+ * Wait list entry interface:
-+ *
-+ * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
-+ * wait list entry. By embedding six_lock_waiter into another object, and by
-+ * traversing lock waitlists, it is then possible for an upper layer to
-+ * implement full cycle detection for deadlock avoidance.
-+ *
-+ * should_sleep_fn should be used for invoking the cycle detector, walking the
-+ * graph of held locks to check for a deadlock. The upper layer must track
-+ * held locks for each thread, and each thread's held locks must be reachable
-+ * from its six_lock_waiter object.
-+ *
-+ * six_lock_waiter() will add the wait object to the waitlist before retrying
-+ * taking the lock, and before calling should_sleep_fn, and the wait object will
-+ * not be removed from the waitlist until either the lock has been successfully
-+ * acquired, or we aborted because should_sleep_fn returned an error.
-+ *
-+ * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
-+ * have timestamps in strictly ascending order - this is so the timestamp can
-+ * be used as a cursor for lock graph traversal.
-+ */
-+
-+#include
-+#include
-+#include
-+
-+#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
-+#include
-+#endif
-+
-+enum six_lock_type {
-+	SIX_LOCK_read,
-+	SIX_LOCK_intent,
-+	SIX_LOCK_write,
-+};
-+
-+struct six_lock {
-+	atomic_t state;
-+	u32 seq;
-+	unsigned intent_lock_recurse;
-+	struct task_struct *owner;
-+	unsigned __percpu *readers;
-+#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
-+	struct optimistic_spin_queue osq;
-+#endif
-+	raw_spinlock_t wait_lock;
-+	struct list_head wait_list;
-+#ifdef CONFIG_DEBUG_LOCK_ALLOC
-+	struct lockdep_map dep_map;
-+#endif
-+};
-+
-+struct six_lock_waiter {
-+	struct list_head list;
-+	struct task_struct *task;
-+	enum six_lock_type lock_want;
-+	bool lock_acquired;
-+	u64 start_time;
-+};
-+
-+typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);
-+
-+void six_lock_exit(struct six_lock *lock);
-+
-+enum six_lock_init_flags {
-+	SIX_LOCK_INIT_PCPU = 1U << 0,
-+};
-+
-+void __six_lock_init(struct six_lock *lock, const char *name,
-+		     struct lock_class_key *key, enum six_lock_init_flags flags);
-+
-+/**
-+ * six_lock_init - initialize a six lock
-+ * @lock: lock to initialize
-+ * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
-+ */
-+#define six_lock_init(lock, flags) \
-+do { \
-+	static struct lock_class_key __key; \
-+ \
-+	__six_lock_init((lock), #lock, &__key, flags); \
-+} while (0)
-+
-+/**
-+ * six_lock_seq - obtain current lock sequence number
-+ * @lock: six_lock to obtain sequence number for
-+ *
-+ * @lock should be held for read or intent, and not write
-+ *
-+ * By saving the lock sequence number, we can unlock @lock and then (typically
-+ * after some blocking operation) attempt to relock it: the relock will succeed
-+ * if the sequence number hasn't changed, meaning no write locks have been taken
-+ * and state corresponding to what @lock protects is still valid.
-+ */
-+static inline u32 six_lock_seq(const struct six_lock *lock)
-+{
-+	return lock->seq;
-+}
-+
-+bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
-+
-+/**
-+ * six_trylock_type - attempt to take a six lock without blocking
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ *
-+ * Return: true on success, false on failure.
-+ */
-+static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
-+{
-+	return six_trylock_ip(lock, type, _THIS_IP_);
-+}
-+
-+int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
-+		       struct six_lock_waiter *wait,
-+		       six_lock_should_sleep_fn should_sleep_fn, void *p,
-+		       unsigned long ip);
-+
-+/**
-+ * six_lock_waiter - take a lock, with full waitlist interface
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @wait: pointer to wait object, which will be added to lock's waitlist
-+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
-+ *		     to scheduling
-+ * @p: passed through to @should_sleep_fn
-+ *
-+ * This is a convenience wrapper around six_lock_ip_waiter(), see that function
-+ * for full documentation.
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
-+				  struct six_lock_waiter *wait,
-+				  six_lock_should_sleep_fn should_sleep_fn, void *p)
-+{
-+	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
-+}
-+
-+/**
-+ * six_lock_ip - take a six lock
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
-+ *		     to scheduling
-+ * @p: passed through to @should_sleep_fn
-+ * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
-+			      six_lock_should_sleep_fn should_sleep_fn, void *p,
-+			      unsigned long ip)
-+{
-+	struct six_lock_waiter wait;
-+
-+	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
-+}
-+
-+/**
-+ * six_lock_type - take a six lock
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @should_sleep_fn: callback run after adding to waitlist, immediately prior
-+ *		     to scheduling
-+ * @p: passed through to @should_sleep_fn
-+ *
-+ * Return: 0 on success, or the return code from @should_sleep_fn on failure.
-+ */
-+static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
-+				six_lock_should_sleep_fn should_sleep_fn, void *p)
-+{
-+	struct six_lock_waiter wait;
-+
-+	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
-+}
-+
-+bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
-+		   unsigned seq, unsigned long ip);
-+
-+/**
-+ * six_relock_type - attempt to re-take a lock that was held previously
-+ * @lock: lock to take
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ * @seq: lock sequence number obtained from six_lock_seq() while lock was
-+ *	 held previously
-+ *
-+ * Return: true on success, false on failure.
-+ */
-+static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
-+				   unsigned seq)
-+{
-+	return six_relock_ip(lock, type, seq, _THIS_IP_);
-+}
-+
-+void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
-+
-+/**
-+ * six_unlock_type - drop a six lock
-+ * @lock: lock to unlock
-+ * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
-+ *
-+ * When a lock is held multiple times (because six_lock_increment() was used),
-+ * this decrements the 'lock held' counter by one.
-+ * -+ * For example: -+ * six_lock_read(&foo->lock); read count 1 -+ * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2 -+ * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 1 -+ * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 0 -+ */ -+static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type) -+{ -+ six_unlock_ip(lock, type, _THIS_IP_); -+} -+ -+#define __SIX_LOCK(type) \ -+static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\ -+{ \ -+ return six_trylock_ip(lock, SIX_LOCK_##type, ip); \ -+} \ -+ \ -+static inline bool six_trylock_##type(struct six_lock *lock) \ -+{ \ -+ return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ -+} \ -+ \ -+static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \ -+ struct six_lock_waiter *wait, \ -+ six_lock_should_sleep_fn should_sleep_fn, void *p,\ -+ unsigned long ip) \ -+{ \ -+ return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\ -+} \ -+ \ -+static inline int six_lock_ip_##type(struct six_lock *lock, \ -+ six_lock_should_sleep_fn should_sleep_fn, void *p, \ -+ unsigned long ip) \ -+{ \ -+ return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\ -+} \ -+ \ -+static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\ -+{ \ -+ return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \ -+} \ -+ \ -+static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \ -+{ \ -+ return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \ -+} \ -+ \ -+static inline int six_lock_##type(struct six_lock *lock, \ -+ six_lock_should_sleep_fn fn, void *p)\ -+{ \ -+ return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \ -+} \ -+ \ -+static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \ -+{ \ -+ six_unlock_ip(lock, SIX_LOCK_##type, ip); \ -+} \ -+ \ -+static inline void six_unlock_##type(struct six_lock *lock) \ -+{ \ -+ six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \ -+} -+ -+__SIX_LOCK(read) -+__SIX_LOCK(intent) -+__SIX_LOCK(write) -+#undef __SIX_LOCK -+ -+void six_lock_downgrade(struct six_lock *); -+bool six_lock_tryupgrade(struct six_lock *); -+bool six_trylock_convert(struct six_lock *, enum six_lock_type, -+ enum six_lock_type); -+ -+void six_lock_increment(struct six_lock *, enum six_lock_type); -+ -+void six_lock_wakeup_all(struct six_lock *); -+ -+struct six_lock_count { -+ unsigned n[3]; -+}; -+ -+struct six_lock_count six_lock_counts(struct six_lock *); -+void six_lock_readers_add(struct six_lock *, int); -+ -+#endif /* _LINUX_SIX_H */ -diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c -new file mode 100644 -index 000000000000..e9af77b384c7 ---- /dev/null -+++ b/fs/bcachefs/snapshot.c -@@ -0,0 +1,1713 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "bkey_buf.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "buckets.h" -+#include "errcode.h" -+#include "error.h" -+#include "fs.h" -+#include "snapshot.h" -+ -+#include -+ -+/* -+ * Snapshot trees: -+ * -+ * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they -+ * exist to provide a stable identifier for the whole lifetime of a snapshot -+ * tree. 
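-+ *
-+ * Each snapshot node stores the ID of the tree it belongs to (the "tree"
-+ * field), and the snapshot_tree key in turn records the tree's current root
-+ * snapshot and its master subvolume - see bch2_snapshot_tree_to_text()
-+ * below.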
-+ */ -+ -+void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k); -+ -+ prt_printf(out, "subvol %u root snapshot %u", -+ le32_to_cpu(t.v->master_subvol), -+ le32_to_cpu(t.v->root_snapshot)); -+} -+ -+int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || -+ bkey_lt(k.k->p, POS(0, 1)), c, err, -+ snapshot_tree_pos_bad, -+ "bad pos"); -+fsck_err: -+ return ret; -+} -+ -+int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, -+ struct bch_snapshot_tree *s) -+{ -+ int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), -+ BTREE_ITER_WITH_UPDATES, snapshot_tree, s); -+ -+ if (bch2_err_matches(ret, ENOENT)) -+ ret = -BCH_ERR_ENOENT_snapshot_tree; -+ return ret; -+} -+ -+struct bkey_i_snapshot_tree * -+__bch2_snapshot_tree_create(struct btree_trans *trans) -+{ -+ struct btree_iter iter; -+ int ret = bch2_bkey_get_empty_slot(trans, &iter, -+ BTREE_ID_snapshot_trees, POS(0, U32_MAX)); -+ struct bkey_i_snapshot_tree *s_t; -+ -+ if (ret == -BCH_ERR_ENOSPC_btree_slot) -+ ret = -BCH_ERR_ENOSPC_snapshot_tree; -+ if (ret) -+ return ERR_PTR(ret); -+ -+ s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree); -+ ret = PTR_ERR_OR_ZERO(s_t); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret ? ERR_PTR(ret) : s_t; -+} -+ -+static int bch2_snapshot_tree_create(struct btree_trans *trans, -+ u32 root_id, u32 subvol_id, u32 *tree_id) -+{ -+ struct bkey_i_snapshot_tree *n_tree = -+ __bch2_snapshot_tree_create(trans); -+ -+ if (IS_ERR(n_tree)) -+ return PTR_ERR(n_tree); -+ -+ n_tree->v.master_subvol = cpu_to_le32(subvol_id); -+ n_tree->v.root_snapshot = cpu_to_le32(root_id); -+ *tree_id = n_tree->k.p.offset; -+ return 0; -+} -+ -+/* Snapshot nodes: */ -+ -+static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) -+{ -+ struct snapshot_table *t; -+ -+ rcu_read_lock(); -+ t = rcu_dereference(c->snapshots); -+ -+ while (id && id < ancestor) -+ id = __snapshot_t(t, id)->parent; -+ rcu_read_unlock(); -+ -+ return id == ancestor; -+} -+ -+static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) -+{ -+ const struct snapshot_t *s = __snapshot_t(t, id); -+ -+ if (s->skip[2] <= ancestor) -+ return s->skip[2]; -+ if (s->skip[1] <= ancestor) -+ return s->skip[1]; -+ if (s->skip[0] <= ancestor) -+ return s->skip[0]; -+ return s->parent; -+} -+ -+bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) -+{ -+ struct snapshot_table *t; -+ bool ret; -+ -+ EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); -+ -+ rcu_read_lock(); -+ t = rcu_dereference(c->snapshots); -+ -+ while (id && id < ancestor - IS_ANCESTOR_BITMAP) -+ id = get_ancestor_below(t, id, ancestor); -+ -+ if (id && id < ancestor) { -+ ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor); -+ -+ EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor)); -+ } else { -+ ret = id == ancestor; -+ } -+ -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) -+{ -+ size_t idx = U32_MAX - id; -+ size_t new_size; -+ struct snapshot_table *new, *old; -+ -+ new_size = max(16UL, roundup_pow_of_two(idx + 1)); -+ -+ new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); -+ if (!new) -+ return NULL; -+ -+ old = 
rcu_dereference_protected(c->snapshots, true); -+ if (old) -+ memcpy(new->s, -+ rcu_dereference_protected(c->snapshots, true)->s, -+ sizeof(new->s[0]) * c->snapshot_table_size); -+ -+ rcu_assign_pointer(c->snapshots, new); -+ c->snapshot_table_size = new_size; -+ kvfree_rcu_mightsleep(old); -+ -+ return &rcu_dereference_protected(c->snapshots, true)->s[idx]; -+} -+ -+static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) -+{ -+ size_t idx = U32_MAX - id; -+ -+ lockdep_assert_held(&c->snapshot_table_lock); -+ -+ if (likely(idx < c->snapshot_table_size)) -+ return &rcu_dereference_protected(c->snapshots, true)->s[idx]; -+ -+ return __snapshot_t_mut(c, id); -+} -+ -+void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); -+ -+ prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u", -+ BCH_SNAPSHOT_SUBVOL(s.v), -+ BCH_SNAPSHOT_DELETED(s.v), -+ le32_to_cpu(s.v->parent), -+ le32_to_cpu(s.v->children[0]), -+ le32_to_cpu(s.v->children[1]), -+ le32_to_cpu(s.v->subvol), -+ le32_to_cpu(s.v->tree)); -+ -+ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth)) -+ prt_printf(out, " depth %u skiplist %u %u %u", -+ le32_to_cpu(s.v->depth), -+ le32_to_cpu(s.v->skip[0]), -+ le32_to_cpu(s.v->skip[1]), -+ le32_to_cpu(s.v->skip[2])); -+} -+ -+int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_snapshot s; -+ u32 i, id; -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || -+ bkey_lt(k.k->p, POS(0, 1)), c, err, -+ snapshot_pos_bad, -+ "bad pos"); -+ -+ s = bkey_s_c_to_snapshot(k); -+ -+ id = le32_to_cpu(s.v->parent); -+ bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, -+ snapshot_parent_bad, -+ "bad parent node (%u <= %llu)", -+ id, k.k->p.offset); -+ -+ bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, -+ snapshot_children_not_normalized, -+ "children not normalized"); -+ -+ bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, -+ snapshot_child_duplicate, -+ "duplicate child nodes"); -+ -+ for (i = 0; i < 2; i++) { -+ id = le32_to_cpu(s.v->children[i]); -+ -+ bkey_fsck_err_on(id >= k.k->p.offset, c, err, -+ snapshot_child_bad, -+ "bad child node (%u >= %llu)", -+ id, k.k->p.offset); -+ } -+ -+ if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { -+ bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || -+ le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, -+ snapshot_skiplist_not_normalized, -+ "skiplist not normalized"); -+ -+ for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { -+ id = le32_to_cpu(s.v->skip[i]); -+ -+ bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, -+ snapshot_skiplist_bad, -+ "bad skiplist node %u", id); -+ } -+ } -+fsck_err: -+ return ret; -+} -+ -+static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) -+{ -+ struct snapshot_t *t = snapshot_t_mut(c, id); -+ u32 parent = id; -+ -+ while ((parent = bch2_snapshot_parent_early(c, parent)) && -+ parent - id - 1 < IS_ANCESTOR_BITMAP) -+ __set_bit(parent - id - 1, t->is_ancestor); -+} -+ -+static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) -+{ -+ mutex_lock(&c->snapshot_table_lock); -+ __set_is_ancestor_bitmap(c, id); -+ mutex_unlock(&c->snapshot_table_lock); -+} -+ -+int bch2_mark_snapshot(struct btree_trans *trans, -+ enum btree_id 
btree, unsigned level, -+ struct bkey_s_c old, struct bkey_s_c new, -+ unsigned flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct snapshot_t *t; -+ u32 id = new.k->p.offset; -+ int ret = 0; -+ -+ mutex_lock(&c->snapshot_table_lock); -+ -+ t = snapshot_t_mut(c, id); -+ if (!t) { -+ ret = -BCH_ERR_ENOMEM_mark_snapshot; -+ goto err; -+ } -+ -+ if (new.k->type == KEY_TYPE_snapshot) { -+ struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); -+ -+ t->parent = le32_to_cpu(s.v->parent); -+ t->children[0] = le32_to_cpu(s.v->children[0]); -+ t->children[1] = le32_to_cpu(s.v->children[1]); -+ t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? le32_to_cpu(s.v->subvol) : 0; -+ t->tree = le32_to_cpu(s.v->tree); -+ -+ if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { -+ t->depth = le32_to_cpu(s.v->depth); -+ t->skip[0] = le32_to_cpu(s.v->skip[0]); -+ t->skip[1] = le32_to_cpu(s.v->skip[1]); -+ t->skip[2] = le32_to_cpu(s.v->skip[2]); -+ } else { -+ t->depth = 0; -+ t->skip[0] = 0; -+ t->skip[1] = 0; -+ t->skip[2] = 0; -+ } -+ -+ __set_is_ancestor_bitmap(c, id); -+ -+ if (BCH_SNAPSHOT_DELETED(s.v)) { -+ set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); -+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) -+ bch2_delete_dead_snapshots_async(c); -+ } -+ } else { -+ memset(t, 0, sizeof(*t)); -+ } -+err: -+ mutex_unlock(&c->snapshot_table_lock); -+ return ret; -+} -+ -+int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, -+ struct bch_snapshot *s) -+{ -+ return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), -+ BTREE_ITER_WITH_UPDATES, snapshot, s); -+} -+ -+static int bch2_snapshot_live(struct btree_trans *trans, u32 id) -+{ -+ struct bch_snapshot v; -+ int ret; -+ -+ if (!id) -+ return 0; -+ -+ ret = bch2_snapshot_lookup(trans, id, &v); -+ if (bch2_err_matches(ret, ENOENT)) -+ bch_err(trans->c, "snapshot node %u not found", id); -+ if (ret) -+ return ret; -+ -+ return !BCH_SNAPSHOT_DELETED(&v); -+} -+ -+/* -+ * If @k is a snapshot with just one live child, it's part of a linear chain, -+ * which we consider to be an equivalence class: and then after snapshot -+ * deletion cleanup, there should only be a single key at a given position in -+ * this equivalence class. -+ * -+ * This sets the equivalence class of @k to be the child's equivalence class, if -+ * it's part of such a linear chain: this correctly sets equivalence classes on -+ * startup if we run leaf to root (i.e. in natural key order). -+ */ -+static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ unsigned i, nr_live = 0, live_idx = 0; -+ struct bkey_s_c_snapshot snap; -+ u32 id = k.k->p.offset, child[2]; -+ -+ if (k.k->type != KEY_TYPE_snapshot) -+ return 0; -+ -+ snap = bkey_s_c_to_snapshot(k); -+ -+ child[0] = le32_to_cpu(snap.v->children[0]); -+ child[1] = le32_to_cpu(snap.v->children[1]); -+ -+ for (i = 0; i < 2; i++) { -+ int ret = bch2_snapshot_live(trans, child[i]); -+ -+ if (ret < 0) -+ return ret; -+ -+ if (ret) -+ live_idx = i; -+ nr_live += ret; -+ } -+ -+ mutex_lock(&c->snapshot_table_lock); -+ -+ snapshot_t_mut(c, id)->equiv = nr_live == 1 -+ ? 
snapshot_t_mut(c, child[live_idx])->equiv -+ : id; -+ -+ mutex_unlock(&c->snapshot_table_lock); -+ -+ return 0; -+} -+ -+/* fsck: */ -+ -+static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) -+{ -+ return snapshot_t(c, id)->children[child]; -+} -+ -+static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) -+{ -+ return bch2_snapshot_child(c, id, 0); -+} -+ -+static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) -+{ -+ return bch2_snapshot_child(c, id, 1); -+} -+ -+static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) -+{ -+ u32 n, parent; -+ -+ n = bch2_snapshot_left_child(c, id); -+ if (n) -+ return n; -+ -+ while ((parent = bch2_snapshot_parent(c, id))) { -+ n = bch2_snapshot_right_child(c, parent); -+ if (n && n != id) -+ return n; -+ id = parent; -+ } -+ -+ return 0; -+} -+ -+static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) -+{ -+ u32 id = snapshot_root; -+ u32 subvol = 0, s; -+ -+ while (id) { -+ s = snapshot_t(c, id)->subvol; -+ -+ if (s && (!subvol || s < subvol)) -+ subvol = s; -+ -+ id = bch2_snapshot_tree_next(c, id); -+ } -+ -+ return subvol; -+} -+ -+static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, -+ u32 snapshot_root, u32 *subvol_id) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_subvolume s; -+ bool found = false; -+ int ret; -+ -+ for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, -+ 0, k, ret) { -+ if (k.k->type != KEY_TYPE_subvolume) -+ continue; -+ -+ s = bkey_s_c_to_subvolume(k); -+ if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) -+ continue; -+ if (!BCH_SUBVOLUME_SNAP(s.v)) { -+ *subvol_id = s.k->p.offset; -+ found = true; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (!ret && !found) { -+ struct bkey_i_subvolume *u; -+ -+ *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); -+ -+ u = bch2_bkey_get_mut_typed(trans, &iter, -+ BTREE_ID_subvolumes, POS(0, *subvol_id), -+ 0, subvolume); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ return ret; -+ -+ SET_BCH_SUBVOLUME_SNAP(&u->v, false); -+ } -+ -+ return ret; -+} -+ -+static int check_snapshot_tree(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_snapshot_tree st; -+ struct bch_snapshot s; -+ struct bch_subvolume subvol; -+ struct printbuf buf = PRINTBUF; -+ u32 root_id; -+ int ret; -+ -+ if (k.k->type != KEY_TYPE_snapshot_tree) -+ return 0; -+ -+ st = bkey_s_c_to_snapshot_tree(k); -+ root_id = le32_to_cpu(st.v->root_snapshot); -+ -+ ret = bch2_snapshot_lookup(trans, root_id, &s); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ goto err; -+ -+ if (fsck_err_on(ret || -+ root_id != bch2_snapshot_root(c, root_id) || -+ st.k->p.offset != le32_to_cpu(s.tree), -+ c, snapshot_tree_to_missing_snapshot, -+ "snapshot tree points to missing/incorrect snapshot:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { -+ ret = bch2_btree_delete_at(trans, iter, 0); -+ goto err; -+ } -+ -+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), -+ false, 0, &subvol); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ goto err; -+ -+ if (fsck_err_on(ret, -+ c, snapshot_tree_to_missing_subvol, -+ "snapshot tree points to missing subvolume:\n %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || -+ fsck_err_on(!bch2_snapshot_is_ancestor_early(c, -+ le32_to_cpu(subvol.snapshot), 
-+ root_id), -+ c, snapshot_tree_to_wrong_subvol, -+ "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || -+ fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), -+ c, snapshot_tree_to_snapshot_subvol, -+ "snapshot tree points to snapshot subvolume:\n %s", -+ (printbuf_reset(&buf), -+ bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { -+ struct bkey_i_snapshot_tree *u; -+ u32 subvol_id; -+ -+ ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); -+ if (ret) -+ goto err; -+ -+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto err; -+ -+ u->v.master_subvol = cpu_to_le32(subvol_id); -+ st = snapshot_tree_i_to_s_c(u); -+ } -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+/* -+ * For each snapshot_tree, make sure it points to the root of a snapshot tree -+ * and that snapshot entry points back to it, or delete it. -+ * -+ * And, make sure it points to a subvolume within that snapshot tree, or correct -+ * it to point to the oldest subvolume within that snapshot tree. -+ */ -+int bch2_check_snapshot_trees(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_snapshot_trees, POS_MIN, -+ BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_snapshot_tree(trans, &iter, k))); -+ -+ if (ret) -+ bch_err(c, "error %i checking snapshot trees", ret); -+ return ret; -+} -+ -+/* -+ * Look up snapshot tree for @tree_id and find root, -+ * make sure @snap_id is a descendent: -+ */ -+static int snapshot_tree_ptr_good(struct btree_trans *trans, -+ u32 snap_id, u32 tree_id) -+{ -+ struct bch_snapshot_tree s_t; -+ int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); -+ -+ if (bch2_err_matches(ret, ENOENT)) -+ return 0; -+ if (ret) -+ return ret; -+ -+ return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); -+} -+ -+u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) -+{ -+ const struct snapshot_t *s; -+ -+ if (!id) -+ return 0; -+ -+ rcu_read_lock(); -+ s = snapshot_t(c, id); -+ if (s->parent) -+ id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static int snapshot_skiplist_good(struct btree_trans *trans, u32 id, struct bch_snapshot s) -+{ -+ unsigned i; -+ -+ for (i = 0; i < 3; i++) -+ if (!s.parent) { -+ if (s.skip[i]) -+ return false; -+ } else { -+ if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i]))) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* -+ * snapshot_tree pointer was incorrect: look up root snapshot node, make sure -+ * its snapshot_tree pointer is correct (allocate new one if necessary), then -+ * update this node's pointer to root node's pointer: -+ */ -+static int snapshot_tree_ptr_repair(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ struct bch_snapshot *s) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter root_iter; -+ struct bch_snapshot_tree s_t; -+ struct bkey_s_c_snapshot root; -+ struct bkey_i_snapshot *u; -+ u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; -+ int ret; -+ -+ root = bch2_bkey_get_iter_typed(trans, &root_iter, -+ BTREE_ID_snapshots, POS(0, root_id), -+ BTREE_ITER_WITH_UPDATES, snapshot); -+ ret = bkey_err(root); -+ if (ret) -+ goto err; 
-+ -+ tree_id = le32_to_cpu(root.v->tree); -+ -+ ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); -+ if (ret && !bch2_err_matches(ret, ENOENT)) -+ return ret; -+ -+ if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { -+ u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(u) ?: -+ bch2_snapshot_tree_create(trans, root_id, -+ bch2_snapshot_tree_oldest_subvol(c, root_id), -+ &tree_id); -+ if (ret) -+ goto err; -+ -+ u->v.tree = cpu_to_le32(tree_id); -+ if (k.k->p.offset == root_id) -+ *s = u->v; -+ } -+ -+ if (k.k->p.offset != root_id) { -+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto err; -+ -+ u->v.tree = cpu_to_le32(tree_id); -+ *s = u->v; -+ } -+err: -+ bch2_trans_iter_exit(trans, &root_iter); -+ return ret; -+} -+ -+static int check_snapshot(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bch_snapshot s; -+ struct bch_subvolume subvol; -+ struct bch_snapshot v; -+ struct bkey_i_snapshot *u; -+ u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); -+ u32 real_depth; -+ struct printbuf buf = PRINTBUF; -+ bool should_have_subvol; -+ u32 i, id; -+ int ret = 0; -+ -+ if (k.k->type != KEY_TYPE_snapshot) -+ return 0; -+ -+ memset(&s, 0, sizeof(s)); -+ memcpy(&s, k.v, bkey_val_bytes(k.k)); -+ -+ id = le32_to_cpu(s.parent); -+ if (id) { -+ ret = bch2_snapshot_lookup(trans, id, &v); -+ if (bch2_err_matches(ret, ENOENT)) -+ bch_err(c, "snapshot with nonexistent parent:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ if (ret) -+ goto err; -+ -+ if (le32_to_cpu(v.children[0]) != k.k->p.offset && -+ le32_to_cpu(v.children[1]) != k.k->p.offset) { -+ bch_err(c, "snapshot parent %u missing pointer to child %llu", -+ id, k.k->p.offset); -+ ret = -EINVAL; -+ goto err; -+ } -+ } -+ -+ for (i = 0; i < 2 && s.children[i]; i++) { -+ id = le32_to_cpu(s.children[i]); -+ -+ ret = bch2_snapshot_lookup(trans, id, &v); -+ if (bch2_err_matches(ret, ENOENT)) -+ bch_err(c, "snapshot node %llu has nonexistent child %u", -+ k.k->p.offset, id); -+ if (ret) -+ goto err; -+ -+ if (le32_to_cpu(v.parent) != k.k->p.offset) { -+ bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", -+ id, le32_to_cpu(v.parent), k.k->p.offset); -+ ret = -EINVAL; -+ goto err; -+ } -+ } -+ -+ should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && -+ !BCH_SNAPSHOT_DELETED(&s); -+ -+ if (should_have_subvol) { -+ id = le32_to_cpu(s.subvol); -+ ret = bch2_subvolume_get(trans, id, 0, false, &subvol); -+ if (bch2_err_matches(ret, ENOENT)) -+ bch_err(c, "snapshot points to nonexistent subvolume:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); -+ if (ret) -+ goto err; -+ -+ if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { -+ bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", -+ k.k->p.offset); -+ ret = -EINVAL; -+ goto err; -+ } -+ } else { -+ if (fsck_err_on(s.subvol, -+ c, snapshot_should_not_have_subvol, -+ "snapshot should not point to subvol:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto err; -+ -+ u->v.subvol = 0; -+ s = u->v; -+ } -+ } -+ -+ ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); -+ if (ret < 0) -+ goto err; -+ -+ if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree, -+ "snapshot points to missing/incorrect tree:\n 
%s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { -+ ret = snapshot_tree_ptr_repair(trans, iter, k, &s); -+ if (ret) -+ goto err; -+ } -+ ret = 0; -+ -+ real_depth = bch2_snapshot_depth(c, parent_id); -+ -+ if (le32_to_cpu(s.depth) != real_depth && -+ (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || -+ fsck_err(c, snapshot_bad_depth, -+ "snapshot with incorrect depth field, should be %u:\n %s", -+ real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { -+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto err; -+ -+ u->v.depth = cpu_to_le32(real_depth); -+ s = u->v; -+ } -+ -+ ret = snapshot_skiplist_good(trans, k.k->p.offset, s); -+ if (ret < 0) -+ goto err; -+ -+ if (!ret && -+ (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || -+ fsck_err(c, snapshot_bad_skiplist, -+ "snapshot with bad skiplist field:\n %s", -+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { -+ u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(u); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) -+ u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id)); -+ -+ bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); -+ s = u->v; -+ } -+ ret = 0; -+err: -+fsck_err: -+ printbuf_exit(&buf); -+ return ret; -+} -+ -+int bch2_check_snapshots(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ /* -+ * We iterate backwards as checking/fixing the depth field requires that -+ * the parent's depth already be correct: -+ */ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_reverse_commit(trans, iter, -+ BTREE_ID_snapshots, POS_MAX, -+ BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_snapshot(trans, &iter, k))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* -+ * Mark a snapshot as deleted, for future cleanup: -+ */ -+int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) -+{ -+ struct btree_iter iter; -+ struct bkey_i_snapshot *s; -+ int ret = 0; -+ -+ s = bch2_bkey_get_mut_typed(trans, &iter, -+ BTREE_ID_snapshots, POS(0, id), -+ 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (unlikely(ret)) { -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), -+ trans->c, "missing snapshot %u", id); -+ return ret; -+ } -+ -+ /* already deleted? 
*/ -+	if (BCH_SNAPSHOT_DELETED(&s->v)) -+		goto err; -+ -+	SET_BCH_SNAPSHOT_DELETED(&s->v, true); -+	SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); -+	s->v.subvol	= 0; -+err: -+	bch2_trans_iter_exit(trans, &iter); -+	return ret; -+} -+ -+static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) -+{ -+	if (le32_to_cpu(s->children[0]) < le32_to_cpu(s->children[1])) -+		swap(s->children[0], s->children[1]); -+} -+ -+static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) -+{ -+	struct bch_fs *c = trans->c; -+	struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; -+	struct btree_iter c_iter = (struct btree_iter) { NULL }; -+	struct btree_iter tree_iter = (struct btree_iter) { NULL }; -+	struct bkey_s_c_snapshot s; -+	u32 parent_id, child_id; -+	unsigned i; -+	int ret = 0; -+ -+	s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), -+				     BTREE_ITER_INTENT, snapshot); -+	ret = bkey_err(s); -+	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, -+				"missing snapshot %u", id); -+ -+	if (ret) -+		goto err; -+ -+	BUG_ON(s.v->children[1]); -+ -+	parent_id = le32_to_cpu(s.v->parent); -+	child_id = le32_to_cpu(s.v->children[0]); -+ -+	if (parent_id) { -+		struct bkey_i_snapshot *parent; -+ -+		parent = bch2_bkey_get_mut_typed(trans, &p_iter, -+				     BTREE_ID_snapshots, POS(0, parent_id), -+				     0, snapshot); -+		ret = PTR_ERR_OR_ZERO(parent); -+		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, -+					"missing snapshot %u", parent_id); -+		if (unlikely(ret)) -+			goto err; -+ -+		/* find entry in parent->children for node being deleted */ -+		for (i = 0; i < 2; i++) -+			if (le32_to_cpu(parent->v.children[i]) == id) -+				break; -+ -+		if (bch2_fs_inconsistent_on(i == 2, c, -+					"snapshot %u missing child pointer to %u", -+					parent_id, id)) -+			goto err; -+ -+		parent->v.children[i] = cpu_to_le32(child_id); -+ -+		normalize_snapshot_child_pointers(&parent->v); -+	} -+ -+	if (child_id) { -+		struct bkey_i_snapshot *child; -+ -+		child = bch2_bkey_get_mut_typed(trans, &c_iter, -+				     BTREE_ID_snapshots, POS(0, child_id), -+				     0, snapshot); -+		ret = PTR_ERR_OR_ZERO(child); -+		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, -+					"missing snapshot %u", child_id); -+		if (unlikely(ret)) -+			goto err; -+ -+		child->v.parent = cpu_to_le32(parent_id); -+ -+		if (!child->v.parent) { -+			child->v.skip[0] = 0; -+			child->v.skip[1] = 0; -+			child->v.skip[2] = 0; -+		} -+	} -+ -+	if (!parent_id) { -+		/* -+		 * We're deleting the root of a snapshot tree: update the -+		 * snapshot_tree entry to point to the new root, or delete it if -+		 * this is the last snapshot ID in this tree: -+		 */ -+		struct bkey_i_snapshot_tree *s_t; -+ -+		BUG_ON(s.v->children[1]); -+ -+		s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, -+				BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), -+				0, snapshot_tree); -+		ret = PTR_ERR_OR_ZERO(s_t); -+		if (ret) -+			goto err; -+ -+		if (s.v->children[0]) { -+			s_t->v.root_snapshot = s.v->children[0]; -+		} else { -+			s_t->k.type = KEY_TYPE_deleted; -+			set_bkey_val_u64s(&s_t->k, 0); -+		} -+	} -+ -+	ret = bch2_btree_delete_at(trans, &iter, 0); -+err: -+	bch2_trans_iter_exit(trans, &tree_iter); -+	bch2_trans_iter_exit(trans, &p_iter); -+	bch2_trans_iter_exit(trans, &c_iter); -+	bch2_trans_iter_exit(trans, &iter); -+	return ret; -+} -+ -+static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, -+			  u32 *new_snapids, -+			  u32 *snapshot_subvols, -+			  unsigned nr_snapids) -+{ -+	struct bch_fs *c = trans->c; -+	struct btree_iter iter; -+	struct bkey_i_snapshot *n; -+	struct
bkey_s_c k; -+ unsigned i, j; -+ u32 depth = bch2_snapshot_depth(c, parent); -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, -+ POS_MIN, BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < nr_snapids; i++) { -+ k = bch2_btree_iter_prev_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!k.k || !k.k->p.offset) { -+ ret = -BCH_ERR_ENOSPC_snapshot_create; -+ goto err; -+ } -+ -+ n = bch2_bkey_alloc(trans, &iter, 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (ret) -+ goto err; -+ -+ n->v.flags = 0; -+ n->v.parent = cpu_to_le32(parent); -+ n->v.subvol = cpu_to_le32(snapshot_subvols[i]); -+ n->v.tree = cpu_to_le32(tree); -+ n->v.depth = cpu_to_le32(depth); -+ -+ for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) -+ n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); -+ -+ bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); -+ SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); -+ -+ ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, -+ bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); -+ if (ret) -+ goto err; -+ -+ new_snapids[i] = iter.pos.offset; -+ -+ mutex_lock(&c->snapshot_table_lock); -+ snapshot_t_mut(c, new_snapids[i])->equiv = new_snapids[i]; -+ mutex_unlock(&c->snapshot_table_lock); -+ } -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* -+ * Create new snapshot IDs as children of an existing snapshot ID: -+ */ -+static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent, -+ u32 *new_snapids, -+ u32 *snapshot_subvols, -+ unsigned nr_snapids) -+{ -+ struct btree_iter iter; -+ struct bkey_i_snapshot *n_parent; -+ int ret = 0; -+ -+ n_parent = bch2_bkey_get_mut_typed(trans, &iter, -+ BTREE_ID_snapshots, POS(0, parent), -+ 0, snapshot); -+ ret = PTR_ERR_OR_ZERO(n_parent); -+ if (unlikely(ret)) { -+ if (bch2_err_matches(ret, ENOENT)) -+ bch_err(trans->c, "snapshot %u not found", parent); -+ return ret; -+ } -+ -+ if (n_parent->v.children[0] || n_parent->v.children[1]) { -+ bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), -+ new_snapids, snapshot_subvols, nr_snapids); -+ if (ret) -+ goto err; -+ -+ n_parent->v.children[0] = cpu_to_le32(new_snapids[0]); -+ n_parent->v.children[1] = cpu_to_le32(new_snapids[1]); -+ n_parent->v.subvol = 0; -+ SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+/* -+ * Create a snapshot node that is the root of a new tree: -+ */ -+static int bch2_snapshot_node_create_tree(struct btree_trans *trans, -+ u32 *new_snapids, -+ u32 *snapshot_subvols, -+ unsigned nr_snapids) -+{ -+ struct bkey_i_snapshot_tree *n_tree; -+ int ret; -+ -+ n_tree = __bch2_snapshot_tree_create(trans); -+ ret = PTR_ERR_OR_ZERO(n_tree) ?: -+ create_snapids(trans, 0, n_tree->k.p.offset, -+ new_snapids, snapshot_subvols, nr_snapids); -+ if (ret) -+ return ret; -+ -+ n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]); -+ n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]); -+ return 0; -+} -+ -+int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, -+ u32 *new_snapids, -+ u32 *snapshot_subvols, -+ unsigned nr_snapids) -+{ -+ BUG_ON((parent == 0) != (nr_snapids == 1)); -+ BUG_ON((parent != 0) != (nr_snapids == 2)); -+ -+ return parent -+ ? 
bch2_snapshot_node_create_children(trans, parent, -+			new_snapids, snapshot_subvols, nr_snapids) -+		: bch2_snapshot_node_create_tree(trans, -+			new_snapids, snapshot_subvols, nr_snapids); -+ -+} -+ -+/* -+ * If we have an unlinked inode in an internal snapshot node, and the inode -+ * really has been deleted in all child snapshots, how does this get cleaned up? -+ * -+ * first there is the problem of how keys that have been overwritten in all -+ * child snapshots get deleted (unimplemented?), but inodes may perhaps be -+ * special? -+ * -+ * also: unlinked inode in internal snapshot appears to not be getting deleted -+ * correctly if inode doesn't exist in leaf snapshots -+ * -+ * solution: -+ * -+ * for a key in an interior snapshot node that needs work to be done that -+ * requires it to be mutated: iterate over all descendent leaf nodes and copy -+ * that key to snapshot leaf nodes, where we can mutate it -+ */ -+ -+static int snapshot_delete_key(struct btree_trans *trans, -+			       struct btree_iter *iter, -+			       struct bkey_s_c k, -+			       snapshot_id_list *deleted, -+			       snapshot_id_list *equiv_seen, -+			       struct bpos *last_pos) -+{ -+	struct bch_fs *c = trans->c; -+	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); -+ -+	if (!bkey_eq(k.k->p, *last_pos)) -+		equiv_seen->nr = 0; -+	*last_pos = k.k->p; -+ -+	if (snapshot_list_has_id(deleted, k.k->p.snapshot) || -+	    snapshot_list_has_id(equiv_seen, equiv)) { -+		return bch2_btree_delete_at(trans, iter, -+					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+	} else { -+		return snapshot_list_add(c, equiv_seen, equiv); -+	} -+} -+ -+static int move_key_to_correct_snapshot(struct btree_trans *trans, -+			       struct btree_iter *iter, -+			       struct bkey_s_c k) -+{ -+	struct bch_fs *c = trans->c; -+	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); -+ -+	/* -+	 * When we have a linear chain of snapshot nodes, we consider -+	 * those to form an equivalence class: we're going to collapse -+	 * them all down to a single node, and keep the leaf-most node - -+	 * which has the same id as the equivalence class id. -+	 * -+	 * If there are multiple keys in different snapshots at the same -+	 * position, we're only going to keep the one in the newest -+	 * snapshot - the rest have been overwritten and are redundant, -+	 * and for the key we're going to keep we need to move it to the -+	 * equivalence class ID if it's not there already.
-+ */ -+ if (equiv != k.k->p.snapshot) { -+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); -+ struct btree_iter new_iter; -+ int ret; -+ -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ return ret; -+ -+ new->k.p.snapshot = equiv; -+ -+ bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, -+ BTREE_ITER_ALL_SNAPSHOTS| -+ BTREE_ITER_CACHED| -+ BTREE_ITER_INTENT); -+ -+ ret = bch2_btree_iter_traverse(&new_iter) ?: -+ bch2_trans_update(trans, &new_iter, new, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: -+ bch2_btree_delete_at(trans, iter, -+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -+ bch2_trans_iter_exit(trans, &new_iter); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ struct bkey_s_c_snapshot snap; -+ u32 children[2]; -+ int ret; -+ -+ if (k.k->type != KEY_TYPE_snapshot) -+ return 0; -+ -+ snap = bkey_s_c_to_snapshot(k); -+ if (BCH_SNAPSHOT_DELETED(snap.v) || -+ BCH_SNAPSHOT_SUBVOL(snap.v)) -+ return 0; -+ -+ children[0] = le32_to_cpu(snap.v->children[0]); -+ children[1] = le32_to_cpu(snap.v->children[1]); -+ -+ ret = bch2_snapshot_live(trans, children[0]) ?: -+ bch2_snapshot_live(trans, children[1]); -+ if (ret < 0) -+ return ret; -+ return !ret; -+} -+ -+/* -+ * For a given snapshot, if it doesn't have a subvolume that points to it, and -+ * it doesn't have child snapshot nodes - it's now redundant and we can mark it -+ * as deleted. -+ */ -+static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ int ret = bch2_snapshot_needs_delete(trans, k); -+ -+ return ret <= 0 -+ ? ret -+ : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); -+} -+ -+static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, -+ snapshot_id_list *skip) -+{ -+ rcu_read_lock(); -+ while (snapshot_list_has_id(skip, id)) -+ id = __bch2_snapshot_parent(c, id); -+ -+ while (n--) { -+ do { -+ id = __bch2_snapshot_parent(c, id); -+ } while (snapshot_list_has_id(skip, id)); -+ } -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, -+ struct btree_iter *iter, struct bkey_s_c k, -+ snapshot_id_list *deleted) -+{ -+ struct bch_fs *c = trans->c; -+ u32 nr_deleted_ancestors = 0; -+ struct bkey_i_snapshot *s; -+ u32 *i; -+ int ret; -+ -+ if (k.k->type != KEY_TYPE_snapshot) -+ return 0; -+ -+ if (snapshot_list_has_id(deleted, k.k->p.offset)) -+ return 0; -+ -+ s = bch2_bkey_make_mut_noupdate_typed(trans, k, snapshot); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (ret) -+ return ret; -+ -+ darray_for_each(*deleted, i) -+ nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, *i); -+ -+ if (!nr_deleted_ancestors) -+ return 0; -+ -+ le32_add_cpu(&s->v.depth, -nr_deleted_ancestors); -+ -+ if (!s->v.depth) { -+ s->v.skip[0] = 0; -+ s->v.skip[1] = 0; -+ s->v.skip[2] = 0; -+ } else { -+ u32 depth = le32_to_cpu(s->v.depth); -+ u32 parent = bch2_snapshot_parent(c, s->k.p.offset); -+ -+ for (unsigned j = 0; j < ARRAY_SIZE(s->v.skip); j++) { -+ u32 id = le32_to_cpu(s->v.skip[j]); -+ -+ if (snapshot_list_has_id(deleted, id)) { -+ id = bch2_snapshot_nth_parent_skip(c, -+ parent, -+ depth > 1 -+ ? 
get_random_u32_below(depth - 1) -+ : 0, -+ deleted); -+ s->v.skip[j] = cpu_to_le32(id); -+ } -+ } -+ -+ bubble_sort(s->v.skip, ARRAY_SIZE(s->v.skip), cmp_le32); -+ } -+ -+ return bch2_trans_update(trans, iter, &s->k_i, 0); -+} -+ -+int bch2_delete_dead_snapshots(struct bch_fs *c) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_s_c_snapshot snap; -+ snapshot_id_list deleted = { 0 }; -+ snapshot_id_list deleted_interior = { 0 }; -+ u32 *i, id; -+ int ret = 0; -+ -+ if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) -+ return 0; -+ -+ if (!test_bit(BCH_FS_STARTED, &c->flags)) { -+ ret = bch2_fs_read_write_early(c); -+ if (ret) { -+ bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); -+ return ret; -+ } -+ } -+ -+ trans = bch2_trans_get(c); -+ -+ /* -+ * For every snapshot node: If we have no live children and it's not -+ * pointed to by a subvolume, delete it: -+ */ -+ ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, -+ POS_MIN, 0, k, -+ NULL, NULL, 0, -+ bch2_delete_redundant_snapshot(trans, k)); -+ if (ret) { -+ bch_err_msg(c, ret, "deleting redundant snapshots"); -+ goto err; -+ } -+ -+ ret = for_each_btree_key2(trans, iter, BTREE_ID_snapshots, -+ POS_MIN, 0, k, -+ bch2_snapshot_set_equiv(trans, k)); -+ if (ret) { -+ bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); -+ goto err; -+ } -+ -+ for_each_btree_key(trans, iter, BTREE_ID_snapshots, -+ POS_MIN, 0, k, ret) { -+ if (k.k->type != KEY_TYPE_snapshot) -+ continue; -+ -+ snap = bkey_s_c_to_snapshot(k); -+ if (BCH_SNAPSHOT_DELETED(snap.v)) { -+ ret = snapshot_list_add(c, &deleted, k.k->p.offset); -+ if (ret) -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) { -+ bch_err_msg(c, ret, "walking snapshots"); -+ goto err; -+ } -+ -+ for (id = 0; id < BTREE_ID_NR; id++) { -+ struct bpos last_pos = POS_MIN; -+ snapshot_id_list equiv_seen = { 0 }; -+ struct disk_reservation res = { 0 }; -+ -+ if (!btree_type_has_snapshots(id)) -+ continue; -+ -+ /* -+ * deleted inodes btree is maintained by a trigger on the inodes -+ * btree - no work for us to do here, and it's not safe to scan -+ * it because we'll see out of date keys due to the btree write -+ * buffer: -+ */ -+ if (id == BTREE_ID_deleted_inodes) -+ continue; -+ -+ ret = for_each_btree_key_commit(trans, iter, -+ id, POS_MIN, -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ &res, NULL, BTREE_INSERT_NOFAIL, -+ snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?: -+ for_each_btree_key_commit(trans, iter, -+ id, POS_MIN, -+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -+ &res, NULL, BTREE_INSERT_NOFAIL, -+ move_key_to_correct_snapshot(trans, &iter, k)); -+ -+ bch2_disk_reservation_put(c, &res); -+ darray_exit(&equiv_seen); -+ -+ if (ret) { -+ bch_err_msg(c, ret, "deleting keys from dying snapshots"); -+ goto err; -+ } -+ } -+ -+ bch2_trans_unlock(trans); -+ down_write(&c->snapshot_create_lock); -+ -+ for_each_btree_key(trans, iter, BTREE_ID_snapshots, -+ POS_MIN, 0, k, ret) { -+ u32 snapshot = k.k->p.offset; -+ u32 equiv = bch2_snapshot_equiv(c, snapshot); -+ -+ if (equiv != snapshot) -+ snapshot_list_add(c, &deleted_interior, snapshot); -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ if (ret) -+ goto err_create_lock; -+ -+ /* -+ * Fixing children of deleted snapshots can't be done completely -+ * atomically, if we crash between here and when we delete the interior -+ * nodes some depth fields will be off: -+ */ -+ ret = 
for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, -+ BTREE_ITER_INTENT, k, -+ NULL, NULL, BTREE_INSERT_NOFAIL, -+ bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); -+ if (ret) -+ goto err_create_lock; -+ -+ darray_for_each(deleted, i) { -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_snapshot_node_delete(trans, *i)); -+ if (ret) { -+ bch_err_msg(c, ret, "deleting snapshot %u", *i); -+ goto err_create_lock; -+ } -+ } -+ -+ darray_for_each(deleted_interior, i) { -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_snapshot_node_delete(trans, *i)); -+ if (ret) { -+ bch_err_msg(c, ret, "deleting snapshot %u", *i); -+ goto err_create_lock; -+ } -+ } -+err_create_lock: -+ up_write(&c->snapshot_create_lock); -+err: -+ darray_exit(&deleted_interior); -+ darray_exit(&deleted); -+ bch2_trans_put(trans); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+void bch2_delete_dead_snapshots_work(struct work_struct *work) -+{ -+ struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); -+ -+ bch2_delete_dead_snapshots(c); -+ bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); -+} -+ -+void bch2_delete_dead_snapshots_async(struct bch_fs *c) -+{ -+ if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && -+ !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) -+ bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); -+} -+ -+int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, -+ enum btree_id id, -+ struct bpos pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_iter_init(trans, &iter, id, pos, -+ BTREE_ITER_NOT_EXTENTS| -+ BTREE_ITER_ALL_SNAPSHOTS); -+ while (1) { -+ k = bch2_btree_iter_prev(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ -+ if (!k.k) -+ break; -+ -+ if (!bkey_eq(pos, k.k->p)) -+ break; -+ -+ if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) { -+ ret = 1; -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+} -+ -+static u32 bch2_snapshot_smallest_child(struct bch_fs *c, u32 id) -+{ -+ const struct snapshot_t *s = snapshot_t(c, id); -+ -+ return s->children[1] ?: s->children[0]; -+} -+ -+static u32 bch2_snapshot_smallest_descendent(struct bch_fs *c, u32 id) -+{ -+ u32 child; -+ -+ while ((child = bch2_snapshot_smallest_child(c, id))) -+ id = child; -+ return id; -+} -+ -+static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans, -+ enum btree_id btree, -+ struct bkey_s_c interior_k, -+ u32 leaf_id, struct bpos *new_min_pos) -+{ -+ struct btree_iter iter; -+ struct bpos pos = interior_k.k->p; -+ struct bkey_s_c k; -+ struct bkey_i *new; -+ int ret; -+ -+ pos.snapshot = leaf_id; -+ -+ bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto out; -+ -+ /* key already overwritten in this snapshot? 
*/ -+ if (k.k->p.snapshot != interior_k.k->p.snapshot) -+ goto out; -+ -+ if (bpos_eq(*new_min_pos, POS_MIN)) { -+ *new_min_pos = k.k->p; -+ new_min_pos->snapshot = leaf_id; -+ } -+ -+ new = bch2_bkey_make_mut_noupdate(trans, interior_k); -+ ret = PTR_ERR_OR_ZERO(new); -+ if (ret) -+ goto out; -+ -+ new->k.p.snapshot = leaf_id; -+ ret = bch2_trans_update(trans, &iter, new, 0); -+out: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *trans, -+ enum btree_id btree, -+ struct bkey_s_c k, -+ struct bpos *new_min_pos) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_buf sk; -+ u32 restart_count = trans->restart_count; -+ int ret = 0; -+ -+ bch2_bkey_buf_init(&sk); -+ bch2_bkey_buf_reassemble(&sk, c, k); -+ k = bkey_i_to_s_c(sk.k); -+ -+ *new_min_pos = POS_MIN; -+ -+ for (u32 id = bch2_snapshot_smallest_descendent(c, k.k->p.snapshot); -+ id < k.k->p.snapshot; -+ id++) { -+ if (!bch2_snapshot_is_ancestor(c, id, k.k->p.snapshot) || -+ !bch2_snapshot_is_leaf(c, id)) -+ continue; -+again: -+ ret = btree_trans_too_many_iters(trans) ?: -+ bch2_propagate_key_to_snapshot_leaf(trans, btree, k, id, new_min_pos) ?: -+ bch2_trans_commit(trans, NULL, NULL, 0); -+ if (ret && bch2_err_matches(ret, BCH_ERR_transaction_restart)) { -+ bch2_trans_begin(trans); -+ goto again; -+ } -+ -+ if (ret) -+ break; -+ } -+ -+ bch2_bkey_buf_exit(&sk, c); -+ -+ return ret ?: trans_was_restarted(trans, restart_count); -+} -+ -+static int bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_snapshot snap; -+ int ret = 0; -+ -+ if (k.k->type != KEY_TYPE_snapshot) -+ return 0; -+ -+ snap = bkey_s_c_to_snapshot(k); -+ if (BCH_SNAPSHOT_DELETED(snap.v) || -+ bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || -+ (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { -+ set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); -+ return 0; -+ } -+ -+ return ret; -+} -+ -+int bch2_snapshots_read(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key2(trans, iter, BTREE_ID_snapshots, -+ POS_MIN, 0, k, -+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: -+ bch2_snapshot_set_equiv(trans, k) ?: -+ bch2_check_snapshot_needs_deletion(trans, k)) ?: -+ for_each_btree_key2(trans, iter, BTREE_ID_snapshots, -+ POS_MIN, 0, k, -+ (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+void bch2_fs_snapshots_exit(struct bch_fs *c) -+{ -+ kfree(rcu_dereference_protected(c->snapshots, true)); -+} -diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h -new file mode 100644 -index 000000000000..f09a22f44239 ---- /dev/null -+++ b/fs/bcachefs/snapshot.h -@@ -0,0 +1,268 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SNAPSHOT_H -+#define _BCACHEFS_SNAPSHOT_H -+ -+enum bkey_invalid_flags; -+ -+void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+ -+#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ -+ .key_invalid = bch2_snapshot_tree_invalid, \ -+ .val_to_text = bch2_snapshot_tree_to_text, \ -+ .min_val_size = 8, \ -+}) -+ -+struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); -+ -+int bch2_snapshot_tree_lookup(struct btree_trans *, u32, 
struct bch_snapshot_tree *); -+ -+void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, -+ struct bkey_s_c, struct bkey_s_c, unsigned); -+ -+#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ -+ .key_invalid = bch2_snapshot_invalid, \ -+ .val_to_text = bch2_snapshot_to_text, \ -+ .atomic_trigger = bch2_mark_snapshot, \ -+ .min_val_size = 24, \ -+}) -+ -+static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) -+{ -+ return &t->s[U32_MAX - id]; -+} -+ -+static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) -+{ -+ return __snapshot_t(rcu_dereference(c->snapshots), id); -+} -+ -+static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) -+{ -+ rcu_read_lock(); -+ id = snapshot_t(c, id)->tree; -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) -+{ -+ return snapshot_t(c, id)->parent; -+} -+ -+static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) -+{ -+ rcu_read_lock(); -+ id = __bch2_snapshot_parent_early(c, id); -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) -+{ -+#ifdef CONFIG_BCACHEFS_DEBUG -+ u32 parent = snapshot_t(c, id)->parent; -+ -+ if (parent && -+ snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1) -+ panic("id %u depth=%u parent %u depth=%u\n", -+ id, snapshot_t(c, id)->depth, -+ parent, snapshot_t(c, parent)->depth); -+ -+ return parent; -+#else -+ return snapshot_t(c, id)->parent; -+#endif -+} -+ -+static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) -+{ -+ rcu_read_lock(); -+ id = __bch2_snapshot_parent(c, id); -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) -+{ -+ rcu_read_lock(); -+ while (n--) -+ id = __bch2_snapshot_parent(c, id); -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32); -+ -+static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) -+{ -+ u32 parent; -+ -+ rcu_read_lock(); -+ while ((parent = __bch2_snapshot_parent(c, id))) -+ id = parent; -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) -+{ -+ return snapshot_t(c, id)->equiv; -+} -+ -+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) -+{ -+ rcu_read_lock(); -+ id = __bch2_snapshot_equiv(c, id); -+ rcu_read_unlock(); -+ -+ return id; -+} -+ -+static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) -+{ -+ return id == bch2_snapshot_equiv(c, id); -+} -+ -+static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) -+{ -+ const struct snapshot_t *s; -+ bool ret; -+ -+ rcu_read_lock(); -+ s = snapshot_t(c, id); -+ ret = s->children[0]; -+ rcu_read_unlock(); -+ -+ return ret; -+} -+ -+static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) -+{ -+ return !bch2_snapshot_is_internal_node(c, id); -+} -+ -+static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) -+{ -+ const struct snapshot_t *s; -+ u32 parent = __bch2_snapshot_parent(c, id); -+ -+ if (!parent) -+ return 0; -+ -+ s = snapshot_t(c, __bch2_snapshot_parent(c, id)); -+ if (id == s->children[0]) -+ return s->children[1]; -+ if (id == s->children[1]) -+ return s->children[0]; -+ 
return 0; -+} -+ -+static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) -+{ -+	u32 depth; -+ -+	rcu_read_lock(); -+	depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; -+	rcu_read_unlock(); -+ -+	return depth; -+} -+ -+bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); -+ -+static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) -+{ -+	return id == ancestor -+		? true -+		: __bch2_snapshot_is_ancestor(c, id, ancestor); -+} -+ -+static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) -+{ -+	const struct snapshot_t *t; -+	bool ret; -+ -+	rcu_read_lock(); -+	t = snapshot_t(c, id); -+	ret = (t->children[0]|t->children[1]) != 0; -+	rcu_read_unlock(); -+ -+	return ret; -+} -+ -+static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) -+{ -+	u32 *i; -+ -+	darray_for_each(*s, i) -+		if (*i == id) -+			return true; -+	return false; -+} -+ -+static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id) -+{ -+	u32 *i; -+ -+	darray_for_each(*s, i) -+		if (bch2_snapshot_is_ancestor(c, id, *i)) -+			return true; -+	return false; -+} -+ -+static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) -+{ -+	int ret; -+ -+	BUG_ON(snapshot_list_has_id(s, id)); -+	ret = darray_push(s, id); -+	if (ret) -+		bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); -+	return ret; -+} -+ -+int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, -+			 struct bch_snapshot *s); -+int bch2_snapshot_get_subvol(struct btree_trans *, u32, -+			     struct bch_subvolume *); -+ -+/* only exported for tests: */ -+int bch2_snapshot_node_create(struct btree_trans *, u32, -+			      u32 *, u32 *, unsigned); -+ -+int bch2_check_snapshot_trees(struct bch_fs *); -+int bch2_check_snapshots(struct bch_fs *); -+ -+int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); -+void bch2_delete_dead_snapshots_work(struct work_struct *); -+ -+int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos); -+ -+static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, -+					  enum btree_id id, -+					  struct bpos pos) -+{ -+	if (!btree_type_has_snapshots(id) || -+	    bch2_snapshot_is_leaf(trans->c, pos.snapshot)) -+		return 0; -+ -+	return __bch2_key_has_snapshot_overwrites(trans, id, pos); -+} -+ -+int bch2_propagate_key_to_snapshot_leaves(struct btree_trans *, enum btree_id, -+					  struct bkey_s_c, struct bpos *); -+ -+int bch2_snapshots_read(struct bch_fs *); -+void bch2_fs_snapshots_exit(struct bch_fs *); -+ -+#endif /* _BCACHEFS_SNAPSHOT_H */ -diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h -new file mode 100644 -index 000000000000..ae21a8cca1b4 ---- /dev/null -+++ b/fs/bcachefs/str_hash.h -@@ -0,0 +1,370 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_STR_HASH_H -+#define _BCACHEFS_STR_HASH_H -+ -+#include "btree_iter.h" -+#include "btree_update.h" -+#include "checksum.h" -+#include "error.h" -+#include "inode.h" -+#include "siphash.h" -+#include "subvolume.h" -+#include "super.h" -+ -+#include <linux/crc32c.h> -+#include <crypto/hash.h> -+#include <crypto/sha2.h> -+ -+static inline enum bch_str_hash_type -+bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) -+{ -+	switch (opt) { -+	case BCH_STR_HASH_OPT_crc32c: -+		return BCH_STR_HASH_crc32c; -+	case BCH_STR_HASH_OPT_crc64: -+		return BCH_STR_HASH_crc64; -+	case BCH_STR_HASH_OPT_siphash: -+		return c->sb.features & (1ULL << BCH_FEATURE_new_siphash) -+			?
BCH_STR_HASH_siphash -+ : BCH_STR_HASH_siphash_old; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_info { -+ u8 type; -+ /* -+ * For crc32 or crc64 string hashes the first key value of -+ * the siphash_key (k0) is used as the key. -+ */ -+ SIPHASH_KEY siphash_key; -+}; -+ -+static inline struct bch_hash_info -+bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) -+{ -+ /* XXX ick */ -+ struct bch_hash_info info = { -+ .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & -+ ~(~0U << INODE_STR_HASH_BITS), -+ .siphash_key = { .k0 = bi->bi_hash_seed } -+ }; -+ -+ if (unlikely(info.type == BCH_STR_HASH_siphash_old)) { -+ SHASH_DESC_ON_STACK(desc, c->sha256); -+ u8 digest[SHA256_DIGEST_SIZE]; -+ -+ desc->tfm = c->sha256; -+ -+ crypto_shash_digest(desc, (void *) &bi->bi_hash_seed, -+ sizeof(bi->bi_hash_seed), digest); -+ memcpy(&info.siphash_key, digest, sizeof(info.siphash_key)); -+ } -+ -+ return info; -+} -+ -+struct bch_str_hash_ctx { -+ union { -+ u32 crc32c; -+ u64 crc64; -+ SIPHASH_CTX siphash; -+ }; -+}; -+ -+static inline void bch2_str_hash_init(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_crc32c: -+ ctx->crc32c = crc32c(~0, &info->siphash_key.k0, -+ sizeof(info->siphash_key.k0)); -+ break; -+ case BCH_STR_HASH_crc64: -+ ctx->crc64 = crc64_be(~0, &info->siphash_key.k0, -+ sizeof(info->siphash_key.k0)); -+ break; -+ case BCH_STR_HASH_siphash_old: -+ case BCH_STR_HASH_siphash: -+ SipHash24_Init(&ctx->siphash, &info->siphash_key); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info, -+ const void *data, size_t len) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_crc32c: -+ ctx->crc32c = crc32c(ctx->crc32c, data, len); -+ break; -+ case BCH_STR_HASH_crc64: -+ ctx->crc64 = crc64_be(ctx->crc64, data, len); -+ break; -+ case BCH_STR_HASH_siphash_old: -+ case BCH_STR_HASH_siphash: -+ SipHash24_Update(&ctx->siphash, data, len); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, -+ const struct bch_hash_info *info) -+{ -+ switch (info->type) { -+ case BCH_STR_HASH_crc32c: -+ return ctx->crc32c; -+ case BCH_STR_HASH_crc64: -+ return ctx->crc64 >> 1; -+ case BCH_STR_HASH_siphash_old: -+ case BCH_STR_HASH_siphash: -+ return SipHash24_End(&ctx->siphash) >> 1; -+ default: -+ BUG(); -+ } -+} -+ -+struct bch_hash_desc { -+ enum btree_id btree_id; -+ u8 key_type; -+ -+ u64 (*hash_key)(const struct bch_hash_info *, const void *); -+ u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); -+ bool (*cmp_key)(struct bkey_s_c, const void *); -+ bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); -+ bool (*is_visible)(subvol_inum inum, struct bkey_s_c); -+}; -+ -+static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, struct bkey_s_c k) -+{ -+ return k.k->type == desc.key_type && -+ (!desc.is_visible || -+ !inum.inum || -+ desc.is_visible(inum, k)); -+} -+ -+static __always_inline int -+bch2_hash_lookup(struct btree_trans *trans, -+ struct btree_iter *iter, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ subvol_inum inum, const void *key, -+ unsigned flags) -+{ -+ struct bkey_s_c k; -+ u32 snapshot; -+ int ret; -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ return ret; -+ -+ for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ SPOS(inum.inum, 
desc.hash_key(info, key), snapshot), -+ POS(inum.inum, U64_MAX), -+ BTREE_ITER_SLOTS|flags, k, ret) { -+ if (is_visible_key(desc, inum, k)) { -+ if (!desc.cmp_key(k, key)) -+ return 0; -+ } else if (k.k->type == KEY_TYPE_hash_whiteout) { -+ ; -+ } else { -+ /* hole, not found */ -+ break; -+ } -+ } -+ bch2_trans_iter_exit(trans, iter); -+ -+ return ret ?: -BCH_ERR_ENOENT_str_hash_lookup; -+} -+ -+static __always_inline int -+bch2_hash_hole(struct btree_trans *trans, -+ struct btree_iter *iter, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ subvol_inum inum, const void *key) -+{ -+ struct bkey_s_c k; -+ u32 snapshot; -+ int ret; -+ -+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ return ret; -+ -+ for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, -+ SPOS(inum.inum, desc.hash_key(info, key), snapshot), -+ POS(inum.inum, U64_MAX), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) -+ if (!is_visible_key(desc, inum, k)) -+ return 0; -+ bch2_trans_iter_exit(trans, iter); -+ -+ return ret ?: -BCH_ERR_ENOSPC_str_hash_create; -+} -+ -+static __always_inline -+int bch2_hash_needs_whiteout(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *start) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_trans_copy_iter(&iter, start); -+ -+ bch2_btree_iter_advance(&iter); -+ -+ for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, k, ret) { -+ if (k.k->type != desc.key_type && -+ k.k->type != KEY_TYPE_hash_whiteout) -+ break; -+ -+ if (k.k->type == desc.key_type && -+ desc.hash_bkey(info, k) <= start->pos.offset) { -+ ret = 1; -+ break; -+ } -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static __always_inline -+int bch2_hash_set_snapshot(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ subvol_inum inum, u32 snapshot, -+ struct bkey_i *insert, -+ int flags, -+ int update_flags) -+{ -+ struct btree_iter iter, slot = { NULL }; -+ struct bkey_s_c k; -+ bool found = false; -+ int ret; -+ -+ for_each_btree_key_upto_norestart(trans, iter, desc.btree_id, -+ SPOS(insert->k.p.inode, -+ desc.hash_bkey(info, bkey_i_to_s_c(insert)), -+ snapshot), -+ POS(insert->k.p.inode, U64_MAX), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { -+ if (is_visible_key(desc, inum, k)) { -+ if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) -+ goto found; -+ -+ /* hash collision: */ -+ continue; -+ } -+ -+ if (!slot.path && -+ !(flags & BCH_HASH_SET_MUST_REPLACE)) -+ bch2_trans_copy_iter(&slot, &iter); -+ -+ if (k.k->type != KEY_TYPE_hash_whiteout) -+ goto not_found; -+ } -+ -+ if (!ret) -+ ret = -BCH_ERR_ENOSPC_str_hash_create; -+out: -+ bch2_trans_iter_exit(trans, &slot); -+ bch2_trans_iter_exit(trans, &iter); -+ -+ return ret; -+found: -+ found = true; -+not_found: -+ -+ if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { -+ ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; -+ } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { -+ ret = -EEXIST; -+ } else { -+ if (!found && slot.path) -+ swap(iter, slot); -+ -+ insert->k.p = iter.pos; -+ ret = bch2_trans_update(trans, &iter, insert, 0); -+ } -+ -+ goto out; -+} -+ -+static __always_inline -+int bch2_hash_set(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ subvol_inum inum, -+ struct bkey_i *insert, int flags) -+{ -+ u32 snapshot; -+ int ret; -+ -+ ret = 
bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); -+ if (ret) -+ return ret; -+ -+ insert->k.p.inode = inum.inum; -+ -+ return bch2_hash_set_snapshot(trans, desc, info, inum, -+ snapshot, insert, flags, 0); -+} -+ -+static __always_inline -+int bch2_hash_delete_at(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ struct btree_iter *iter, -+ unsigned update_flags) -+{ -+ struct bkey_i *delete; -+ int ret; -+ -+ delete = bch2_trans_kmalloc(trans, sizeof(*delete)); -+ ret = PTR_ERR_OR_ZERO(delete); -+ if (ret) -+ return ret; -+ -+ ret = bch2_hash_needs_whiteout(trans, desc, info, iter); -+ if (ret < 0) -+ return ret; -+ -+ bkey_init(&delete->k); -+ delete->k.p = iter->pos; -+ delete->k.type = ret ? KEY_TYPE_hash_whiteout : KEY_TYPE_deleted; -+ -+ return bch2_trans_update(trans, iter, delete, update_flags); -+} -+ -+static __always_inline -+int bch2_hash_delete(struct btree_trans *trans, -+ const struct bch_hash_desc desc, -+ const struct bch_hash_info *info, -+ subvol_inum inum, const void *key) -+{ -+ struct btree_iter iter; -+ int ret; -+ -+ ret = bch2_hash_lookup(trans, &iter, desc, info, inum, key, -+ BTREE_ITER_INTENT); -+ if (ret) -+ return ret; -+ -+ ret = bch2_hash_delete_at(trans, desc, info, &iter, 0); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+#endif /* _BCACHEFS_STR_HASH_H */ -diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c -new file mode 100644 -index 000000000000..fccd25aa3242 ---- /dev/null -+++ b/fs/bcachefs/subvolume.c -@@ -0,0 +1,437 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "btree_key_cache.h" -+#include "btree_update.h" -+#include "errcode.h" -+#include "error.h" -+#include "fs.h" -+#include "snapshot.h" -+#include "subvolume.h" -+ -+#include -+ -+static int bch2_subvolume_delete(struct btree_trans *, u32); -+ -+static int check_subvol(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k) -+{ -+ struct bch_fs *c = trans->c; -+ struct bkey_s_c_subvolume subvol; -+ struct bch_snapshot snapshot; -+ unsigned snapid; -+ int ret = 0; -+ -+ if (k.k->type != KEY_TYPE_subvolume) -+ return 0; -+ -+ subvol = bkey_s_c_to_subvolume(k); -+ snapid = le32_to_cpu(subvol.v->snapshot); -+ ret = bch2_snapshot_lookup(trans, snapid, &snapshot); -+ -+ if (bch2_err_matches(ret, ENOENT)) -+ bch_err(c, "subvolume %llu points to nonexistent snapshot %u", -+ k.k->p.offset, snapid); -+ if (ret) -+ return ret; -+ -+ if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { -+ bch2_fs_lazy_rw(c); -+ -+ ret = bch2_subvolume_delete(trans, iter->pos.offset); -+ if (ret) -+ bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); -+ return ret ?: -BCH_ERR_transaction_restart_nested; -+ } -+ -+ if (!BCH_SUBVOLUME_SNAP(subvol.v)) { -+ u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); -+ u32 snapshot_tree; -+ struct bch_snapshot_tree st; -+ -+ rcu_read_lock(); -+ snapshot_tree = snapshot_t(c, snapshot_root)->tree; -+ rcu_read_unlock(); -+ -+ ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st); -+ -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, -+ "%s: snapshot tree %u not found", __func__, snapshot_tree); -+ -+ if (ret) -+ return ret; -+ -+ if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, -+ c, subvol_not_master_and_not_snapshot, -+ "subvolume %llu is not set as snapshot but is not master subvolume", -+ k.k->p.offset)) { -+ struct bkey_i_subvolume *s = -+ bch2_bkey_make_mut_typed(trans, iter, 
&subvol.s_c, 0, subvolume); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (ret) -+ return ret; -+ -+ SET_BCH_SUBVOLUME_SNAP(&s->v, true); -+ } -+ } -+ -+fsck_err: -+ return ret; -+} -+ -+int bch2_check_subvols(struct bch_fs *c) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, -+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, -+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, -+ check_subvol(trans, &iter, k))); -+ if (ret) -+ bch_err_fn(c, ret); -+ return ret; -+} -+ -+/* Subvolumes: */ -+ -+int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, struct printbuf *err) -+{ -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || -+ bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, -+ subvol_pos_bad, -+ "invalid pos"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); -+ -+ prt_printf(out, "root %llu snapshot id %u", -+ le64_to_cpu(s.v->inode), -+ le32_to_cpu(s.v->snapshot)); -+ -+ if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent)) -+ prt_printf(out, " parent %u", le32_to_cpu(s.v->parent)); -+} -+ -+static __always_inline int -+bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol, -+ bool inconsistent_if_not_found, -+ int iter_flags, -+ struct bch_subvolume *s) -+{ -+ int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol), -+ iter_flags, subvolume, s); -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) && -+ inconsistent_if_not_found, -+ trans->c, "missing subvolume %u", subvol); -+ return ret; -+} -+ -+int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, -+ bool inconsistent_if_not_found, -+ int iter_flags, -+ struct bch_subvolume *s) -+{ -+ return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); -+} -+ -+int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, -+ struct bch_subvolume *subvol) -+{ -+ struct bch_snapshot snap; -+ -+ return bch2_snapshot_lookup(trans, snapshot, &snap) ?: -+ bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); -+} -+ -+int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, -+ u32 *snapid) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c_subvolume subvol; -+ int ret; -+ -+ subvol = bch2_bkey_get_iter_typed(trans, &iter, -+ BTREE_ID_subvolumes, POS(0, subvolid), -+ BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES, -+ subvolume); -+ ret = bkey_err(subvol); -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, -+ "missing subvolume %u", subvolid); -+ -+ if (likely(!ret)) -+ *snapid = le32_to_cpu(subvol.v->snapshot); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int bch2_subvolume_reparent(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_s_c k, -+ u32 old_parent, u32 new_parent) -+{ -+ struct bkey_i_subvolume *s; -+ int ret; -+ -+ if (k.k->type != KEY_TYPE_subvolume) -+ return 0; -+ -+ if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) && -+ le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent) -+ return 0; -+ -+ s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); -+ ret = PTR_ERR_OR_ZERO(s); -+ if (ret) -+ return ret; -+ -+ s->v.parent = cpu_to_le32(new_parent); -+ return 0; -+} -+ -+/* -+ * Separate from the snapshot tree in the snapshots btree, we 
record the tree -+ * structure of how snapshot subvolumes were created - the parent subvolume of -+ * each snapshot subvolume. -+ * -+ * When a subvolume is deleted, we scan for child subvolumes and reparent them, -+ * to avoid dangling references: -+ */ -+static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete) -+{ -+	struct btree_iter iter; -+	struct bkey_s_c k; -+	struct bch_subvolume s; -+ -+	return lockrestart_do(trans, -+			bch2_subvolume_get(trans, subvolid_to_delete, true, -+				   BTREE_ITER_CACHED, &s)) ?: -+		for_each_btree_key_commit(trans, iter, -+				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, -+				NULL, NULL, BTREE_INSERT_NOFAIL, -+			bch2_subvolume_reparent(trans, &iter, k, -+					subvolid_to_delete, le32_to_cpu(s.parent))); -+} -+ -+/* -+ * Delete subvolume, mark snapshot ID as deleted, queue up snapshot -+ * deletion/cleanup: -+ */ -+static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) -+{ -+	struct btree_iter iter; -+	struct bkey_s_c_subvolume subvol; -+	u32 snapid; -+	int ret = 0; -+ -+	subvol = bch2_bkey_get_iter_typed(trans, &iter, -+				BTREE_ID_subvolumes, POS(0, subvolid), -+				BTREE_ITER_CACHED|BTREE_ITER_INTENT, -+				subvolume); -+	ret = bkey_err(subvol); -+	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, -+				"missing subvolume %u", subvolid); -+	if (ret) -+		return ret; -+ -+	snapid = le32_to_cpu(subvol.v->snapshot); -+ -+	ret = bch2_btree_delete_at(trans, &iter, 0) ?: -+		bch2_snapshot_node_set_deleted(trans, snapid); -+	bch2_trans_iter_exit(trans, &iter); -+	return ret; -+} -+ -+static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) -+{ -+	return bch2_subvolumes_reparent(trans, subvolid) ?: -+		commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, -+			  __bch2_subvolume_delete(trans, subvolid)); -+} -+ -+static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) -+{ -+	struct bch_fs *c = container_of(work, struct bch_fs, -+				snapshot_wait_for_pagecache_and_delete_work); -+	snapshot_id_list s; -+	u32 *id; -+	int ret = 0; -+ -+	while (!ret) { -+		mutex_lock(&c->snapshots_unlinked_lock); -+		s = c->snapshots_unlinked; -+		darray_init(&c->snapshots_unlinked); -+		mutex_unlock(&c->snapshots_unlinked_lock); -+ -+		if (!s.nr) -+			break; -+ -+		bch2_evict_subvolume_inodes(c, &s); -+ -+		for (id = s.data; id < s.data + s.nr; id++) { -+			ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); -+			if (ret) { -+				bch_err_msg(c, ret, "deleting subvolume %u", *id); -+				break; -+			} -+		} -+ -+		darray_exit(&s); -+	} -+ -+	bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); -+} -+ -+struct subvolume_unlink_hook { -+	struct btree_trans_commit_hook	h; -+	u32				subvol; -+}; -+ -+static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, -+						      struct btree_trans_commit_hook *_h) -+{ -+	struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); -+	struct bch_fs *c = trans->c; -+	int ret = 0; -+ -+	mutex_lock(&c->snapshots_unlinked_lock); -+	if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) -+		ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); -+	mutex_unlock(&c->snapshots_unlinked_lock); -+ -+	if (ret) -+		return ret; -+ -+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache)) -+		return -EROFS; -+ -+	if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) -+		bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache); -+	return 0; -+} -+ -+int bch2_subvolume_unlink(struct
btree_trans *trans, u32 subvolid) -+{ -+ struct btree_iter iter; -+ struct bkey_i_subvolume *n; -+ struct subvolume_unlink_hook *h; -+ int ret = 0; -+ -+ h = bch2_trans_kmalloc(trans, sizeof(*h)); -+ ret = PTR_ERR_OR_ZERO(h); -+ if (ret) -+ return ret; -+ -+ h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; -+ h->subvol = subvolid; -+ bch2_trans_commit_hook(trans, &h->h); -+ -+ n = bch2_bkey_get_mut_typed(trans, &iter, -+ BTREE_ID_subvolumes, POS(0, subvolid), -+ BTREE_ITER_CACHED, subvolume); -+ ret = PTR_ERR_OR_ZERO(n); -+ if (unlikely(ret)) { -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, -+ "missing subvolume %u", subvolid); -+ return ret; -+ } -+ -+ SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+int bch2_subvolume_create(struct btree_trans *trans, u64 inode, -+ u32 src_subvolid, -+ u32 *new_subvolid, -+ u32 *new_snapshotid, -+ bool ro) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL }; -+ struct bkey_i_subvolume *new_subvol = NULL; -+ struct bkey_i_subvolume *src_subvol = NULL; -+ u32 parent = 0, new_nodes[2], snapshot_subvols[2]; -+ int ret = 0; -+ -+ ret = bch2_bkey_get_empty_slot(trans, &dst_iter, -+ BTREE_ID_subvolumes, POS(0, U32_MAX)); -+ if (ret == -BCH_ERR_ENOSPC_btree_slot) -+ ret = -BCH_ERR_ENOSPC_subvolume_create; -+ if (ret) -+ return ret; -+ -+ snapshot_subvols[0] = dst_iter.pos.offset; -+ snapshot_subvols[1] = src_subvolid; -+ -+ if (src_subvolid) { -+ /* Creating a snapshot: */ -+ -+ src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter, -+ BTREE_ID_subvolumes, POS(0, src_subvolid), -+ BTREE_ITER_CACHED, subvolume); -+ ret = PTR_ERR_OR_ZERO(src_subvol); -+ if (unlikely(ret)) { -+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, -+ "subvolume %u not found", src_subvolid); -+ goto err; -+ } -+ -+ parent = le32_to_cpu(src_subvol->v.snapshot); -+ } -+ -+ ret = bch2_snapshot_node_create(trans, parent, new_nodes, -+ snapshot_subvols, -+ src_subvolid ? 
2 : 1); -+ if (ret) -+ goto err; -+ -+ if (src_subvolid) { -+ src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]); -+ ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0); -+ if (ret) -+ goto err; -+ } -+ -+ new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume); -+ ret = PTR_ERR_OR_ZERO(new_subvol); -+ if (ret) -+ goto err; -+ -+ new_subvol->v.flags = 0; -+ new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]); -+ new_subvol->v.inode = cpu_to_le64(inode); -+ new_subvol->v.parent = cpu_to_le32(src_subvolid); -+ new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c)); -+ new_subvol->v.otime.hi = 0; -+ -+ SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro); -+ SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0); -+ -+ *new_subvolid = new_subvol->k.p.offset; -+ *new_snapshotid = new_nodes[0]; -+err: -+ bch2_trans_iter_exit(trans, &src_iter); -+ bch2_trans_iter_exit(trans, &dst_iter); -+ return ret; -+} -+ -+int bch2_fs_subvolumes_init(struct bch_fs *c) -+{ -+ INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); -+ INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, -+ bch2_subvolume_wait_for_pagecache_and_delete); -+ mutex_init(&c->snapshots_unlinked_lock); -+ return 0; -+} -diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h -new file mode 100644 -index 000000000000..a1003d30ab0a ---- /dev/null -+++ b/fs/bcachefs/subvolume.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUBVOLUME_H -+#define _BCACHEFS_SUBVOLUME_H -+ -+#include "darray.h" -+#include "subvolume_types.h" -+ -+enum bkey_invalid_flags; -+ -+int bch2_check_subvols(struct bch_fs *); -+ -+int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ -+ .key_invalid = bch2_subvolume_invalid, \ -+ .val_to_text = bch2_subvolume_to_text, \ -+ .min_val_size = 16, \ -+}) -+ -+int bch2_subvolume_get(struct btree_trans *, unsigned, -+ bool, int, struct bch_subvolume *); -+int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); -+ -+int bch2_delete_dead_snapshots(struct bch_fs *); -+void bch2_delete_dead_snapshots_async(struct bch_fs *); -+ -+int bch2_subvolume_unlink(struct btree_trans *, u32); -+int bch2_subvolume_create(struct btree_trans *, u64, u32, -+ u32 *, u32 *, bool); -+ -+int bch2_fs_subvolumes_init(struct bch_fs *); -+ -+#endif /* _BCACHEFS_SUBVOLUME_H */ -diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h -new file mode 100644 -index 000000000000..86833445af20 ---- /dev/null -+++ b/fs/bcachefs/subvolume_types.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUBVOLUME_TYPES_H -+#define _BCACHEFS_SUBVOLUME_TYPES_H -+ -+#include "darray.h" -+ -+typedef DARRAY(u32) snapshot_id_list; -+ -+#define IS_ANCESTOR_BITMAP 128 -+ -+struct snapshot_t { -+ u32 parent; -+ u32 skip[3]; -+ u32 depth; -+ u32 children[2]; -+ u32 subvol; /* Nonzero only if a subvolume points to this node: */ -+ u32 tree; -+ u32 equiv; -+ unsigned long is_ancestor[BITS_TO_LONGS(IS_ANCESTOR_BITMAP)]; -+}; -+ -+struct snapshot_table { -+ struct snapshot_t s[0]; -+}; -+ -+typedef struct { -+ u32 subvol; -+ u64 inum; -+} subvol_inum; -+ -+#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ -diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c -new file mode 100644 -index 000000000000..f4cad903f4d6 ---- /dev/null -+++ b/fs/bcachefs/super-io.c 
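The subvolume API above is transactional end-to-end: slot allocation, snapshot-node creation and the new subvolume key all happen inside one btree transaction, so a caller only needs the usual retry/commit wrapper around it. A minimal sketch of such a caller, assuming only helpers already shown in this patch — the wrapper name example_create_snapshot is hypothetical, and bch2_trans_run()/commit_do() are used the same way bch2_subvolume_delete() uses them:

/* Hypothetical wrapper, for illustration only -- not part of this patch: */
static int example_create_snapshot(struct bch_fs *c, u64 root_inode,
				   u32 src_subvolid, bool ro)
{
	u32 new_subvolid = 0, new_snapshotid = 0;

	/*
	 * commit_do() retries on transaction restarts and commits on
	 * success; BTREE_INSERT_NOFAIL matches the deletion path above:
	 */
	return bch2_trans_run(c,
		commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
			  bch2_subvolume_create(trans, root_inode, src_subvolid,
						&new_subvolid, &new_snapshotid, ro)));
}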
-@@ -0,0 +1,1266 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "checksum.h" -+#include "counters.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "error.h" -+#include "journal.h" -+#include "journal_sb.h" -+#include "journal_seq_blacklist.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "quota.h" -+#include "sb-clean.h" -+#include "sb-errors.h" -+#include "sb-members.h" -+#include "super-io.h" -+#include "super.h" -+#include "trace.h" -+#include "vstructs.h" -+ -+#include -+#include -+ -+static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { -+}; -+ -+struct bch2_metadata_version { -+ u16 version; -+ const char *name; -+ u64 recovery_passes; -+}; -+ -+static const struct bch2_metadata_version bch2_metadata_versions[] = { -+#define x(n, v, _recovery_passes) { \ -+ .version = v, \ -+ .name = #n, \ -+ .recovery_passes = _recovery_passes, \ -+}, -+ BCH_METADATA_VERSIONS() -+#undef x -+}; -+ -+void bch2_version_to_text(struct printbuf *out, unsigned v) -+{ -+ const char *str = "(unknown version)"; -+ -+ for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++) -+ if (bch2_metadata_versions[i].version == v) { -+ str = bch2_metadata_versions[i].name; -+ break; -+ } -+ -+ prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); -+} -+ -+unsigned bch2_latest_compatible_version(unsigned v) -+{ -+ if (!BCH_VERSION_MAJOR(v)) -+ return v; -+ -+ for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++) -+ if (bch2_metadata_versions[i].version > v && -+ BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) == -+ BCH_VERSION_MAJOR(v)) -+ v = bch2_metadata_versions[i].version; -+ -+ return v; -+} -+ -+u64 bch2_upgrade_recovery_passes(struct bch_fs *c, -+ unsigned old_version, -+ unsigned new_version) -+{ -+ u64 ret = 0; -+ -+ for (const struct bch2_metadata_version *i = bch2_metadata_versions; -+ i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions); -+ i++) -+ if (i->version > old_version && i->version <= new_version) { -+ if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK) -+ ret |= bch2_fsck_recovery_passes(); -+ ret |= i->recovery_passes; -+ } -+ -+ return ret &= ~RECOVERY_PASS_ALL_FSCK; -+} -+ -+const char * const bch2_sb_fields[] = { -+#define x(name, nr) #name, -+ BCH_SB_FIELDS() -+#undef x -+ NULL -+}; -+ -+static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *, -+ struct printbuf *); -+ -+struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f; -+ -+ /* XXX: need locking around superblock to access optional fields */ -+ -+ vstruct_for_each(sb, f) -+ if (le32_to_cpu(f->type) == type) -+ return f; -+ return NULL; -+} -+ -+static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb, -+ struct bch_sb_field *f, -+ unsigned u64s) -+{ -+ unsigned old_u64s = f ? 
le32_to_cpu(f->u64s) : 0; -+ unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s; -+ -+ BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size); -+ -+ if (!f && !u64s) { -+ /* nothing to do: */ -+ } else if (!f) { -+ f = vstruct_last(sb->sb); -+ memset(f, 0, sizeof(u64) * u64s); -+ f->u64s = cpu_to_le32(u64s); -+ f->type = 0; -+ } else { -+ void *src, *dst; -+ -+ src = vstruct_end(f); -+ -+ if (u64s) { -+ f->u64s = cpu_to_le32(u64s); -+ dst = vstruct_end(f); -+ } else { -+ dst = f; -+ } -+ -+ memmove(dst, src, vstruct_end(sb->sb) - src); -+ -+ if (dst > src) -+ memset(src, 0, dst - src); -+ } -+ -+ sb->sb->u64s = cpu_to_le32(sb_u64s); -+ -+ return u64s ? f : NULL; -+} -+ -+void bch2_sb_field_delete(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type); -+ -+ if (f) -+ __bch2_sb_field_resize(sb, f, 0); -+} -+ -+/* Superblock realloc/free: */ -+ -+void bch2_free_super(struct bch_sb_handle *sb) -+{ -+ kfree(sb->bio); -+ if (!IS_ERR_OR_NULL(sb->bdev)) -+ blkdev_put(sb->bdev, sb->holder); -+ kfree(sb->holder); -+ -+ kfree(sb->sb); -+ memset(sb, 0, sizeof(*sb)); -+} -+ -+int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) -+{ -+ size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s); -+ size_t new_buffer_size; -+ struct bch_sb *new_sb; -+ struct bio *bio; -+ -+ if (sb->bdev) -+ new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev)); -+ -+ new_buffer_size = roundup_pow_of_two(new_bytes); -+ -+ if (sb->sb && sb->buffer_size >= new_buffer_size) -+ return 0; -+ -+ if (sb->sb && sb->have_layout) { -+ u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; -+ -+ if (new_bytes > max_bytes) { -+ pr_err("%pg: superblock too big: want %zu but have %llu", -+ sb->bdev, new_bytes, max_bytes); -+ return -BCH_ERR_ENOSPC_sb; -+ } -+ } -+ -+ if (sb->buffer_size >= new_buffer_size && sb->sb) -+ return 0; -+ -+ if (dynamic_fault("bcachefs:add:super_realloc")) -+ return -BCH_ERR_ENOMEM_sb_realloc_injected; -+ -+ new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO); -+ if (!new_sb) -+ return -BCH_ERR_ENOMEM_sb_buf_realloc; -+ -+ sb->sb = new_sb; -+ -+ if (sb->have_bio) { -+ unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size); -+ -+ bio = bio_kmalloc(nr_bvecs, GFP_KERNEL); -+ if (!bio) -+ return -BCH_ERR_ENOMEM_sb_bio_realloc; -+ -+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0); -+ -+ kfree(sb->bio); -+ sb->bio = bio; -+ } -+ -+ sb->buffer_size = new_buffer_size; -+ -+ return 0; -+} -+ -+struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, -+ enum bch_sb_field_type type, -+ unsigned u64s) -+{ -+ struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type); -+ ssize_t old_u64s = f ? 
le32_to_cpu(f->u64s) : 0; -+ ssize_t d = -old_u64s + u64s; -+ -+ if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) -+ return NULL; -+ -+ if (sb->fs_sb) { -+ struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb); -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ /* XXX: we're not checking that offline devices have enough space */ -+ -+ for_each_online_member(ca, c, i) { -+ struct bch_sb_handle *dev_sb = &ca->disk_sb; -+ -+ if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { -+ percpu_ref_put(&ca->ref); -+ return NULL; -+ } -+ } -+ } -+ -+ f = bch2_sb_field_get_id(sb->sb, type); -+ f = __bch2_sb_field_resize(sb, f, u64s); -+ if (f) -+ f->type = cpu_to_le32(type); -+ return f; -+} -+ -+/* Superblock validate: */ -+ -+static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out) -+{ -+ u64 offset, prev_offset, max_sectors; -+ unsigned i; -+ -+ BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512); -+ -+ if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) && -+ !uuid_equal(&layout->magic, &BCHFS_MAGIC)) { -+ prt_printf(out, "Not a bcachefs superblock layout"); -+ return -BCH_ERR_invalid_sb_layout; -+ } -+ -+ if (layout->layout_type != 0) { -+ prt_printf(out, "Invalid superblock layout type %u", -+ layout->layout_type); -+ return -BCH_ERR_invalid_sb_layout_type; -+ } -+ -+ if (!layout->nr_superblocks) { -+ prt_printf(out, "Invalid superblock layout: no superblocks"); -+ return -BCH_ERR_invalid_sb_layout_nr_superblocks; -+ } -+ -+ if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) { -+ prt_printf(out, "Invalid superblock layout: too many superblocks"); -+ return -BCH_ERR_invalid_sb_layout_nr_superblocks; -+ } -+ -+ max_sectors = 1 << layout->sb_max_size_bits; -+ -+ prev_offset = le64_to_cpu(layout->sb_offset[0]); -+ -+ for (i = 1; i < layout->nr_superblocks; i++) { -+ offset = le64_to_cpu(layout->sb_offset[i]); -+ -+ if (offset < prev_offset + max_sectors) { -+ prt_printf(out, "Invalid superblock layout: superblocks overlap\n" -+ " (sb %u ends at %llu next starts at %llu)", -+ i - 1, prev_offset + max_sectors, offset); -+ return -BCH_ERR_invalid_sb_layout_superblocks_overlap; -+ } -+ prev_offset = offset; -+ } -+ -+ return 0; -+} -+ -+static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) -+{ -+ u16 version = le16_to_cpu(sb->version); -+ u16 version_min = le16_to_cpu(sb->version_min); -+ -+ if (!bch2_version_compatible(version)) { -+ prt_str(out, "Unsupported superblock version "); -+ bch2_version_to_text(out, version); -+ prt_str(out, " (min "); -+ bch2_version_to_text(out, bcachefs_metadata_version_min); -+ prt_str(out, ", max "); -+ bch2_version_to_text(out, bcachefs_metadata_version_current); -+ prt_str(out, ")"); -+ return -BCH_ERR_invalid_sb_version; -+ } -+ -+ if (!bch2_version_compatible(version_min)) { -+ prt_str(out, "Unsupported superblock version_min "); -+ bch2_version_to_text(out, version_min); -+ prt_str(out, " (min "); -+ bch2_version_to_text(out, bcachefs_metadata_version_min); -+ prt_str(out, ", max "); -+ bch2_version_to_text(out, bcachefs_metadata_version_current); -+ prt_str(out, ")"); -+ return -BCH_ERR_invalid_sb_version; -+ } -+ -+ if (version_min > version) { -+ prt_str(out, "Bad minimum version "); -+ bch2_version_to_text(out, version_min); -+ prt_str(out, ", greater than version field "); -+ bch2_version_to_text(out, version); -+ return -BCH_ERR_invalid_sb_version; -+ } -+ -+ return 0; -+} -+ -+static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, -+ 
int rw) -+{ -+ struct bch_sb *sb = disk_sb->sb; -+ struct bch_sb_field *f; -+ struct bch_sb_field_members_v1 *mi; -+ enum bch_opt_id opt_id; -+ u16 block_size; -+ int ret; -+ -+ ret = bch2_sb_compatible(sb, out); -+ if (ret) -+ return ret; -+ -+ if (sb->features[1] || -+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) { -+ prt_printf(out, "Filesystem has incompatible features"); -+ return -BCH_ERR_invalid_sb_features; -+ } -+ -+ block_size = le16_to_cpu(sb->block_size); -+ -+ if (block_size > PAGE_SECTORS) { -+ prt_printf(out, "Block size too big (got %u, max %u)", -+ block_size, PAGE_SECTORS); -+ return -BCH_ERR_invalid_sb_block_size; -+ } -+ -+ if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { -+ prt_printf(out, "Bad user UUID (got zeroes)"); -+ return -BCH_ERR_invalid_sb_uuid; -+ } -+ -+ if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) { -+ prt_printf(out, "Bad internal UUID (got zeroes)"); -+ return -BCH_ERR_invalid_sb_uuid; -+ } -+ -+ if (!sb->nr_devices || -+ sb->nr_devices > BCH_SB_MEMBERS_MAX) { -+ prt_printf(out, "Bad number of member devices %u (max %u)", -+ sb->nr_devices, BCH_SB_MEMBERS_MAX); -+ return -BCH_ERR_invalid_sb_too_many_members; -+ } -+ -+ if (sb->dev_idx >= sb->nr_devices) { -+ prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)", -+ sb->dev_idx, sb->nr_devices); -+ return -BCH_ERR_invalid_sb_dev_idx; -+ } -+ -+ if (!sb->time_precision || -+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) { -+ prt_printf(out, "Invalid time precision: %u (min 1, max %lu)", -+ le32_to_cpu(sb->time_precision), NSEC_PER_SEC); -+ return -BCH_ERR_invalid_sb_time_precision; -+ } -+ -+ if (rw == READ) { -+ /* -+ * Been seeing a bug where these are getting inexplicably -+ * zeroed, so we're now validating them, but we have to be -+ * careful not to prevent people's filesystems from mounting: -+ */ -+ if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb)) -+ SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000); -+ if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb)) -+ SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000); -+ -+ if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) -+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); -+ } -+ -+ for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { -+ const struct bch_option *opt = bch2_opt_table + opt_id; -+ -+ if (opt->get_sb != BCH2_NO_SB_OPT) { -+ u64 v = bch2_opt_from_sb(sb, opt_id); -+ -+ prt_printf(out, "Invalid option "); -+ ret = bch2_opt_validate(opt, v, out); -+ if (ret) -+ return ret; -+ -+ printbuf_reset(out); -+ } -+ } -+ -+ /* validate layout */ -+ ret = validate_sb_layout(&sb->layout, out); -+ if (ret) -+ return ret; -+ -+ vstruct_for_each(sb, f) { -+ if (!f->u64s) { -+ prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)", -+ le32_to_cpu(f->type)); -+ return -BCH_ERR_invalid_sb_field_size; -+ } -+ -+ if (vstruct_next(f) > vstruct_last(sb)) { -+ prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)", -+ le32_to_cpu(f->type)); -+ return -BCH_ERR_invalid_sb_field_size; -+ } -+ } -+ -+ /* members must be validated first: */ -+ mi = bch2_sb_field_get(sb, members_v1); -+ if (!mi) { -+ prt_printf(out, "Invalid superblock: member info area missing"); -+ return -BCH_ERR_invalid_sb_members_missing; -+ } -+ -+ ret = bch2_sb_field_validate(sb, &mi->field, out); -+ if (ret) -+ return ret; -+ -+ vstruct_for_each(sb, f) { -+ if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1) -+ continue; -+ -+ ret = bch2_sb_field_validate(sb, f, out); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/* device open: */ 
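bch2_sb_validate() never prints directly: every failure path formats its complaint into the caller-supplied printbuf and returns a -BCH_ERR_invalid_sb_* code, leaving the caller to decide where the message goes. A minimal sketch of the read-side calling pattern — the wrapper name example_validate_on_read is hypothetical; bch2_read_super() further down follows this same sequence:

/* Hypothetical wrapper, for illustration only -- not part of this patch: */
static int example_validate_on_read(struct bch_sb_handle *sb)
{
	struct printbuf err = PRINTBUF;
	int ret = bch2_sb_validate(sb, &err, READ);

	if (ret)
		pr_err("bcachefs: invalid superblock: %s", err.buf);

	printbuf_exit(&err);	/* printbufs own a heap buffer; always exit */
	return ret;
}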
-+ -+static void bch2_sb_update(struct bch_fs *c) -+{ -+ struct bch_sb *src = c->disk_sb.sb; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ c->sb.uuid = src->uuid; -+ c->sb.user_uuid = src->user_uuid; -+ c->sb.version = le16_to_cpu(src->version); -+ c->sb.version_min = le16_to_cpu(src->version_min); -+ c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); -+ c->sb.nr_devices = src->nr_devices; -+ c->sb.clean = BCH_SB_CLEAN(src); -+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); -+ -+ c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision); -+ c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit; -+ -+ /* XXX this is wrong, we need a 96 or 128 bit integer type */ -+ c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo), -+ c->sb.nsec_per_time_unit); -+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); -+ -+ c->sb.features = le64_to_cpu(src->features[0]); -+ c->sb.compat = le64_to_cpu(src->compat[0]); -+ -+ for_each_member_device(ca, c, i) { -+ struct bch_member m = bch2_sb_member_get(src, i); -+ ca->mi = bch2_mi_to_cpu(&m); -+ } -+} -+ -+static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) -+{ -+ struct bch_sb_field *src_f, *dst_f; -+ struct bch_sb *dst = dst_handle->sb; -+ unsigned i; -+ -+ dst->version = src->version; -+ dst->version_min = src->version_min; -+ dst->seq = src->seq; -+ dst->uuid = src->uuid; -+ dst->user_uuid = src->user_uuid; -+ memcpy(dst->label, src->label, sizeof(dst->label)); -+ -+ dst->block_size = src->block_size; -+ dst->nr_devices = src->nr_devices; -+ -+ dst->time_base_lo = src->time_base_lo; -+ dst->time_base_hi = src->time_base_hi; -+ dst->time_precision = src->time_precision; -+ -+ memcpy(dst->flags, src->flags, sizeof(dst->flags)); -+ memcpy(dst->features, src->features, sizeof(dst->features)); -+ memcpy(dst->compat, src->compat, sizeof(dst->compat)); -+ -+ for (i = 0; i < BCH_SB_FIELD_NR; i++) { -+ int d; -+ -+ if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS) -+ continue; -+ -+ src_f = bch2_sb_field_get_id(src, i); -+ dst_f = bch2_sb_field_get_id(dst, i); -+ -+ d = (src_f ? le32_to_cpu(src_f->u64s) : 0) - -+ (dst_f ? le32_to_cpu(dst_f->u64s) : 0); -+ if (d > 0) { -+ int ret = bch2_sb_realloc(dst_handle, -+ le32_to_cpu(dst_handle->sb->u64s) + d); -+ -+ if (ret) -+ return ret; -+ -+ dst = dst_handle->sb; -+ dst_f = bch2_sb_field_get_id(dst, i); -+ } -+ -+ dst_f = __bch2_sb_field_resize(dst_handle, dst_f, -+ src_f ? 
le32_to_cpu(src_f->u64s) : 0); -+ -+ if (src_f) -+ memcpy(dst_f, src_f, vstruct_bytes(src_f)); -+ } -+ -+ return 0; -+} -+ -+int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src) -+{ -+ int ret; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ ret = bch2_sb_realloc(&c->disk_sb, 0) ?: -+ __copy_super(&c->disk_sb, src) ?: -+ bch2_sb_replicas_to_cpu_replicas(c) ?: -+ bch2_sb_disk_groups_to_cpu(c); -+ if (ret) -+ return ret; -+ -+ bch2_sb_update(c); -+ return 0; -+} -+ -+int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca) -+{ -+ return __copy_super(&ca->disk_sb, c->disk_sb.sb); -+} -+ -+/* read superblock: */ -+ -+static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err) -+{ -+ struct bch_csum csum; -+ size_t bytes; -+ int ret; -+reread: -+ bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); -+ sb->bio->bi_iter.bi_sector = offset; -+ bch2_bio_map(sb->bio, sb->sb, sb->buffer_size); -+ -+ ret = submit_bio_wait(sb->bio); -+ if (ret) { -+ prt_printf(err, "IO error: %i", ret); -+ return ret; -+ } -+ -+ if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) && -+ !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) { -+ prt_printf(err, "Not a bcachefs superblock"); -+ return -BCH_ERR_invalid_sb_magic; -+ } -+ -+ ret = bch2_sb_compatible(sb->sb, err); -+ if (ret) -+ return ret; -+ -+ bytes = vstruct_bytes(sb->sb); -+ -+ if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { -+ prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", -+ bytes, 512UL << sb->sb->layout.sb_max_size_bits); -+ return -BCH_ERR_invalid_sb_too_big; -+ } -+ -+ if (bytes > sb->buffer_size) { -+ ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s)); -+ if (ret) -+ return ret; -+ goto reread; -+ } -+ -+ if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) { -+ prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); -+ return -BCH_ERR_invalid_sb_csum_type; -+ } -+ -+ /* XXX: verify MACs */ -+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb), -+ null_nonce(), sb->sb); -+ -+ if (bch2_crc_cmp(csum, sb->sb->csum)) { -+ prt_printf(err, "bad checksum"); -+ return -BCH_ERR_invalid_sb_csum; -+ } -+ -+ sb->seq = le64_to_cpu(sb->sb->seq); -+ -+ return 0; -+} -+ -+int bch2_read_super(const char *path, struct bch_opts *opts, -+ struct bch_sb_handle *sb) -+{ -+ u64 offset = opt_get(*opts, sb); -+ struct bch_sb_layout layout; -+ struct printbuf err = PRINTBUF; -+ __le64 *i; -+ int ret; -+#ifndef __KERNEL__ -+retry: -+#endif -+ memset(sb, 0, sizeof(*sb)); -+ sb->mode = BLK_OPEN_READ; -+ sb->have_bio = true; -+ sb->holder = kmalloc(1, GFP_KERNEL); -+ if (!sb->holder) -+ return -ENOMEM; -+ -+#ifndef __KERNEL__ -+ if (opt_get(*opts, direct_io) == false) -+ sb->mode |= BLK_OPEN_BUFFERED; -+#endif -+ -+ if (!opt_get(*opts, noexcl)) -+ sb->mode |= BLK_OPEN_EXCL; -+ -+ if (!opt_get(*opts, nochanges)) -+ sb->mode |= BLK_OPEN_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops); -+ if (IS_ERR(sb->bdev) && -+ PTR_ERR(sb->bdev) == -EACCES && -+ opt_get(*opts, read_only)) { -+ sb->mode &= ~BLK_OPEN_WRITE; -+ -+ sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops); -+ if (!IS_ERR(sb->bdev)) -+ opt_set(*opts, nochanges, true); -+ } -+ -+ if (IS_ERR(sb->bdev)) { -+ ret = PTR_ERR(sb->bdev); -+ goto out; -+ } -+ -+ ret = bch2_sb_realloc(sb, 0); -+ if (ret) { -+ prt_printf(&err, "error allocating memory for superblock"); -+ goto err; -+ } -+ -+ if (bch2_fs_init_fault("read_super")) { -+ prt_printf(&err, "dynamic fault"); -+ ret = 
-EFAULT; -+ goto err; -+ } -+ -+ ret = read_one_super(sb, offset, &err); -+ if (!ret) -+ goto got_super; -+ -+ if (opt_defined(*opts, sb)) -+ goto err; -+ -+ printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n", -+ path, err.buf); -+ printbuf_reset(&err); -+ -+ /* -+ * Error reading primary superblock - read location of backup -+ * superblocks: -+ */ -+ bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META); -+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR; -+ /* -+ * use sb buffer to read layout, since sb buffer is page aligned but -+ * layout won't be: -+ */ -+ bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout)); -+ -+ ret = submit_bio_wait(sb->bio); -+ if (ret) { -+ prt_printf(&err, "IO error: %i", ret); -+ goto err; -+ } -+ -+ memcpy(&layout, sb->sb, sizeof(layout)); -+ ret = validate_sb_layout(&layout, &err); -+ if (ret) -+ goto err; -+ -+ for (i = layout.sb_offset; -+ i < layout.sb_offset + layout.nr_superblocks; i++) { -+ offset = le64_to_cpu(*i); -+ -+ if (offset == opt_get(*opts, sb)) -+ continue; -+ -+ ret = read_one_super(sb, offset, &err); -+ if (!ret) -+ goto got_super; -+ } -+ -+ goto err; -+ -+got_super: -+ if (le16_to_cpu(sb->sb->block_size) << 9 < -+ bdev_logical_block_size(sb->bdev) && -+ opt_get(*opts, direct_io)) { -+#ifndef __KERNEL__ -+ opt_set(*opts, direct_io, false); -+ bch2_free_super(sb); -+ goto retry; -+#endif -+ prt_printf(&err, "block size (%u) smaller than device block size (%u)", -+ le16_to_cpu(sb->sb->block_size) << 9, -+ bdev_logical_block_size(sb->bdev)); -+ ret = -BCH_ERR_block_size_too_small; -+ goto err; -+ } -+ -+ ret = 0; -+ sb->have_layout = true; -+ -+ ret = bch2_sb_validate(sb, &err, READ); -+ if (ret) { -+ printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n", -+ path, err.buf); -+ goto err_no_print; -+ } -+out: -+ printbuf_exit(&err); -+ return ret; -+err: -+ printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n", -+ path, err.buf); -+err_no_print: -+ bch2_free_super(sb); -+ goto out; -+} -+ -+/* write superblock: */ -+ -+static void write_super_endio(struct bio *bio) -+{ -+ struct bch_dev *ca = bio->bi_private; -+ -+ /* XXX: return errors directly */ -+ -+ if (bch2_dev_io_err_on(bio->bi_status, ca, -+ bio_data_dir(bio) -+ ? BCH_MEMBER_ERROR_write -+ : BCH_MEMBER_ERROR_read, -+ "superblock %s error: %s", -+ bio_data_dir(bio) ? 
"write" : "read", -+ bch2_blk_status_to_str(bio->bi_status))) -+ ca->sb_write_error = 1; -+ -+ closure_put(&ca->fs->sb_write); -+ percpu_ref_put(&ca->io_ref); -+} -+ -+static void read_back_super(struct bch_fs *c, struct bch_dev *ca) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); -+ -+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) -+{ -+ struct bch_sb *sb = ca->disk_sb.sb; -+ struct bio *bio = ca->disk_sb.bio; -+ -+ sb->offset = sb->layout.sb_offset[idx]; -+ -+ SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false)); -+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), -+ null_nonce(), sb); -+ -+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); -+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); -+ bio->bi_end_io = write_super_endio; -+ bio->bi_private = ca; -+ bch2_bio_map(bio, sb, -+ roundup((size_t) vstruct_bytes(sb), -+ bdev_logical_block_size(ca->disk_sb.bdev))); -+ -+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], -+ bio_sectors(bio)); -+ -+ percpu_ref_get(&ca->io_ref); -+ closure_bio_submit(bio, &c->sb_write); -+} -+ -+int bch2_write_super(struct bch_fs *c) -+{ -+ struct closure *cl = &c->sb_write; -+ struct bch_dev *ca; -+ struct printbuf err = PRINTBUF; -+ unsigned i, sb = 0, nr_wrote; -+ struct bch_devs_mask sb_written; -+ bool wrote, can_mount_without_written, can_mount_with_written; -+ unsigned degraded_flags = BCH_FORCE_IF_DEGRADED; -+ int ret = 0; -+ -+ trace_and_count(c, write_super, c, _RET_IP_); -+ -+ if (c->opts.very_degraded) -+ degraded_flags |= BCH_FORCE_IF_LOST; -+ -+ lockdep_assert_held(&c->sb_lock); -+ -+ closure_init_stack(cl); -+ memset(&sb_written, 0, sizeof(sb_written)); -+ -+ /* Make sure we're using the new magic numbers: */ -+ c->disk_sb.sb->magic = BCHFS_MAGIC; -+ c->disk_sb.sb->layout.magic = BCHFS_MAGIC; -+ -+ le64_add_cpu(&c->disk_sb.sb->seq, 1); -+ -+ if (test_bit(BCH_FS_ERROR, &c->flags)) -+ SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); -+ if (test_bit(BCH_FS_TOPOLOGY_ERROR, &c->flags)) -+ SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1); -+ -+ SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); -+ -+ bch2_sb_counters_from_cpu(c); -+ bch2_sb_members_from_cpu(c); -+ bch2_sb_members_cpy_v2_v1(&c->disk_sb); -+ bch2_sb_errors_from_cpu(c); -+ -+ for_each_online_member(ca, c, i) -+ bch2_sb_from_fs(c, ca); -+ -+ for_each_online_member(ca, c, i) { -+ printbuf_reset(&err); -+ -+ ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE); -+ if (ret) { -+ bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); -+ percpu_ref_put(&ca->io_ref); -+ goto out; -+ } -+ } -+ -+ if (c->opts.nochanges) -+ goto out; -+ -+ /* -+ * Defer writing the superblock until filesystem initialization is -+ * complete - don't write out a partly initialized superblock: -+ */ -+ if (!BCH_SB_INITIALIZED(c->disk_sb.sb)) -+ goto out; -+ -+ for_each_online_member(ca, c, i) { -+ __set_bit(ca->dev_idx, sb_written.d); -+ ca->sb_write_error = 0; -+ } -+ -+ for_each_online_member(ca, c, i) -+ read_back_super(c, ca); -+ closure_sync(cl); -+ -+ for_each_online_member(ca, c, 
i) { -+ if (ca->sb_write_error) -+ continue; -+ -+ if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { -+ bch2_fs_fatal_error(c, -+ "Superblock write was silently dropped! (seq %llu expected %llu)", -+ le64_to_cpu(ca->sb_read_scratch->seq), -+ ca->disk_sb.seq); -+ percpu_ref_put(&ca->io_ref); -+ ret = -BCH_ERR_erofs_sb_err; -+ goto out; -+ } -+ -+ if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { -+ bch2_fs_fatal_error(c, -+ "Superblock modified by another process (seq %llu expected %llu)", -+ le64_to_cpu(ca->sb_read_scratch->seq), -+ ca->disk_sb.seq); -+ percpu_ref_put(&ca->io_ref); -+ ret = -BCH_ERR_erofs_sb_err; -+ goto out; -+ } -+ } -+ -+ do { -+ wrote = false; -+ for_each_online_member(ca, c, i) -+ if (!ca->sb_write_error && -+ sb < ca->disk_sb.sb->layout.nr_superblocks) { -+ write_one_super(c, ca, sb); -+ wrote = true; -+ } -+ closure_sync(cl); -+ sb++; -+ } while (wrote); -+ -+ for_each_online_member(ca, c, i) { -+ if (ca->sb_write_error) -+ __clear_bit(ca->dev_idx, sb_written.d); -+ else -+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); -+ } -+ -+ nr_wrote = dev_mask_nr(&sb_written); -+ -+ can_mount_with_written = -+ bch2_have_enough_devs(c, sb_written, degraded_flags, false); -+ -+ for (i = 0; i < ARRAY_SIZE(sb_written.d); i++) -+ sb_written.d[i] = ~sb_written.d[i]; -+ -+ can_mount_without_written = -+ bch2_have_enough_devs(c, sb_written, degraded_flags, false); -+ -+ /* -+ * If we would be able to mount _without_ the devices we successfully -+ * wrote superblocks to, we weren't able to write to enough devices: -+ * -+ * Exception: if we can mount without the successes because we haven't -+ * written anything (new filesystem), we continue if we'd be able to -+ * mount with the devices we did successfully write to: -+ */ -+ if (bch2_fs_fatal_err_on(!nr_wrote || -+ !can_mount_with_written || -+ (can_mount_without_written && -+ !can_mount_with_written), c, -+ "Unable to write superblock to sufficient devices (from %ps)", -+ (void *) _RET_IP_)) -+ ret = -1; -+out: -+ /* Make new options visible after they're persistent: */ -+ bch2_sb_update(c); -+ printbuf_exit(&err); -+ return ret; -+} -+ -+void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ mutex_lock(&c->sb_lock); -+ if (!(c->sb.features & (1ULL << feat))) { -+ c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); -+ -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+} -+ -+/* Downgrade if superblock is at a higher version than currently supported: */ -+void bch2_sb_maybe_downgrade(struct bch_fs *c) -+{ -+ lockdep_assert_held(&c->sb_lock); -+ -+ /* -+ * Downgrade, if superblock is at a higher version than currently -+ * supported: -+ */ -+ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) -+ SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); -+ if (c->sb.version > bcachefs_metadata_version_current) -+ c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); -+ if (c->sb.version_min > bcachefs_metadata_version_current) -+ c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); -+ c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); -+} -+ -+void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) -+{ -+ lockdep_assert_held(&c->sb_lock); -+ -+ c->disk_sb.sb->version = cpu_to_le16(new_version); -+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); -+} -+ -+static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { -+#define x(f, 
nr) \ -+ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, -+ BCH_SB_FIELDS() -+#undef x -+}; -+ -+static const struct bch_sb_field_ops bch2_sb_field_null_ops; -+ -+static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type) -+{ -+ return likely(type < ARRAY_SIZE(bch2_sb_field_ops)) -+ ? bch2_sb_field_ops[type] -+ : &bch2_sb_field_null_ops; -+} -+ -+static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, -+ struct printbuf *err) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ struct printbuf field_err = PRINTBUF; -+ const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); -+ int ret; -+ -+ ret = ops->validate ? ops->validate(sb, f, &field_err) : 0; -+ if (ret) { -+ prt_printf(err, "Invalid superblock section %s: %s", -+ bch2_sb_fields[type], field_err.buf); -+ prt_newline(err); -+ bch2_sb_field_to_text(err, sb, f); -+ } -+ -+ printbuf_exit(&field_err); -+ return ret; -+} -+ -+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, -+ struct bch_sb_field *f) -+{ -+ unsigned type = le32_to_cpu(f->type); -+ const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); -+ -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 32); -+ -+ if (type < BCH_SB_FIELD_NR) -+ prt_printf(out, "%s", bch2_sb_fields[type]); -+ else -+ prt_printf(out, "(unknown field %u)", type); -+ -+ prt_printf(out, " (size %zu):", vstruct_bytes(f)); -+ prt_newline(out); -+ -+ if (ops->to_text) { -+ printbuf_indent_add(out, 2); -+ ops->to_text(out, sb, f); -+ printbuf_indent_sub(out, 2); -+ } -+} -+ -+void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) -+{ -+ unsigned i; -+ -+ prt_printf(out, "Type: %u", l->layout_type); -+ prt_newline(out); -+ -+ prt_str(out, "Superblock max size: "); -+ prt_units_u64(out, 512 << l->sb_max_size_bits); -+ prt_newline(out); -+ -+ prt_printf(out, "Nr superblocks: %u", l->nr_superblocks); -+ prt_newline(out); -+ -+ prt_str(out, "Offsets: "); -+ for (i = 0; i < l->nr_superblocks; i++) { -+ if (i) -+ prt_str(out, ", "); -+ prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i])); -+ } -+ prt_newline(out); -+} -+ -+void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, -+ bool print_layout, unsigned fields) -+{ -+ struct bch_sb_field *f; -+ u64 fields_have = 0; -+ unsigned nr_devices = 0; -+ -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 44); -+ -+ for (int i = 0; i < sb->nr_devices; i++) -+ nr_devices += bch2_dev_exists(sb, i); -+ -+ prt_printf(out, "External UUID:"); -+ prt_tab(out); -+ pr_uuid(out, sb->user_uuid.b); -+ prt_newline(out); -+ -+ prt_printf(out, "Internal UUID:"); -+ prt_tab(out); -+ pr_uuid(out, sb->uuid.b); -+ prt_newline(out); -+ -+ prt_str(out, "Device index:"); -+ prt_tab(out); -+ prt_printf(out, "%u", sb->dev_idx); -+ prt_newline(out); -+ -+ prt_str(out, "Label:"); -+ prt_tab(out); -+ prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label); -+ prt_newline(out); -+ -+ prt_str(out, "Version:"); -+ prt_tab(out); -+ bch2_version_to_text(out, le16_to_cpu(sb->version)); -+ prt_newline(out); -+ -+ prt_str(out, "Version upgrade complete:"); -+ prt_tab(out); -+ bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); -+ prt_newline(out); -+ -+ prt_printf(out, "Oldest version on disk:"); -+ prt_tab(out); -+ bch2_version_to_text(out, le16_to_cpu(sb->version_min)); -+ prt_newline(out); -+ -+ prt_printf(out, "Created:"); -+ prt_tab(out); -+ if (sb->time_base_lo) -+ bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); -+ else -+ prt_printf(out, "(not set)"); 
-+ prt_newline(out); -+ -+ prt_printf(out, "Sequence number:"); -+ prt_tab(out); -+ prt_printf(out, "%llu", le64_to_cpu(sb->seq)); -+ prt_newline(out); -+ -+ prt_printf(out, "Superblock size:"); -+ prt_tab(out); -+ prt_printf(out, "%zu", vstruct_bytes(sb)); -+ prt_newline(out); -+ -+ prt_printf(out, "Clean:"); -+ prt_tab(out); -+ prt_printf(out, "%llu", BCH_SB_CLEAN(sb)); -+ prt_newline(out); -+ -+ prt_printf(out, "Devices:"); -+ prt_tab(out); -+ prt_printf(out, "%u", nr_devices); -+ prt_newline(out); -+ -+ prt_printf(out, "Sections:"); -+ vstruct_for_each(sb, f) -+ fields_have |= 1 << le32_to_cpu(f->type); -+ prt_tab(out); -+ prt_bitflags(out, bch2_sb_fields, fields_have); -+ prt_newline(out); -+ -+ prt_printf(out, "Features:"); -+ prt_tab(out); -+ prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0])); -+ prt_newline(out); -+ -+ prt_printf(out, "Compat features:"); -+ prt_tab(out); -+ prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0])); -+ prt_newline(out); -+ -+ prt_newline(out); -+ prt_printf(out, "Options:"); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ { -+ enum bch_opt_id id; -+ -+ for (id = 0; id < bch2_opts_nr; id++) { -+ const struct bch_option *opt = bch2_opt_table + id; -+ -+ if (opt->get_sb != BCH2_NO_SB_OPT) { -+ u64 v = bch2_opt_from_sb(sb, id); -+ -+ prt_printf(out, "%s:", opt->attr.name); -+ prt_tab(out); -+ bch2_opt_to_text(out, NULL, sb, opt, v, -+ OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST); -+ prt_newline(out); -+ } -+ } -+ } -+ -+ printbuf_indent_sub(out, 2); -+ -+ if (print_layout) { -+ prt_newline(out); -+ prt_printf(out, "layout:"); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ bch2_sb_layout_to_text(out, &sb->layout); -+ printbuf_indent_sub(out, 2); -+ } -+ -+ vstruct_for_each(sb, f) -+ if (fields & (1 << le32_to_cpu(f->type))) { -+ prt_newline(out); -+ bch2_sb_field_to_text(out, sb, f); -+ } -+} -diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h -new file mode 100644 -index 000000000000..f5abd102bff7 ---- /dev/null -+++ b/fs/bcachefs/super-io.h -@@ -0,0 +1,94 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_SUPER_IO_H -+#define _BCACHEFS_SUPER_IO_H -+ -+#include "extents.h" -+#include "eytzinger.h" -+#include "super_types.h" -+#include "super.h" -+#include "sb-members.h" -+ -+#include -+ -+static inline bool bch2_version_compatible(u16 version) -+{ -+ return BCH_VERSION_MAJOR(version) <= BCH_VERSION_MAJOR(bcachefs_metadata_version_current) && -+ version >= bcachefs_metadata_version_min; -+} -+ -+void bch2_version_to_text(struct printbuf *, unsigned); -+unsigned bch2_latest_compatible_version(unsigned); -+ -+u64 bch2_upgrade_recovery_passes(struct bch_fs *c, -+ unsigned, -+ unsigned); -+ -+static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) -+{ -+ return le32_to_cpu(f->u64s) * sizeof(u64); -+} -+ -+#define field_to_type(_f, _name) \ -+ container_of_or_null(_f, struct bch_sb_field_##_name, field) -+ -+struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *, enum bch_sb_field_type); -+#define bch2_sb_field_get(_sb, _name) \ -+ field_to_type(bch2_sb_field_get_id(_sb, BCH_SB_FIELD_##_name), _name) -+ -+struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *, -+ enum bch_sb_field_type, unsigned); -+#define bch2_sb_field_resize(_sb, _name, _u64s) \ -+ field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name) -+ -+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); -+ -+extern const char * const bch2_sb_fields[]; -+ -+struct 
bch_sb_field_ops { -+ int (*validate)(struct bch_sb *, struct bch_sb_field *, struct printbuf *); -+ void (*to_text)(struct printbuf *, struct bch_sb *, struct bch_sb_field *); -+}; -+ -+static inline __le64 bch2_sb_magic(struct bch_fs *c) -+{ -+ __le64 ret; -+ -+ memcpy(&ret, &c->sb.uuid, sizeof(ret)); -+ return ret; -+} -+ -+static inline __u64 jset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ JSET_MAGIC); -+} -+ -+static inline __u64 bset_magic(struct bch_fs *c) -+{ -+ return __le64_to_cpu(bch2_sb_magic(c) ^ BSET_MAGIC); -+} -+ -+int bch2_sb_to_fs(struct bch_fs *, struct bch_sb *); -+int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); -+ -+void bch2_free_super(struct bch_sb_handle *); -+int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -+ -+int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); -+int bch2_write_super(struct bch_fs *); -+void __bch2_check_set_feature(struct bch_fs *, unsigned); -+ -+static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) -+{ -+ if (!(c->sb.features & (1ULL << feat))) -+ __bch2_check_set_feature(c, feat); -+} -+ -+void bch2_sb_maybe_downgrade(struct bch_fs *); -+void bch2_sb_upgrade(struct bch_fs *, unsigned); -+ -+void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, -+ struct bch_sb_field *); -+void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *); -+void bch2_sb_to_text(struct printbuf *, struct bch_sb *, bool, unsigned); -+ -+#endif /* _BCACHEFS_SUPER_IO_H */ -diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c -new file mode 100644 -index 000000000000..24672bb31cbe ---- /dev/null -+++ b/fs/bcachefs/super.c -@@ -0,0 +1,2017 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * bcachefs setup/teardown code, and some metadata io - read a superblock and -+ * figure out what to do with it. -+ * -+ * Copyright 2010, 2011 Kent Overstreet -+ * Copyright 2012 Google, Inc. 
-+ */ -+ -+#include "bcachefs.h" -+#include "alloc_background.h" -+#include "alloc_foreground.h" -+#include "bkey_sort.h" -+#include "btree_cache.h" -+#include "btree_gc.h" -+#include "btree_journal_iter.h" -+#include "btree_key_cache.h" -+#include "btree_update_interior.h" -+#include "btree_io.h" -+#include "btree_write_buffer.h" -+#include "buckets_waiting_for_journal.h" -+#include "chardev.h" -+#include "checksum.h" -+#include "clock.h" -+#include "compress.h" -+#include "counters.h" -+#include "debug.h" -+#include "disk_groups.h" -+#include "ec.h" -+#include "errcode.h" -+#include "error.h" -+#include "fs.h" -+#include "fs-io.h" -+#include "fs-io-buffered.h" -+#include "fs-io-direct.h" -+#include "fsck.h" -+#include "inode.h" -+#include "io_read.h" -+#include "io_write.h" -+#include "journal.h" -+#include "journal_reclaim.h" -+#include "journal_seq_blacklist.h" -+#include "move.h" -+#include "migrate.h" -+#include "movinggc.h" -+#include "nocow_locking.h" -+#include "quota.h" -+#include "rebalance.h" -+#include "recovery.h" -+#include "replicas.h" -+#include "sb-clean.h" -+#include "sb-errors.h" -+#include "sb-members.h" -+#include "snapshot.h" -+#include "subvolume.h" -+#include "super.h" -+#include "super-io.h" -+#include "sysfs.h" -+#include "trace.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Kent Overstreet "); -+MODULE_DESCRIPTION("bcachefs filesystem"); -+ -+#define KTYPE(type) \ -+static const struct attribute_group type ## _group = { \ -+ .attrs = type ## _files \ -+}; \ -+ \ -+static const struct attribute_group *type ## _groups[] = { \ -+ &type ## _group, \ -+ NULL \ -+}; \ -+ \ -+static const struct kobj_type type ## _ktype = { \ -+ .release = type ## _release, \ -+ .sysfs_ops = &type ## _sysfs_ops, \ -+ .default_groups = type ## _groups \ -+} -+ -+static void bch2_fs_release(struct kobject *); -+static void bch2_dev_release(struct kobject *); -+static void bch2_fs_counters_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_internal_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_opts_dir_release(struct kobject *k) -+{ -+} -+ -+static void bch2_fs_time_stats_release(struct kobject *k) -+{ -+} -+ -+KTYPE(bch2_fs); -+KTYPE(bch2_fs_counters); -+KTYPE(bch2_fs_internal); -+KTYPE(bch2_fs_opts_dir); -+KTYPE(bch2_fs_time_stats); -+KTYPE(bch2_dev); -+ -+static struct kset *bcachefs_kset; -+static LIST_HEAD(bch_fs_list); -+static DEFINE_MUTEX(bch_fs_list_lock); -+ -+DECLARE_WAIT_QUEUE_HEAD(bch2_read_only_wait); -+ -+static void bch2_dev_free(struct bch_dev *); -+static int bch2_dev_alloc(struct bch_fs *, unsigned); -+static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); -+static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); -+ -+struct bch_fs *bch2_dev_to_fs(dev_t dev) -+{ -+ struct bch_fs *c; -+ struct bch_dev *ca; -+ unsigned i; -+ -+ mutex_lock(&bch_fs_list_lock); -+ rcu_read_lock(); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ for_each_member_device_rcu(ca, c, i, NULL) -+ if (ca->disk_sb.bdev && ca->disk_sb.bdev->bd_dev == dev) { -+ closure_get(&c->cl); -+ goto found; -+ } -+ c = NULL; -+found: -+ rcu_read_unlock(); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) -+{ -+ struct bch_fs *c; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ list_for_each_entry(c, &bch_fs_list, list) -+ if (!memcmp(&c->disk_sb.sb->uuid, &uuid, sizeof(uuid))) -+ 
return c; -+ -+ return NULL; -+} -+ -+struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) -+{ -+ struct bch_fs *c; -+ -+ mutex_lock(&bch_fs_list_lock); -+ c = __bch2_uuid_to_fs(uuid); -+ if (c) -+ closure_get(&c->cl); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ return c; -+} -+ -+static void bch2_dev_usage_journal_reserve(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i, nr = 0, u64s = -+ ((sizeof(struct jset_entry_dev_usage) + -+ sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR)) / -+ sizeof(u64); -+ -+ rcu_read_lock(); -+ for_each_member_device_rcu(ca, c, i, NULL) -+ nr++; -+ rcu_read_unlock(); -+ -+ bch2_journal_entry_res_resize(&c->journal, -+ &c->dev_usage_journal_res, u64s * nr); -+} -+ -+/* Filesystem RO/RW: */ -+ -+/* -+ * For startup/shutdown of RW stuff, the dependencies are: -+ * -+ * - foreground writes depend on copygc and rebalance (to free up space) -+ * -+ * - copygc and rebalance depend on mark and sweep gc (they actually probably -+ * don't because they either reserve ahead of time or don't block if -+ * allocations fail, but allocations can require mark and sweep gc to run -+ * because of generation number wraparound) -+ * -+ * - all of the above depends on the allocator threads -+ * -+ * - allocator depends on the journal (when it rewrites prios and gens) -+ */ -+ -+static void __bch2_fs_read_only(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i, clean_passes = 0; -+ u64 seq = 0; -+ -+ bch2_fs_ec_stop(c); -+ bch2_open_buckets_stop(c, NULL, true); -+ bch2_rebalance_stop(c); -+ bch2_copygc_stop(c); -+ bch2_gc_thread_stop(c); -+ bch2_fs_ec_flush(c); -+ -+ bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu", -+ journal_cur_seq(&c->journal)); -+ -+ do { -+ clean_passes++; -+ -+ if (bch2_btree_interior_updates_flush(c) || -+ bch2_journal_flush_all_pins(&c->journal) || -+ bch2_btree_flush_all_writes(c) || -+ seq != atomic64_read(&c->journal.seq)) { -+ seq = atomic64_read(&c->journal.seq); -+ clean_passes = 0; -+ } -+ } while (clean_passes < 2); -+ -+ bch_verbose(c, "flushing journal and stopping allocators complete, journal seq %llu", -+ journal_cur_seq(&c->journal)); -+ -+ if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) && -+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) -+ set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags); -+ bch2_fs_journal_stop(&c->journal); -+ -+ /* -+ * After stopping journal: -+ */ -+ for_each_member_device(ca, c, i) -+ bch2_dev_allocator_remove(c, ca); -+} -+ -+#ifndef BCH_WRITE_REF_DEBUG -+static void bch2_writes_disabled(struct percpu_ref *writes) -+{ -+ struct bch_fs *c = container_of(writes, struct bch_fs, writes); -+ -+ set_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ wake_up(&bch2_read_only_wait); -+} -+#endif -+ -+void bch2_fs_read_only(struct bch_fs *c) -+{ -+ if (!test_bit(BCH_FS_RW, &c->flags)) { -+ bch2_journal_reclaim_stop(&c->journal); -+ return; -+ } -+ -+ BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ /* -+ * Block new foreground-end write operations from starting - any new -+ * writes will return -EROFS: -+ */ -+ set_bit(BCH_FS_GOING_RO, &c->flags); -+#ifndef BCH_WRITE_REF_DEBUG -+ percpu_ref_kill(&c->writes); -+#else -+ for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) -+ bch2_write_ref_put(c, i); -+#endif -+ -+ /* -+ * If we're not doing an emergency shutdown, we want to wait on -+ * outstanding writes to complete so they don't see spurious errors due -+ * to shutting down the allocator: -+ * -+ * If we are doing an emergency shutdown outstanding writes may -+ * hang until we 
shutdown the allocator so we don't want to wait -+ * on outstanding writes before shutting everything down - but -+ * we do need to wait on them before returning and signalling -+ * that going RO is complete: -+ */ -+ wait_event(bch2_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) || -+ test_bit(BCH_FS_EMERGENCY_RO, &c->flags)); -+ -+ __bch2_fs_read_only(c); -+ -+ wait_event(bch2_read_only_wait, -+ test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); -+ -+ clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags); -+ clear_bit(BCH_FS_GOING_RO, &c->flags); -+ -+ if (!bch2_journal_error(&c->journal) && -+ !test_bit(BCH_FS_ERROR, &c->flags) && -+ !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && -+ test_bit(BCH_FS_STARTED, &c->flags) && -+ test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) && -+ !c->opts.norecovery) { -+ BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal)); -+ BUG_ON(atomic_read(&c->btree_cache.dirty)); -+ BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty)); -+ BUG_ON(c->btree_write_buffer.state.nr); -+ -+ bch_verbose(c, "marking filesystem clean"); -+ bch2_fs_mark_clean(c); -+ } -+ -+ clear_bit(BCH_FS_RW, &c->flags); -+} -+ -+static void bch2_fs_read_only_work(struct work_struct *work) -+{ -+ struct bch_fs *c = -+ container_of(work, struct bch_fs, read_only_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+} -+ -+static void bch2_fs_read_only_async(struct bch_fs *c) -+{ -+ queue_work(system_long_wq, &c->read_only_work); -+} -+ -+bool bch2_fs_emergency_read_only(struct bch_fs *c) -+{ -+ bool ret = !test_and_set_bit(BCH_FS_EMERGENCY_RO, &c->flags); -+ -+ bch2_journal_halt(&c->journal); -+ bch2_fs_read_only_async(c); -+ -+ wake_up(&bch2_read_only_wait); -+ return ret; -+} -+ -+static int bch2_fs_read_write_late(struct bch_fs *c) -+{ -+ int ret; -+ -+ /* -+ * Data move operations can't run until after check_snapshots has -+ * completed, and bch2_snapshot_is_ancestor() is available. 
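 *
 * (Illustrative aside, not upstream text: __bch2_fs_read_write(c, false)
 *  only calls bch2_fs_read_write_late() once recovery has finished, so the
 *  copygc/rebalance threads started below never observe a half-built
 *  snapshot ancestry table.)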
-+ * -+ * Ideally we'd start copygc/rebalance earlier instead of waiting for -+ * all of recovery/fsck to complete: -+ */ -+ ret = bch2_copygc_start(c); -+ if (ret) { -+ bch_err(c, "error starting copygc thread"); -+ return ret; -+ } -+ -+ ret = bch2_rebalance_start(c); -+ if (ret) { -+ bch_err(c, "error starting rebalance thread"); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int __bch2_fs_read_write(struct bch_fs *c, bool early) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret; -+ -+ if (test_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags)) { -+ bch_err(c, "cannot go rw, unfixed btree errors"); -+ return -BCH_ERR_erofs_unfixed_errors; -+ } -+ -+ if (test_bit(BCH_FS_RW, &c->flags)) -+ return 0; -+ -+ if (c->opts.norecovery) -+ return -BCH_ERR_erofs_norecovery; -+ -+ /* -+ * nochanges is used for fsck -n mode - we have to allow going rw -+ * during recovery for that to work: -+ */ -+ if (c->opts.nochanges && (!early || c->opts.read_only)) -+ return -BCH_ERR_erofs_nochanges; -+ -+ bch_info(c, "going read-write"); -+ -+ ret = bch2_sb_members_v2_init(c); -+ if (ret) -+ goto err; -+ -+ ret = bch2_fs_mark_dirty(c); -+ if (ret) -+ goto err; -+ -+ clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags); -+ -+ /* -+ * First journal write must be a flush write: after a clean shutdown we -+ * don't read the journal, so the first journal write may end up -+ * overwriting whatever was there previously, and there must always be -+ * at least one non-flush write in the journal or recovery will fail: -+ */ -+ set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags); -+ -+ for_each_rw_member(ca, c, i) -+ bch2_dev_allocator_add(c, ca); -+ bch2_recalc_capacity(c); -+ -+ ret = bch2_gc_thread_start(c); -+ if (ret) { -+ bch_err(c, "error starting gc thread"); -+ return ret; -+ } -+ -+ ret = bch2_journal_reclaim_start(&c->journal); -+ if (ret) -+ goto err; -+ -+ if (!early) { -+ ret = bch2_fs_read_write_late(c); -+ if (ret) -+ goto err; -+ } -+ -+#ifndef BCH_WRITE_REF_DEBUG -+ percpu_ref_reinit(&c->writes); -+#else -+ for (i = 0; i < BCH_WRITE_REF_NR; i++) { -+ BUG_ON(atomic_long_read(&c->writes[i])); -+ atomic_long_inc(&c->writes[i]); -+ } -+#endif -+ set_bit(BCH_FS_RW, &c->flags); -+ set_bit(BCH_FS_WAS_RW, &c->flags); -+ -+ bch2_do_discards(c); -+ bch2_do_invalidates(c); -+ bch2_do_stripe_deletes(c); -+ bch2_do_pending_node_rewrites(c); -+ return 0; -+err: -+ __bch2_fs_read_only(c); -+ return ret; -+} -+ -+int bch2_fs_read_write(struct bch_fs *c) -+{ -+ return __bch2_fs_read_write(c, false); -+} -+ -+int bch2_fs_read_write_early(struct bch_fs *c) -+{ -+ lockdep_assert_held(&c->state_lock); -+ -+ return __bch2_fs_read_write(c, true); -+} -+ -+/* Filesystem startup/shutdown: */ -+ -+static void __bch2_fs_free(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ for (i = 0; i < BCH_TIME_STAT_NR; i++) -+ bch2_time_stats_exit(&c->times[i]); -+ -+ bch2_free_pending_node_rewrites(c); -+ bch2_fs_sb_errors_exit(c); -+ bch2_fs_counters_exit(c); -+ bch2_fs_snapshots_exit(c); -+ bch2_fs_quota_exit(c); -+ bch2_fs_fs_io_direct_exit(c); -+ bch2_fs_fs_io_buffered_exit(c); -+ bch2_fs_fsio_exit(c); -+ bch2_fs_ec_exit(c); -+ bch2_fs_encryption_exit(c); -+ bch2_fs_nocow_locking_exit(c); -+ bch2_fs_io_write_exit(c); -+ bch2_fs_io_read_exit(c); -+ bch2_fs_buckets_waiting_for_journal_exit(c); -+ bch2_fs_btree_interior_update_exit(c); -+ bch2_fs_btree_iter_exit(c); -+ bch2_fs_btree_key_cache_exit(&c->btree_key_cache); -+ bch2_fs_btree_cache_exit(c); -+ bch2_fs_replicas_exit(c); -+ bch2_fs_journal_exit(&c->journal); -+ 
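	/*
	 * (Illustrative aside, not upstream text: the exit calls in this
	 * function run in roughly the reverse order of the init chain at the
	 * bottom of bch2_fs_alloc() -- e.g. the journal, set up after the IO
	 * clocks, is torn down before them.)
	 */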
bch2_io_clock_exit(&c->io_clock[WRITE]); -+ bch2_io_clock_exit(&c->io_clock[READ]); -+ bch2_fs_compress_exit(c); -+ bch2_journal_keys_free(&c->journal_keys); -+ bch2_journal_entries_free(c); -+ bch2_fs_btree_write_buffer_exit(c); -+ percpu_free_rwsem(&c->mark_lock); -+ free_percpu(c->online_reserved); -+ -+ darray_exit(&c->btree_roots_extra); -+ free_percpu(c->pcpu); -+ mempool_exit(&c->large_bkey_pool); -+ mempool_exit(&c->btree_bounce_pool); -+ bioset_exit(&c->btree_bio); -+ mempool_exit(&c->fill_iter); -+#ifndef BCH_WRITE_REF_DEBUG -+ percpu_ref_exit(&c->writes); -+#endif -+ kfree(rcu_dereference_protected(c->disk_groups, 1)); -+ kfree(c->journal_seq_blacklist_table); -+ kfree(c->unused_inode_hints); -+ -+ if (c->write_ref_wq) -+ destroy_workqueue(c->write_ref_wq); -+ if (c->io_complete_wq) -+ destroy_workqueue(c->io_complete_wq); -+ if (c->copygc_wq) -+ destroy_workqueue(c->copygc_wq); -+ if (c->btree_io_complete_wq) -+ destroy_workqueue(c->btree_io_complete_wq); -+ if (c->btree_update_wq) -+ destroy_workqueue(c->btree_update_wq); -+ -+ bch2_free_super(&c->disk_sb); -+ kvpfree(c, sizeof(*c)); -+ module_put(THIS_MODULE); -+} -+ -+static void bch2_fs_release(struct kobject *kobj) -+{ -+ struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); -+ -+ __bch2_fs_free(c); -+} -+ -+void __bch2_fs_stop(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ -+ bch_verbose(c, "shutting down"); -+ -+ set_bit(BCH_FS_STOPPING, &c->flags); -+ -+ cancel_work_sync(&c->journal_seq_blacklist_gc_work); -+ -+ down_write(&c->state_lock); -+ bch2_fs_read_only(c); -+ up_write(&c->state_lock); -+ -+ for_each_member_device(ca, c, i) -+ if (ca->kobj.state_in_sysfs && -+ ca->disk_sb.bdev) -+ sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs"); -+ -+ if (c->kobj.state_in_sysfs) -+ kobject_del(&c->kobj); -+ -+ bch2_fs_debug_exit(c); -+ bch2_fs_chardev_exit(c); -+ -+ kobject_put(&c->counters_kobj); -+ kobject_put(&c->time_stats); -+ kobject_put(&c->opts_dir); -+ kobject_put(&c->internal); -+ -+ /* btree prefetch might have kicked off reads in the background: */ -+ bch2_btree_flush_all_reads(c); -+ -+ for_each_member_device(ca, c, i) -+ cancel_work_sync(&ca->io_error_work); -+ -+ cancel_work_sync(&c->read_only_work); -+} -+ -+void bch2_fs_free(struct bch_fs *c) -+{ -+ unsigned i; -+ -+ mutex_lock(&bch_fs_list_lock); -+ list_del(&c->list); -+ mutex_unlock(&bch_fs_list_lock); -+ -+ closure_sync(&c->cl); -+ closure_debug_destroy(&c->cl); -+ -+ for (i = 0; i < c->sb.nr_devices; i++) { -+ struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true); -+ -+ if (ca) { -+ bch2_free_super(&ca->disk_sb); -+ bch2_dev_free(ca); -+ } -+ } -+ -+ bch_verbose(c, "shutdown complete"); -+ -+ kobject_put(&c->kobj); -+} -+ -+void bch2_fs_stop(struct bch_fs *c) -+{ -+ __bch2_fs_stop(c); -+ bch2_fs_free(c); -+} -+ -+static int bch2_fs_online(struct bch_fs *c) -+{ -+ struct bch_dev *ca; -+ unsigned i; -+ int ret = 0; -+ -+ lockdep_assert_held(&bch_fs_list_lock); -+ -+ if (__bch2_uuid_to_fs(c->sb.uuid)) { -+ bch_err(c, "filesystem UUID already open"); -+ return -EINVAL; -+ } -+ -+ ret = bch2_fs_chardev_init(c); -+ if (ret) { -+ bch_err(c, "error creating character device"); -+ return ret; -+ } -+ -+ bch2_fs_debug_init(c); -+ -+ ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?: -+ kobject_add(&c->internal, &c->kobj, "internal") ?: -+ kobject_add(&c->opts_dir, &c->kobj, "options") ?: -+ kobject_add(&c->time_stats, &c->kobj, "time_stats") ?: -+ kobject_add(&c->counters_kobj, &c->kobj, "counters") ?: -+ 
-+	    bch2_opts_create_sysfs_files(&c->opts_dir);
-+	if (ret) {
-+		bch_err(c, "error creating sysfs objects");
-+		return ret;
-+	}
-+
-+	down_write(&c->state_lock);
-+
-+	for_each_member_device(ca, c, i) {
-+		ret = bch2_dev_sysfs_online(c, ca);
-+		if (ret) {
-+			bch_err(c, "error creating sysfs objects");
-+			percpu_ref_put(&ca->ref);
-+			goto err;
-+		}
-+	}
-+
-+	BUG_ON(!list_empty(&c->list));
-+	list_add(&c->list, &bch_fs_list);
-+err:
-+	up_write(&c->state_lock);
-+	return ret;
-+}
-+
-+static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
-+{
-+	struct bch_fs *c;
-+	struct printbuf name = PRINTBUF;
-+	unsigned i, iter_size;
-+	int ret = 0;
-+
-+	c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
-+	if (!c) {
-+		c = ERR_PTR(-BCH_ERR_ENOMEM_fs_alloc);
-+		goto out;
-+	}
-+
-+	__module_get(THIS_MODULE);
-+
-+	closure_init(&c->cl, NULL);
-+
-+	c->kobj.kset = bcachefs_kset;
-+	kobject_init(&c->kobj, &bch2_fs_ktype);
-+	kobject_init(&c->internal, &bch2_fs_internal_ktype);
-+	kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
-+	kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
-+	kobject_init(&c->counters_kobj, &bch2_fs_counters_ktype);
-+
-+	c->minor		= -1;
-+	c->disk_sb.fs_sb	= true;
-+
-+	init_rwsem(&c->state_lock);
-+	mutex_init(&c->sb_lock);
-+	mutex_init(&c->replicas_gc_lock);
-+	mutex_init(&c->btree_root_lock);
-+	INIT_WORK(&c->read_only_work, bch2_fs_read_only_work);
-+
-+	init_rwsem(&c->gc_lock);
-+	mutex_init(&c->gc_gens_lock);
-+
-+	for (i = 0; i < BCH_TIME_STAT_NR; i++)
-+		bch2_time_stats_init(&c->times[i]);
-+
-+	bch2_fs_copygc_init(c);
-+	bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
-+	bch2_fs_btree_interior_update_init_early(c);
-+	bch2_fs_allocator_background_init(c);
-+	bch2_fs_allocator_foreground_init(c);
-+	bch2_fs_rebalance_init(c);
-+	bch2_fs_quota_init(c);
-+	bch2_fs_ec_init_early(c);
-+	bch2_fs_move_init(c);
-+	bch2_fs_sb_errors_init_early(c);
-+
-+	INIT_LIST_HEAD(&c->list);
-+
-+	mutex_init(&c->usage_scratch_lock);
-+
-+	mutex_init(&c->bio_bounce_pages_lock);
-+	mutex_init(&c->snapshot_table_lock);
-+	init_rwsem(&c->snapshot_create_lock);
-+
-+	spin_lock_init(&c->btree_write_error_lock);
-+
-+	INIT_WORK(&c->journal_seq_blacklist_gc_work,
-+		  bch2_blacklist_entries_gc);
-+
-+	INIT_LIST_HEAD(&c->journal_iters);
-+
-+	INIT_LIST_HEAD(&c->fsck_error_msgs);
-+	mutex_init(&c->fsck_error_msgs_lock);
-+
-+	seqcount_init(&c->gc_pos_lock);
-+
-+	seqcount_init(&c->usage_lock);
-+
-+	sema_init(&c->io_in_flight, 128);
-+
-+	INIT_LIST_HEAD(&c->vfs_inodes_list);
-+	mutex_init(&c->vfs_inodes_lock);
-+
-+	c->copy_gc_enabled		= 1;
-+	c->rebalance.enabled		= 1;
-+	c->promote_whole_extents	= true;
-+
-+	c->journal.flush_write_time	= &c->times[BCH_TIME_journal_flush_write];
-+	c->journal.noflush_write_time	= &c->times[BCH_TIME_journal_noflush_write];
-+	c->journal.blocked_time		= &c->times[BCH_TIME_blocked_journal];
-+	c->journal.flush_seq_time	= &c->times[BCH_TIME_journal_flush_seq];
-+
-+	bch2_fs_btree_cache_init_early(&c->btree_cache);
-+
-+	mutex_init(&c->sectors_available_lock);
-+
-+	ret = percpu_init_rwsem(&c->mark_lock);
-+	if (ret)
-+		goto err;
-+
-+	mutex_lock(&c->sb_lock);
-+	ret = bch2_sb_to_fs(c, sb);
-+	mutex_unlock(&c->sb_lock);
-+
-+	if (ret)
-+		goto err;
-+
-+	pr_uuid(&name, c->sb.user_uuid.b);
-+	strscpy(c->name, name.buf, sizeof(c->name));
-+	printbuf_exit(&name);
-+
-+	ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
-+	if (ret)
-+		goto err;
-+
-+	/* Compat: */
-+	if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
-+	    !BCH_SB_JOURNAL_FLUSH_DELAY(sb))
-+		SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
-+
-+	if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
-+	    !BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
-+		SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 100);
-+
-+	c->opts = bch2_opts_default;
-+	ret = bch2_opts_from_sb(&c->opts, sb);
-+	if (ret)
-+		goto err;
-+
-+	bch2_opts_apply(&c->opts, opts);
-+
-+	c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
-+	if (c->opts.inodes_use_key_cache)
-+		c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes;
-+	c->btree_key_cache_btrees |= 1U << BTREE_ID_logged_ops;
-+
-+	c->block_bits		= ilog2(block_sectors(c));
-+	c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c);
-+
-+	if (bch2_fs_init_fault("fs_alloc")) {
-+		bch_err(c, "fs_alloc fault injected");
-+		ret = -EFAULT;
-+		goto err;
-+	}
-+
-+	iter_size = sizeof(struct sort_iter) +
-+		(btree_blocks(c) + 1) * 2 *
-+		sizeof(struct sort_iter_set);
-+
-+	c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
-+
-+	if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
-+				WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512)) ||
-+	    !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
-+				WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
-+	    !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
-+				WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
-+	    !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
-+				WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
-+	    !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
-+				WQ_FREEZABLE, 0)) ||
-+#ifndef BCH_WRITE_REF_DEBUG
-+	    percpu_ref_init(&c->writes, bch2_writes_disabled,
-+			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
-+#endif
-+	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
-+	    bioset_init(&c->btree_bio, 1,
-+			max(offsetof(struct btree_read_bio, bio),
-+			    offsetof(struct btree_write_bio, wbio.bio)),
-+			BIOSET_NEED_BVECS) ||
-+	    !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
-+	    !(c->online_reserved = alloc_percpu(u64)) ||
-+	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
-+					btree_bytes(c)) ||
-+	    mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
-+	    !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
-+					      sizeof(u64), GFP_KERNEL))) {
-+		ret = -BCH_ERR_ENOMEM_fs_other_alloc;
-+		goto err;
-+	}
-+
-+	ret = bch2_fs_counters_init(c) ?:
-+	    bch2_fs_sb_errors_init(c) ?:
-+	    bch2_io_clock_init(&c->io_clock[READ]) ?:
-+	    bch2_io_clock_init(&c->io_clock[WRITE]) ?:
-+	    bch2_fs_journal_init(&c->journal) ?:
-+	    bch2_fs_replicas_init(c) ?:
-+	    bch2_fs_btree_cache_init(c) ?:
-+	    bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
-+	    bch2_fs_btree_iter_init(c) ?:
-+	    bch2_fs_btree_interior_update_init(c) ?:
-+	    bch2_fs_buckets_waiting_for_journal_init(c) ?:
-+	    bch2_fs_btree_write_buffer_init(c) ?:
-+	    bch2_fs_subvolumes_init(c) ?:
-+	    bch2_fs_io_read_init(c) ?:
-+	    bch2_fs_io_write_init(c) ?:
-+	    bch2_fs_nocow_locking_init(c) ?:
-+	    bch2_fs_encryption_init(c) ?:
-+	    bch2_fs_compress_init(c) ?:
-+	    bch2_fs_ec_init(c) ?:
-+	    bch2_fs_fsio_init(c) ?:
-+	    bch2_fs_fs_io_buffered_init(c) ?:
-+	    bch2_fs_fs_io_direct_init(c);
-+	if (ret)
-+		goto err;
-+
-+	for (i = 0; i < c->sb.nr_devices; i++)
-+		if (bch2_dev_exists(c->disk_sb.sb, i) &&
-+		    bch2_dev_alloc(c, i)) {
-+			ret = -EEXIST;
-+			goto err;
-+		}
-+
-+	bch2_journal_entry_res_resize(&c->journal,
-+			&c->btree_root_journal_res,
-+			BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX));
-+	bch2_dev_usage_journal_reserve(c);
-+	bch2_journal_entry_res_resize(&c->journal,
-+			&c->clock_journal_res,
-+			(sizeof(struct jset_entry_clock) / sizeof(u64)) * 2);
-+
-+	mutex_lock(&bch_fs_list_lock);
-+	ret = bch2_fs_online(c);
-+	mutex_unlock(&bch_fs_list_lock);
-+
-+	if (ret)
-+		goto err;
-+out:
-+	return c;
-+err:
-+	bch2_fs_free(c);
-+	c = ERR_PTR(ret);
-+	goto out;
-+}
-+
-+noinline_for_stack
-+static void print_mount_opts(struct bch_fs *c)
-+{
-+	enum bch_opt_id i;
-+	struct printbuf p = PRINTBUF;
-+	bool first = true;
-+
-+	prt_str(&p, "mounting version ");
-+	bch2_version_to_text(&p, c->sb.version);
-+
-+	if (c->opts.read_only) {
-+		prt_str(&p, " opts=");
-+		first = false;
-+		prt_printf(&p, "ro");
-+	}
-+
-+	for (i = 0; i < bch2_opts_nr; i++) {
-+		const struct bch_option *opt = &bch2_opt_table[i];
-+		u64 v = bch2_opt_get_by_id(&c->opts, i);
-+
-+		if (!(opt->flags & OPT_MOUNT))
-+			continue;
-+
-+		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
-+			continue;
-+
-+		prt_str(&p, first ? " opts=" : ",");
-+		first = false;
-+		bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE);
-+	}
-+
-+	bch_info(c, "%s", p.buf);
-+	printbuf_exit(&p);
-+}
-+
-+int bch2_fs_start(struct bch_fs *c)
-+{
-+	struct bch_dev *ca;
-+	time64_t now = ktime_get_real_seconds();
-+	unsigned i;
-+	int ret;
-+
-+	print_mount_opts(c);
-+
-+	down_write(&c->state_lock);
-+
-+	BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
-+
-+	mutex_lock(&c->sb_lock);
-+
-+	ret = bch2_sb_members_v2_init(c);
-+	if (ret) {
-+		mutex_unlock(&c->sb_lock);
-+		goto err;
-+	}
-+
-+	for_each_online_member(ca, c, i)
-+		bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now);
-+
-+	mutex_unlock(&c->sb_lock);
-+
-+	for_each_rw_member(ca, c, i)
-+		bch2_dev_allocator_add(c, ca);
-+	bch2_recalc_capacity(c);
-+
-+	ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
-+		? bch2_fs_recovery(c)
-+		: bch2_fs_initialize(c);
-+	if (ret)
-+		goto err;
-+
-+	ret = bch2_opts_check_may_set(c);
-+	if (ret)
-+		goto err;
-+
-+	if (bch2_fs_init_fault("fs_start")) {
-+		bch_err(c, "fs_start fault injected");
-+		ret = -EINVAL;
-+		goto err;
-+	}
-+
-+	set_bit(BCH_FS_STARTED, &c->flags);
-+
-+	if (c->opts.read_only || c->opts.nochanges) {
-+		bch2_fs_read_only(c);
-+	} else {
-+		ret = !test_bit(BCH_FS_RW, &c->flags)
-+			? bch2_fs_read_write(c)
-+			: bch2_fs_read_write_late(c);
-+		if (ret)
-+			goto err;
-+	}
-+
-+	ret = 0;
-+out:
-+	up_write(&c->state_lock);
-+	return ret;
-+err:
-+	bch_err_msg(c, ret, "starting filesystem");
-+	goto out;
-+}
-+
-+static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
-+{
-+	struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
-+
-+	if (le16_to_cpu(sb->block_size) != block_sectors(c))
-+		return -BCH_ERR_mismatched_block_size;
-+
-+	if (le16_to_cpu(m.bucket_size) <
-+	    BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb))
-+		return -BCH_ERR_bucket_size_too_small;
-+
-+	return 0;
-+}
-+
-+static int bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb)
-+{
-+	struct bch_sb *newest =
-+		le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb;
-+
-+	if (!uuid_equal(&fs->uuid, &sb->uuid))
-+		return -BCH_ERR_device_not_a_member_of_filesystem;
-+
-+	if (!bch2_dev_exists(newest, sb->dev_idx))
-+		return -BCH_ERR_device_has_been_removed;
-+
-+	if (fs->block_size != sb->block_size)
-+		return -BCH_ERR_mismatched_block_size;
-+
-+	return 0;
-+}
-+
-+/* Device startup/shutdown: */
-+
-+static void bch2_dev_release(struct kobject *kobj)
-+{
-+	struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
-+
-+	kfree(ca);
-+}
-+
-+static void bch2_dev_free(struct bch_dev *ca)
-+{
-+	cancel_work_sync(&ca->io_error_work);
-+
-+	if (ca->kobj.state_in_sysfs &&
-+	    ca->disk_sb.bdev)
-+		sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
-+
-+	if (ca->kobj.state_in_sysfs)
-+		kobject_del(&ca->kobj);
-+
-+	bch2_free_super(&ca->disk_sb);
-+	bch2_dev_journal_exit(ca);
-+
-+	free_percpu(ca->io_done);
-+	bioset_exit(&ca->replica_set);
-+	bch2_dev_buckets_free(ca);
-+	free_page((unsigned long) ca->sb_read_scratch);
-+
-+	bch2_time_stats_exit(&ca->io_latency[WRITE]);
-+	bch2_time_stats_exit(&ca->io_latency[READ]);
-+
-+	percpu_ref_exit(&ca->io_ref);
-+	percpu_ref_exit(&ca->ref);
-+	kobject_put(&ca->kobj);
-+}
-+
-+static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
-+{
-+
-+	lockdep_assert_held(&c->state_lock);
-+
-+	if (percpu_ref_is_zero(&ca->io_ref))
-+		return;
-+
-+	__bch2_dev_read_only(c, ca);
-+
-+	reinit_completion(&ca->io_ref_completion);
-+	percpu_ref_kill(&ca->io_ref);
-+	wait_for_completion(&ca->io_ref_completion);
-+
-+	if (ca->kobj.state_in_sysfs) {
-+		sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
-+		sysfs_remove_link(&ca->kobj, "block");
-+	}
-+
-+	bch2_free_super(&ca->disk_sb);
-+	bch2_dev_journal_exit(ca);
-+}
-+
-+static void bch2_dev_ref_complete(struct percpu_ref *ref)
-+{
-+	struct bch_dev *ca = container_of(ref, struct bch_dev, ref);
-+
-+	complete(&ca->ref_completion);
-+}
-+
-+static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
-+{
-+	struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
-+
-+	complete(&ca->io_ref_completion);
-+}
-+
-+static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	int ret;
-+
-+	if (!c->kobj.state_in_sysfs)
-+		return 0;
-+
-+	if (!ca->kobj.state_in_sysfs) {
-+		ret = kobject_add(&ca->kobj, &c->kobj,
-+				  "dev-%u", ca->dev_idx);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	if (ca->disk_sb.bdev) {
-+		struct kobject *block = bdev_kobj(ca->disk_sb.bdev);
-+
-+		ret = sysfs_create_link(block, &ca->kobj, "bcachefs");
-+		if (ret)
-+			return ret;
-+
-+		ret = sysfs_create_link(&ca->kobj, block, "block");
-+		if (ret)
-+			return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
-+					struct bch_member *member)
-+{
-+	struct bch_dev *ca;
-+	unsigned i;
-+
-+	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-+	if (!ca)
-+		return NULL;
-+
-+	kobject_init(&ca->kobj, &bch2_dev_ktype);
-+	init_completion(&ca->ref_completion);
-+	init_completion(&ca->io_ref_completion);
-+
-+	init_rwsem(&ca->bucket_lock);
-+
-+	INIT_WORK(&ca->io_error_work, bch2_io_error_work);
-+
-+	bch2_time_stats_init(&ca->io_latency[READ]);
-+	bch2_time_stats_init(&ca->io_latency[WRITE]);
-+
-+	ca->mi = bch2_mi_to_cpu(member);
-+
-+	for (i = 0; i < ARRAY_SIZE(member->errors); i++)
-+		atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));
-+
-+	ca->uuid = member->uuid;
-+
-+	ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
-+			     ca->mi.bucket_size / btree_sectors(c));
-+
-+	if (percpu_ref_init(&ca->ref, bch2_dev_ref_complete,
-+			    0, GFP_KERNEL) ||
-+	    percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
-+			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
-+	    !(ca->sb_read_scratch = (void *) __get_free_page(GFP_KERNEL)) ||
-+	    bch2_dev_buckets_alloc(c, ca) ||
-+	    bioset_init(&ca->replica_set, 4,
-+			offsetof(struct bch_write_bio, bio), 0) ||
-+	    !(ca->io_done = alloc_percpu(*ca->io_done)))
-+		goto err;
-+
-+	return ca;
-+err:
-+	bch2_dev_free(ca);
-+	return NULL;
-+}
-+
-+static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca,
-+			    unsigned dev_idx)
-+{
-+	ca->dev_idx = dev_idx;
-+	__set_bit(ca->dev_idx, ca->self.d);
-+	scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx);
-+
-+	ca->fs = c;
-+	rcu_assign_pointer(c->devs[ca->dev_idx], ca);
-+
-+	if (bch2_dev_sysfs_online(c, ca))
-+		pr_warn("error creating sysfs objects");
-+}
-+
-+static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
-+{
-+	struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
-+	struct bch_dev *ca = NULL;
-+	int ret = 0;
-+
-+	if (bch2_fs_init_fault("dev_alloc"))
-+		goto err;
-+
-+	ca = __bch2_dev_alloc(c, &member);
-+	if (!ca)
-+		goto err;
-+
-+	ca->fs = c;
-+
-+	bch2_dev_attach(c, ca, dev_idx);
-+	return ret;
-+err:
-+	if (ca)
-+		bch2_dev_free(ca);
-+	return -BCH_ERR_ENOMEM_dev_alloc;
-+}
-+
-+static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
-+{
-+	unsigned ret;
-+
-+	if (bch2_dev_is_online(ca)) {
-+		bch_err(ca, "already have device online in slot %u",
-+			sb->sb->dev_idx);
-+		return -BCH_ERR_device_already_online;
-+	}
-+
-+	if (get_capacity(sb->bdev->bd_disk) <
-+	    ca->mi.bucket_size * ca->mi.nbuckets) {
-+		bch_err(ca, "cannot online: device too small");
-+		return -BCH_ERR_device_size_too_small;
-+	}
-+
-+	BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
-+
-+	ret = bch2_dev_journal_init(ca, sb->sb);
-+	if (ret)
-+		return ret;
-+
-+	/* Commit: */
-+	ca->disk_sb = *sb;
-+	memset(sb, 0, sizeof(*sb));
-+
-+	ca->dev = ca->disk_sb.bdev->bd_dev;
-+
-+	percpu_ref_reinit(&ca->io_ref);
-+
-+	return 0;
-+}
-+
-+static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
-+{
-+	struct bch_dev *ca;
-+	int ret;
-+
-+	lockdep_assert_held(&c->state_lock);
-+
-+	if (le64_to_cpu(sb->sb->seq) >
-+	    le64_to_cpu(c->disk_sb.sb->seq))
-+		bch2_sb_to_fs(c, sb->sb);
-+
-+	BUG_ON(sb->sb->dev_idx >= c->sb.nr_devices ||
-+	       !c->devs[sb->sb->dev_idx]);
-+
-+	ca = bch_dev_locked(c, sb->sb->dev_idx);
-+
-+	ret = __bch2_dev_attach_bdev(ca, sb);
-+	if (ret)
-+		return ret;
-+
-+	bch2_dev_sysfs_online(c, ca);
-+
-+	if (c->sb.nr_devices == 1)
-+		snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev);
-+	snprintf(ca->name, sizeof(ca->name), "%pg", ca->disk_sb.bdev);
-+
-+	rebalance_wakeup(c);
-+	return 0;
-+}
-+
-+/* Device management: */
-+
-+/*
-+ * Note: this function is also used by the error paths - when a particular
-+ * device sees an error, we call it to determine whether we can just set the
-+ * device RO, or - if this function returns false - we'll set the whole
-+ * filesystem RO:
-+ *
-+ * XXX: maybe we should be more explicit about whether we're changing state
-+ * because we got an error or what have you?
-+ */
-+bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
-+			    enum bch_member_state new_state, int flags)
-+{
-+	struct bch_devs_mask new_online_devs;
-+	struct bch_dev *ca2;
-+	int i, nr_rw = 0, required;
-+
-+	lockdep_assert_held(&c->state_lock);
-+
-+	switch (new_state) {
-+	case BCH_MEMBER_STATE_rw:
-+		return true;
-+	case BCH_MEMBER_STATE_ro:
-+		if (ca->mi.state != BCH_MEMBER_STATE_rw)
-+			return true;
-+
-+		/* do we have enough devices to write to? */
-+		for_each_member_device(ca2, c, i)
-+			if (ca2 != ca)
-+				nr_rw += ca2->mi.state == BCH_MEMBER_STATE_rw;
-+
-+		required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
-+			       ? c->opts.metadata_replicas
-+			       : c->opts.metadata_replicas_required,
-+			       !(flags & BCH_FORCE_IF_DATA_DEGRADED)
-+			       ? c->opts.data_replicas
-+			       : c->opts.data_replicas_required);
-+
-+		return nr_rw >= required;
-+	case BCH_MEMBER_STATE_failed:
-+	case BCH_MEMBER_STATE_spare:
-+		if (ca->mi.state != BCH_MEMBER_STATE_rw &&
-+		    ca->mi.state != BCH_MEMBER_STATE_ro)
-+			return true;
-+
-+		/* do we have enough devices to read from? */
-+		new_online_devs = bch2_online_devs(c);
-+		__clear_bit(ca->dev_idx, new_online_devs.d);
-+
-+		return bch2_have_enough_devs(c, new_online_devs, flags, false);
-+	default:
-+		BUG();
-+	}
-+}
-+
-+static bool bch2_fs_may_start(struct bch_fs *c)
-+{
-+	struct bch_dev *ca;
-+	unsigned i, flags = 0;
-+
-+	if (c->opts.very_degraded)
-+		flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
-+
-+	if (c->opts.degraded)
-+		flags |= BCH_FORCE_IF_DEGRADED;
-+
-+	if (!c->opts.degraded &&
-+	    !c->opts.very_degraded) {
-+		mutex_lock(&c->sb_lock);
-+
-+		for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
-+			if (!bch2_dev_exists(c->disk_sb.sb, i))
-+				continue;
-+
-+			ca = bch_dev_locked(c, i);
-+
-+			if (!bch2_dev_is_online(ca) &&
-+			    (ca->mi.state == BCH_MEMBER_STATE_rw ||
-+			     ca->mi.state == BCH_MEMBER_STATE_ro)) {
-+				mutex_unlock(&c->sb_lock);
-+				return false;
-+			}
-+		}
-+		mutex_unlock(&c->sb_lock);
-+	}
-+
-+	return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
-+}
-+
-+static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	/*
-+	 * The allocator thread itself allocates btree nodes, so stop it first:
-+	 */
-+	bch2_dev_allocator_remove(c, ca);
-+	bch2_dev_journal_stop(&c->journal, ca);
-+}
-+
-+static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	lockdep_assert_held(&c->state_lock);
-+
-+	BUG_ON(ca->mi.state != BCH_MEMBER_STATE_rw);
-+
-+	bch2_dev_allocator_add(c, ca);
-+	bch2_recalc_capacity(c);
-+}
-+
-+int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
-+			 enum bch_member_state new_state, int flags)
-+{
-+	struct bch_member *m;
-+	int ret = 0;
-+
-+	if (ca->mi.state == new_state)
-+		return 0;
-+
-+	if (!bch2_dev_state_allowed(c, ca, new_state, flags))
-+		return -BCH_ERR_device_state_not_allowed;
-+
-+	if (new_state != BCH_MEMBER_STATE_rw)
-+		__bch2_dev_read_only(c, ca);
-+
-+	bch_notice(ca, "%s", bch2_member_states[new_state]);
-+
-+	mutex_lock(&c->sb_lock);
-+	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-+	SET_BCH_MEMBER_STATE(m, new_state);
-+	bch2_write_super(c);
-+	mutex_unlock(&c->sb_lock);
-+
-+	if (new_state == BCH_MEMBER_STATE_rw)
-+		__bch2_dev_read_write(c, ca);
-+
-+	rebalance_wakeup(c);
-+
-+	return ret;
-+}
-+
-+int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
-+		       enum bch_member_state new_state, int flags)
-+{
-+	int ret;
-+
-+	down_write(&c->state_lock);
-+	ret = __bch2_dev_set_state(c, ca, new_state, flags);
-+	up_write(&c->state_lock);
-+
-+	return ret;
-+}
-+
-+/* Device add/removal: */
-+
-+static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
-+{
-+	struct bpos start	= POS(ca->dev_idx, 0);
-+	struct bpos end		= POS(ca->dev_idx, U64_MAX);
-+	int ret;
-+
-+	/*
-+	 * We clear the LRU and need_discard btrees first so that we don't race
-+	 * with bch2_do_invalidates() and bch2_do_discards()
-+	 */
-+	ret =   bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
-+					BTREE_TRIGGER_NORUN, NULL) ?:
-+		bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
-+					BTREE_TRIGGER_NORUN, NULL) ?:
-+		bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
-+					BTREE_TRIGGER_NORUN, NULL) ?:
-+		bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
-+					BTREE_TRIGGER_NORUN, NULL) ?:
-+		bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
-+					BTREE_TRIGGER_NORUN, NULL) ?:
-+		bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
-+					BTREE_TRIGGER_NORUN, NULL);
-+	if (ret)
-+		bch_err_msg(c, ret, "removing dev alloc info");
-+
-+	return ret;
-+}
-+
-+int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
-+{
-+	struct bch_member *m;
-+	unsigned dev_idx = ca->dev_idx, data;
-+	int ret;
-+
-+	down_write(&c->state_lock);
-+
-+	/*
-+	 * We consume a reference to ca->ref, regardless of whether we succeed
-+	 * or fail:
-+	 */
-+	percpu_ref_put(&ca->ref);
-+
-+	if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) {
-+		bch_err(ca, "Cannot remove without losing data");
-+		ret = -BCH_ERR_device_state_not_allowed;
-+		goto err;
-+	}
-+
-+	__bch2_dev_read_only(c, ca);
-+
-+	ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "dropping data");
-+		goto err;
-+	}
-+
-+	ret = bch2_dev_remove_alloc(c, ca);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "deleting alloc info");
-+		goto err;
-+	}
-+
-+	ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "flushing journal");
-+		goto err;
-+	}
-+
-+	ret = bch2_journal_flush(&c->journal);
-+	if (ret) {
-+		bch_err(ca, "journal error");
-+		goto err;
-+	}
-+
-+	ret = bch2_replicas_gc2(c);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "in replicas_gc2()");
-+		goto err;
-+	}
-+
-+	data = bch2_dev_has_data(c, ca);
-+	if (data) {
-+		struct printbuf data_has = PRINTBUF;
-+
-+		prt_bitflags(&data_has, bch2_data_types, data);
-+		bch_err(ca, "Remove failed, still has data (%s)", data_has.buf);
-+		printbuf_exit(&data_has);
-+		ret = -EBUSY;
-+		goto err;
-+	}
-+
-+	__bch2_dev_offline(c, ca);
-+
-+	mutex_lock(&c->sb_lock);
-+	rcu_assign_pointer(c->devs[ca->dev_idx], NULL);
-+	mutex_unlock(&c->sb_lock);
-+
-+	percpu_ref_kill(&ca->ref);
-+	wait_for_completion(&ca->ref_completion);
-+
-+	bch2_dev_free(ca);
-+
-+	/*
-+	 * At this point the device object has been removed in-core, but the
-+	 * on-disk journal might still refer to the device index via sb device
-+	 * usage entries. Recovery fails if it sees usage information for an
-+	 * invalid device. Flush journal pins to push the back of the journal
-+	 * past now invalid device index references before we update the
-+	 * superblock, but after the device object has been removed so any
-+	 * further journal writes elide usage info for the device.
-+	 */
-+	bch2_journal_flush_all_pins(&c->journal);
-+
-+	/*
-+	 * Free this device's slot in the bch_member array - all pointers to
-+	 * this device must be gone:
-+	 */
-+	mutex_lock(&c->sb_lock);
-+	m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
-+	memset(&m->uuid, 0, sizeof(m->uuid));
-+
-+	bch2_write_super(c);
-+
-+	mutex_unlock(&c->sb_lock);
-+	up_write(&c->state_lock);
-+
-+	bch2_dev_usage_journal_reserve(c);
-+	return 0;
-+err:
-+	if (ca->mi.state == BCH_MEMBER_STATE_rw &&
-+	    !percpu_ref_is_zero(&ca->io_ref))
-+		__bch2_dev_read_write(c, ca);
-+	up_write(&c->state_lock);
-+	return ret;
-+}
-+
-+/* Add new device to running filesystem: */
-+int bch2_dev_add(struct bch_fs *c, const char *path)
-+{
-+	struct bch_opts opts = bch2_opts_empty();
-+	struct bch_sb_handle sb;
-+	struct bch_dev *ca = NULL;
-+	struct bch_sb_field_members_v2 *mi;
-+	struct bch_member dev_mi;
-+	unsigned dev_idx, nr_devices, u64s;
-+	struct printbuf errbuf = PRINTBUF;
-+	struct printbuf label = PRINTBUF;
-+	int ret;
-+
-+	ret = bch2_read_super(path, &opts, &sb);
-+	if (ret) {
-+		bch_err_msg(c, ret, "reading super");
-+		goto err;
-+	}
-+
-+	dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx);
-+
-+	if (BCH_MEMBER_GROUP(&dev_mi)) {
-+		bch2_disk_path_to_text_sb(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1);
-+		if (label.allocation_failure) {
-+			ret = -ENOMEM;
-+			goto err;
-+		}
-+	}
-+
-+	ret = bch2_dev_may_add(sb.sb, c);
-+	if (ret) {
-+		bch_err_fn(c, ret);
-+		goto err;
-+	}
-+
-+	ca = __bch2_dev_alloc(c, &dev_mi);
-+	if (!ca) {
-+		ret = -ENOMEM;
-+		goto err;
-+	}
-+
-+	bch2_dev_usage_init(ca);
-+
-+	ret = __bch2_dev_attach_bdev(ca, &sb);
-+	if (ret)
-+		goto err;
-+
-+	ret = bch2_dev_journal_alloc(ca);
-+	if (ret) {
-+		bch_err_msg(c, ret, "allocating journal");
-+		goto err;
-+	}
-+
-+	down_write(&c->state_lock);
-+	mutex_lock(&c->sb_lock);
-+
-+	ret = bch2_sb_from_fs(c, ca);
-+	if (ret) {
-+		bch_err_msg(c, ret, "setting up new superblock");
-+		goto err_unlock;
-+	}
-+
-+	if (dynamic_fault("bcachefs:add:no_slot"))
-+		goto no_slot;
-+
-+	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
-+		if (!bch2_dev_exists(c->disk_sb.sb, dev_idx))
-+			goto have_slot;
-+no_slot:
-+	ret = -BCH_ERR_ENOSPC_sb_members;
-+	bch_err_msg(c, ret, "setting up new superblock");
-+	goto err_unlock;
-+
-+have_slot:
-+	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
-+
-+	mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
-+	u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
-+			    le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
-+
-+	mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
-+	if (!mi) {
-+		ret = -BCH_ERR_ENOSPC_sb_members;
-+		bch_err_msg(c, ret, "setting up new superblock");
-+		goto err_unlock;
-+	}
-+	struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
-+
-+	/* success: */
-+
-+	*m = dev_mi;
-+	m->last_mount = cpu_to_le64(ktime_get_real_seconds());
-+	c->disk_sb.sb->nr_devices	= nr_devices;
-+
-+	ca->disk_sb.sb->dev_idx	= dev_idx;
-+	bch2_dev_attach(c, ca, dev_idx);
-+
-+	if (BCH_MEMBER_GROUP(&dev_mi)) {
-+		ret = __bch2_dev_group_set(c, ca, label.buf);
-+		if (ret) {
-+			bch_err_msg(c, ret, "creating new label");
-+			goto err_unlock;
-+		}
-+	}
-+
-+	bch2_write_super(c);
-+	mutex_unlock(&c->sb_lock);
-+
-+	bch2_dev_usage_journal_reserve(c);
-+
-+	ret = bch2_trans_mark_dev_sb(c, ca);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "marking new superblock");
-+		goto err_late;
-+	}
-+
-+	ret = bch2_fs_freespace_init(c);
-+	if (ret) {
-+		bch_err_msg(ca, ret, "initializing free space");
space"); -+ goto err_late; -+ } -+ -+ ca->new_fs_bucket_idx = 0; -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_rw) -+ __bch2_dev_read_write(c, ca); -+ -+ up_write(&c->state_lock); -+ return 0; -+ -+err_unlock: -+ mutex_unlock(&c->sb_lock); -+ up_write(&c->state_lock); -+err: -+ if (ca) -+ bch2_dev_free(ca); -+ bch2_free_super(&sb); -+ printbuf_exit(&label); -+ printbuf_exit(&errbuf); -+ return ret; -+err_late: -+ up_write(&c->state_lock); -+ ca = NULL; -+ goto err; -+} -+ -+/* Hot add existing device to running filesystem: */ -+int bch2_dev_online(struct bch_fs *c, const char *path) -+{ -+ struct bch_opts opts = bch2_opts_empty(); -+ struct bch_sb_handle sb = { NULL }; -+ struct bch_dev *ca; -+ unsigned dev_idx; -+ int ret; -+ -+ down_write(&c->state_lock); -+ -+ ret = bch2_read_super(path, &opts, &sb); -+ if (ret) { -+ up_write(&c->state_lock); -+ return ret; -+ } -+ -+ dev_idx = sb.sb->dev_idx; -+ -+ ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); -+ if (ret) { -+ bch_err_msg(c, ret, "bringing %s online", path); -+ goto err; -+ } -+ -+ ret = bch2_dev_attach_bdev(c, &sb); -+ if (ret) -+ goto err; -+ -+ ca = bch_dev_locked(c, dev_idx); -+ -+ ret = bch2_trans_mark_dev_sb(c, ca); -+ if (ret) { -+ bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path); -+ goto err; -+ } -+ -+ if (ca->mi.state == BCH_MEMBER_STATE_rw) -+ __bch2_dev_read_write(c, ca); -+ -+ if (!ca->mi.freespace_initialized) { -+ ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); -+ bch_err_msg(ca, ret, "initializing free space"); -+ if (ret) -+ goto err; -+ } -+ -+ if (!ca->journal.nr) { -+ ret = bch2_dev_journal_alloc(ca); -+ bch_err_msg(ca, ret, "allocating journal"); -+ if (ret) -+ goto err; -+ } -+ -+ mutex_lock(&c->sb_lock); -+ bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = -+ cpu_to_le64(ktime_get_real_seconds()); -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ up_write(&c->state_lock); -+ return 0; -+err: -+ up_write(&c->state_lock); -+ bch2_free_super(&sb); -+ return ret; -+} -+ -+int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) -+{ -+ down_write(&c->state_lock); -+ -+ if (!bch2_dev_is_online(ca)) { -+ bch_err(ca, "Already offline"); -+ up_write(&c->state_lock); -+ return 0; -+ } -+ -+ if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { -+ bch_err(ca, "Cannot offline required disk"); -+ up_write(&c->state_lock); -+ return -BCH_ERR_device_state_not_allowed; -+ } -+ -+ __bch2_dev_offline(c, ca); -+ -+ up_write(&c->state_lock); -+ return 0; -+} -+ -+int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) -+{ -+ struct bch_member *m; -+ u64 old_nbuckets; -+ int ret = 0; -+ -+ down_write(&c->state_lock); -+ old_nbuckets = ca->mi.nbuckets; -+ -+ if (nbuckets < ca->mi.nbuckets) { -+ bch_err(ca, "Cannot shrink yet"); -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ if (bch2_dev_is_online(ca) && -+ get_capacity(ca->disk_sb.bdev->bd_disk) < -+ ca->mi.bucket_size * nbuckets) { -+ bch_err(ca, "New size larger than device"); -+ ret = -BCH_ERR_device_size_too_small; -+ goto err; -+ } -+ -+ ret = bch2_dev_buckets_resize(c, ca, nbuckets); -+ if (ret) { -+ bch_err_msg(ca, ret, "resizing buckets"); -+ goto err; -+ } -+ -+ ret = bch2_trans_mark_dev_sb(c, ca); -+ if (ret) -+ goto err; -+ -+ mutex_lock(&c->sb_lock); -+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ m->nbuckets = cpu_to_le64(nbuckets); -+ -+ bch2_write_super(c); -+ mutex_unlock(&c->sb_lock); -+ -+ if (ca->mi.freespace_initialized) { -+ ret = 
-+		if (ret)
-+			goto err;
-+
-+		/*
-+		 * XXX: this is all wrong transactionally - we'll be able to do
-+		 * this correctly after the disk space accounting rewrite
-+		 */
-+		ca->usage_base->d[BCH_DATA_free].buckets += nbuckets - old_nbuckets;
-+	}
-+
-+	bch2_recalc_capacity(c);
-+err:
-+	up_write(&c->state_lock);
-+	return ret;
-+}
-+
-+/* return with ref on ca->ref: */
-+struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
-+{
-+	struct bch_dev *ca;
-+	unsigned i;
-+
-+	rcu_read_lock();
-+	for_each_member_device_rcu(ca, c, i, NULL)
-+		if (!strcmp(name, ca->name))
-+			goto found;
-+	ca = ERR_PTR(-BCH_ERR_ENOENT_dev_not_found);
-+found:
-+	rcu_read_unlock();
-+
-+	return ca;
-+}
-+
-+/* Filesystem open: */
-+
-+struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
-+			    struct bch_opts opts)
-+{
-+	DARRAY(struct bch_sb_handle) sbs = { 0 };
-+	struct bch_fs *c = NULL;
-+	struct bch_sb_handle *sb, *best = NULL;
-+	struct printbuf errbuf = PRINTBUF;
-+	int ret = 0;
-+
-+	if (!try_module_get(THIS_MODULE))
-+		return ERR_PTR(-ENODEV);
-+
-+	if (!nr_devices) {
-+		ret = -EINVAL;
-+		goto err;
-+	}
-+
-+	ret = darray_make_room(&sbs, nr_devices);
-+	if (ret)
-+		goto err;
-+
-+	for (unsigned i = 0; i < nr_devices; i++) {
-+		struct bch_sb_handle sb = { NULL };
-+
-+		ret = bch2_read_super(devices[i], &opts, &sb);
-+		if (ret)
-+			goto err;
-+
-+		BUG_ON(darray_push(&sbs, sb));
-+	}
-+
-+	darray_for_each(sbs, sb)
-+		if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq))
-+			best = sb;
-+
-+	darray_for_each_reverse(sbs, sb) {
-+		if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) {
-+			pr_info("%pg has been removed, skipping", sb->bdev);
-+			bch2_free_super(sb);
-+			darray_remove_item(&sbs, sb);
-+			best -= best > sb;
-+			continue;
-+		}
-+
-+		ret = bch2_dev_in_fs(best->sb, sb->sb);
-+		if (ret)
-+			goto err_print;
-+	}
-+
-+	c = bch2_fs_alloc(best->sb, opts);
-+	ret = PTR_ERR_OR_ZERO(c);
-+	if (ret)
-+		goto err;
-+
-+	down_write(&c->state_lock);
-+	darray_for_each(sbs, sb) {
-+		ret = bch2_dev_attach_bdev(c, sb);
-+		if (ret) {
-+			up_write(&c->state_lock);
-+			goto err;
-+		}
-+	}
-+	up_write(&c->state_lock);
-+
-+	if (!bch2_fs_may_start(c)) {
-+		ret = -BCH_ERR_insufficient_devices_to_start;
-+		goto err_print;
-+	}
-+
-+	if (!c->opts.nostart) {
-+		ret = bch2_fs_start(c);
-+		if (ret)
-+			goto err;
-+	}
-+out:
-+	darray_for_each(sbs, sb)
-+		bch2_free_super(sb);
-+	darray_exit(&sbs);
-+	printbuf_exit(&errbuf);
-+	module_put(THIS_MODULE);
-+	return c;
-+err_print:
-+	pr_err("bch_fs_open err opening %s: %s",
-+	       devices[0], bch2_err_str(ret));
-+err:
-+	if (!IS_ERR_OR_NULL(c))
-+		bch2_fs_stop(c);
-+	c = ERR_PTR(ret);
-+	goto out;
-+}
-+
-+/* Global interfaces/init */
-+
-+static void bcachefs_exit(void)
-+{
-+	bch2_debug_exit();
-+	bch2_vfs_exit();
-+	bch2_chardev_exit();
-+	bch2_btree_key_cache_exit();
-+	if (bcachefs_kset)
-+		kset_unregister(bcachefs_kset);
-+}
-+
-+static int __init bcachefs_init(void)
-+{
-+	bch2_bkey_pack_test();
-+
-+	if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
-+	    bch2_btree_key_cache_init() ||
-+	    bch2_chardev_init() ||
-+	    bch2_vfs_init() ||
-+	    bch2_debug_init())
-+		goto err;
-+
-+	return 0;
-+err:
-+	bcachefs_exit();
-+	return -ENOMEM;
-+}
-+
-+#define BCH_DEBUG_PARAM(name, description)			\
-+	bool bch2_##name;					\
-+	module_param_named(name, bch2_##name, bool, 0644);	\
-+	MODULE_PARM_DESC(name, description);
-+BCH_DEBUG_PARAMS()
-+#undef BCH_DEBUG_PARAM
-+
-+__maybe_unused
-+static unsigned bch2_metadata_version = bcachefs_metadata_version_current;
-+module_param_named(version, bch2_metadata_version, uint, 0400);
-+
-+module_exit(bcachefs_exit);
-+module_init(bcachefs_init);
-diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
-new file mode 100644
-index 000000000000..bf762df18012
---- /dev/null
-+++ b/fs/bcachefs/super.h
-@@ -0,0 +1,52 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUPER_H
-+#define _BCACHEFS_SUPER_H
-+
-+#include "extents.h"
-+
-+#include "bcachefs_ioctl.h"
-+
-+#include <linux/math64.h>
-+
-+struct bch_fs *bch2_dev_to_fs(dev_t);
-+struct bch_fs *bch2_uuid_to_fs(__uuid_t);
-+
-+bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
-+			    enum bch_member_state, int);
-+int __bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
-+			 enum bch_member_state, int);
-+int bch2_dev_set_state(struct bch_fs *, struct bch_dev *,
-+		       enum bch_member_state, int);
-+
-+int bch2_dev_fail(struct bch_dev *, int);
-+int bch2_dev_remove(struct bch_fs *, struct bch_dev *, int);
-+int bch2_dev_add(struct bch_fs *, const char *);
-+int bch2_dev_online(struct bch_fs *, const char *);
-+int bch2_dev_offline(struct bch_fs *, struct bch_dev *, int);
-+int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
-+struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
-+
-+bool bch2_fs_emergency_read_only(struct bch_fs *);
-+void bch2_fs_read_only(struct bch_fs *);
-+
-+int bch2_fs_read_write(struct bch_fs *);
-+int bch2_fs_read_write_early(struct bch_fs *);
-+
-+/*
-+ * Only for use in the recovery/fsck path:
-+ */
-+static inline void bch2_fs_lazy_rw(struct bch_fs *c)
-+{
-+	if (!test_bit(BCH_FS_RW, &c->flags) &&
-+	    !test_bit(BCH_FS_WAS_RW, &c->flags))
-+		bch2_fs_read_write_early(c);
-+}
-+
-+void __bch2_fs_stop(struct bch_fs *);
-+void bch2_fs_free(struct bch_fs *);
-+void bch2_fs_stop(struct bch_fs *);
-+
-+int bch2_fs_start(struct bch_fs *);
-+struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
-+
-+#endif /* _BCACHEFS_SUPER_H */
-diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
-new file mode 100644
-index 000000000000..7dda4985b99f
---- /dev/null
-+++ b/fs/bcachefs/super_types.h
-@@ -0,0 +1,40 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SUPER_TYPES_H
-+#define _BCACHEFS_SUPER_TYPES_H
-+
-+struct bch_sb_handle {
-+	struct bch_sb		*sb;
-+	struct block_device	*bdev;
-+	struct bio		*bio;
-+	void			*holder;
-+	size_t			buffer_size;
-+	blk_mode_t		mode;
-+	unsigned		have_layout:1;
-+	unsigned		have_bio:1;
-+	unsigned		fs_sb:1;
-+	u64			seq;
-+};
-+
-+struct bch_devs_mask {
-+	unsigned long d[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
-+};
-+
-+struct bch_devs_list {
-+	u8			nr;
-+	u8			devs[BCH_BKEY_PTRS_MAX];
-+};
-+
-+struct bch_member_cpu {
-+	u64			nbuckets;	/* device size */
-+	u16			first_bucket;	/* index of first bucket used */
-+	u16			bucket_size;	/* sectors */
-+	u16			group;
-+	u8			state;
-+	u8			discard;
-+	u8			data_allowed;
-+	u8			durability;
-+	u8			freespace_initialized;
-+	u8			valid;
-+};
-+
-+#endif /* _BCACHEFS_SUPER_TYPES_H */
-diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
-new file mode 100644
-index 000000000000..662366ce9e00
---- /dev/null
-+++ b/fs/bcachefs/sysfs.c
-@@ -0,0 +1,1034 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * bcache sysfs interfaces
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
-+ * Copyright 2012 Google, Inc.
-+ */
-+
-+#ifndef NO_BCACHEFS_SYSFS
-+
-+#include "bcachefs.h"
-+#include "alloc_background.h"
-+#include "alloc_foreground.h"
-+#include "sysfs.h"
-+#include "btree_cache.h"
-+#include "btree_io.h"
-+#include "btree_iter.h"
-+#include "btree_key_cache.h"
-+#include "btree_update.h"
-+#include "btree_update_interior.h"
-+#include "btree_gc.h"
-+#include "buckets.h"
-+#include "clock.h"
-+#include "disk_groups.h"
-+#include "ec.h"
-+#include "inode.h"
-+#include "journal.h"
-+#include "keylist.h"
-+#include "move.h"
-+#include "movinggc.h"
-+#include "nocow_locking.h"
-+#include "opts.h"
-+#include "rebalance.h"
-+#include "replicas.h"
-+#include "super-io.h"
-+#include "tests.h"
-+
-+#include <linux/blkdev.h>
-+#include <linux/sort.h>
-+#include <linux/sched/clock.h>
-+
-+#include "util.h"
-+
-+#define SYSFS_OPS(type)						\
-+const struct sysfs_ops type ## _sysfs_ops = {			\
-+	.show	= type ## _show,				\
-+	.store	= type ## _store				\
-+}
-+
-+#define SHOW(fn)						\
-+static ssize_t fn ## _to_text(struct printbuf *,		\
-+			      struct kobject *, struct attribute *); \
-+								\
-+static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
-+			   char *buf)				\
-+{								\
-+	struct printbuf out = PRINTBUF;				\
-+	ssize_t ret = fn ## _to_text(&out, kobj, attr);		\
-+								\
-+	if (out.pos && out.buf[out.pos - 1] != '\n')		\
-+		prt_newline(&out);				\
-+								\
-+	if (!ret && out.allocation_failure)			\
-+		ret = -ENOMEM;					\
-+								\
-+	if (!ret) {						\
-+		ret = min_t(size_t, out.pos, PAGE_SIZE - 1);	\
-+		memcpy(buf, out.buf, ret);			\
-+	}							\
-+	printbuf_exit(&out);					\
-+	return bch2_err_class(ret);				\
-+}								\
-+								\
-+static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\
-+			      struct attribute *attr)
-+
-+#define STORE(fn)						\
-+static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\
-+				  const char *, size_t);	\
-+								\
-+static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
-+			    const char *buf, size_t size)	\
-+{								\
-+	return bch2_err_class(fn##_store_inner(kobj, attr, buf, size));	\
-+}								\
-+								\
-+static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\
-+				  const char *buf, size_t size)
-+
-+#define __sysfs_attribute(_name, _mode)				\
-+	static struct attribute sysfs_##_name =			\
-+		{ .name = #_name, .mode = _mode }
-+
-+#define write_attribute(n)	__sysfs_attribute(n, 0200)
-+#define read_attribute(n)	__sysfs_attribute(n, 0444)
-+#define rw_attribute(n)		__sysfs_attribute(n, 0644)
-+
-+#define sysfs_printf(file, fmt, ...)				\
-+do {								\
-+	if (attr == &sysfs_ ## file)				\
-+		prt_printf(out, fmt "\n", __VA_ARGS__);		\
-+} while (0)
-+
-+#define sysfs_print(file, var)					\
-+do {								\
-+	if (attr == &sysfs_ ## file)				\
-+		snprint(out, var);				\
-+} while (0)
-+
-+#define sysfs_hprint(file, val)					\
-+do {								\
-+	if (attr == &sysfs_ ## file)				\
-+		prt_human_readable_s64(out, val);		\
-+} while (0)
-+
-+#define sysfs_strtoul(file, var)				\
-+do {								\
-+	if (attr == &sysfs_ ## file)				\
-+		return strtoul_safe(buf, var) ?: (ssize_t) size;	\
-+} while (0)
-+
-+#define sysfs_strtoul_clamp(file, var, min, max)		\
-+do {								\
-+	if (attr == &sysfs_ ## file)				\
-+		return strtoul_safe_clamp(buf, var, min, max)	\
-+			?: (ssize_t) size;			\
-+} while (0)
-+
-+#define strtoul_or_return(cp)					\
-+({								\
-+	unsigned long _v;					\
-+	int _r = kstrtoul(cp, 10, &_v);				\
-+	if (_r)							\
-+		return _r;					\
-+	_v;							\
-+})
-+
-+write_attribute(trigger_gc);
-+write_attribute(trigger_discards);
-+write_attribute(trigger_invalidates);
-+write_attribute(prune_cache);
-+write_attribute(btree_wakeup);
-+rw_attribute(btree_gc_periodic);
-+rw_attribute(gc_gens_pos);
-+
-+read_attribute(uuid);
-+read_attribute(minor);
-+read_attribute(bucket_size);
-+read_attribute(first_bucket);
-+read_attribute(nbuckets);
-+rw_attribute(durability);
-+read_attribute(io_done);
-+read_attribute(io_errors);
-+write_attribute(io_errors_reset);
-+
-+read_attribute(io_latency_read);
-+read_attribute(io_latency_write);
-+read_attribute(io_latency_stats_read);
-+read_attribute(io_latency_stats_write);
-+read_attribute(congested);
-+
-+read_attribute(btree_write_stats);
-+
-+read_attribute(btree_cache_size);
-+read_attribute(compression_stats);
-+read_attribute(journal_debug);
-+read_attribute(btree_updates);
-+read_attribute(btree_cache);
-+read_attribute(btree_key_cache);
-+read_attribute(stripes_heap);
-+read_attribute(open_buckets);
-+read_attribute(open_buckets_partial);
-+read_attribute(write_points);
-+read_attribute(nocow_lock_table);
-+
-+#ifdef BCH_WRITE_REF_DEBUG
-+read_attribute(write_refs);
-+
-+static const char * const bch2_write_refs[] = {
-+#define x(n)	#n,
-+	BCH_WRITE_REFS()
-+#undef x
-+	NULL
-+};
-+
-+static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	bch2_printbuf_tabstop_push(out, 24);
-+
-+	for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++) {
-+		prt_str(out, bch2_write_refs[i]);
-+		prt_tab(out);
-+		prt_printf(out, "%li", atomic_long_read(&c->writes[i]));
-+		prt_newline(out);
-+	}
-+}
-+#endif
-+
-+read_attribute(internal_uuid);
-+read_attribute(disk_groups);
-+
-+read_attribute(has_data);
-+read_attribute(alloc_debug);
-+
-+#define x(t, n, ...) read_attribute(t);
-+BCH_PERSISTENT_COUNTERS()
-+#undef x
-+
-+rw_attribute(discard);
-+rw_attribute(label);
-+
-+rw_attribute(copy_gc_enabled);
-+read_attribute(copy_gc_wait);
-+
-+rw_attribute(rebalance_enabled);
-+sysfs_pd_controller_attribute(rebalance);
-+read_attribute(rebalance_status);
-+rw_attribute(promote_whole_extents);
-+
-+read_attribute(new_stripes);
-+
-+read_attribute(io_timers_read);
-+read_attribute(io_timers_write);
-+
-+read_attribute(moving_ctxts);
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+write_attribute(perf_test);
-+#endif /* CONFIG_BCACHEFS_TESTS */
-+
-+#define x(_name)						\
-+	static struct attribute sysfs_time_stat_##_name =	\
-+		{ .name = #_name, .mode = 0444 };
-+	BCH_TIME_STATS()
-+#undef x
-+
-+static struct attribute sysfs_state_rw = {
-+	.name = "state",
-+	.mode =  0444,
-+};
-+
-+static size_t bch2_btree_cache_size(struct bch_fs *c)
-+{
-+	size_t ret = 0;
-+	struct btree *b;
-+
-+	mutex_lock(&c->btree_cache.lock);
-+	list_for_each_entry(b, &c->btree_cache.live, list)
-+		ret += btree_bytes(c);
-+
-+	mutex_unlock(&c->btree_cache.lock);
-+	return ret;
-+}
-+
-+static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	struct btree_trans *trans;
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+	enum btree_id id;
-+	u64 nr_uncompressed_extents = 0,
-+	    nr_compressed_extents = 0,
-+	    nr_incompressible_extents = 0,
-+	    uncompressed_sectors = 0,
-+	    incompressible_sectors = 0,
-+	    compressed_sectors_compressed = 0,
-+	    compressed_sectors_uncompressed = 0;
-+	int ret = 0;
-+
-+	if (!test_bit(BCH_FS_STARTED, &c->flags))
-+		return -EPERM;
-+
-+	trans = bch2_trans_get(c);
-+
-+	for (id = 0; id < BTREE_ID_NR; id++) {
-+		if (!btree_type_has_ptrs(id))
-+			continue;
-+
-+		for_each_btree_key(trans, iter, id, POS_MIN,
-+				   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-+			struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-+			const union bch_extent_entry *entry;
-+			struct extent_ptr_decoded p;
-+			bool compressed = false, uncompressed = false, incompressible = false;
-+
-+			bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-+				switch (p.crc.compression_type) {
-+				case BCH_COMPRESSION_TYPE_none:
-+					uncompressed = true;
-+					uncompressed_sectors += k.k->size;
-+					break;
-+				case BCH_COMPRESSION_TYPE_incompressible:
-+					incompressible = true;
-+					incompressible_sectors += k.k->size;
-+					break;
-+				default:
-+					compressed_sectors_compressed +=
-+						p.crc.compressed_size;
-+					compressed_sectors_uncompressed +=
-+						p.crc.uncompressed_size;
-+					compressed = true;
-+					break;
-+				}
-+			}
-+
-+			if (incompressible)
-+				nr_incompressible_extents++;
-+			else if (uncompressed)
-+				nr_uncompressed_extents++;
-+			else if (compressed)
-+				nr_compressed_extents++;
-+		}
-+		bch2_trans_iter_exit(trans, &iter);
-+	}
-+
-+	bch2_trans_put(trans);
-+
-+	if (ret)
-+		return ret;
-+
-+	prt_printf(out, "uncompressed:\n");
-+	prt_printf(out, "	nr extents:		%llu\n", nr_uncompressed_extents);
-+	prt_printf(out, "	size:			");
-+	prt_human_readable_u64(out, uncompressed_sectors << 9);
-+	prt_printf(out, "\n");
-+
-+	prt_printf(out, "compressed:\n");
-+	prt_printf(out, "	nr extents:		%llu\n", nr_compressed_extents);
-+	prt_printf(out, "	compressed size:	");
-+	prt_human_readable_u64(out, compressed_sectors_compressed << 9);
-+	prt_printf(out, "\n");
-+	prt_printf(out, "	uncompressed size:	");
-+	prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
-+	prt_printf(out, "\n");
-+
-+	prt_printf(out, "incompressible:\n");
-+	prt_printf(out, "	nr extents:		%llu\n", nr_incompressible_extents);
-+	prt_printf(out, "	size:			");
-+	prt_human_readable_u64(out, incompressible_sectors << 9);
-+	prt_printf(out, "\n");
-+	return 0;
-+}
-+
-+static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
-+{
-+	prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree));
-+	bch2_bpos_to_text(out, c->gc_gens_pos);
-+	prt_printf(out, "\n");
-+}
-+
-+static void bch2_btree_wakeup_all(struct bch_fs *c)
-+{
-+	struct btree_trans *trans;
-+
-+	seqmutex_lock(&c->btree_trans_lock);
-+	list_for_each_entry(trans, &c->btree_trans_list, list) {
-+		struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
-+
-+		if (b)
-+			six_lock_wakeup_all(&b->lock);
-+
-+	}
-+	seqmutex_unlock(&c->btree_trans_lock);
-+}
-+
-+SHOW(bch2_fs)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-+
-+	sysfs_print(minor,			c->minor);
-+	sysfs_printf(internal_uuid, "%pU",	c->sb.uuid.b);
-+
-+	sysfs_hprint(btree_cache_size,		bch2_btree_cache_size(c));
-+
-+	if (attr == &sysfs_btree_write_stats)
-+		bch2_btree_write_stats_to_text(out, c);
-+
-+	sysfs_printf(btree_gc_periodic, "%u",	(int) c->btree_gc_periodic);
-+
-+	if (attr == &sysfs_gc_gens_pos)
-+		bch2_gc_gens_pos_to_text(out, c);
-+
-+	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
-+
-+	sysfs_printf(rebalance_enabled,		"%i", c->rebalance.enabled);
-+	sysfs_pd_controller_show(rebalance,	&c->rebalance.pd); /* XXX */
-+
-+	if (attr == &sysfs_copy_gc_wait)
-+		bch2_copygc_wait_to_text(out, c);
-+
-+	if (attr == &sysfs_rebalance_status)
-+		bch2_rebalance_status_to_text(out, c);
-+
-+	sysfs_print(promote_whole_extents,	c->promote_whole_extents);
-+
-+	/* Debugging: */
-+
-+	if (attr == &sysfs_journal_debug)
-+		bch2_journal_debug_to_text(out, &c->journal);
-+
-+	if (attr == &sysfs_btree_updates)
-+		bch2_btree_updates_to_text(out, c);
-+
-+	if (attr == &sysfs_btree_cache)
-+		bch2_btree_cache_to_text(out, c);
-+
-+	if (attr == &sysfs_btree_key_cache)
-+		bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
-+
-+	if (attr == &sysfs_stripes_heap)
-+		bch2_stripes_heap_to_text(out, c);
-+
-+	if (attr == &sysfs_open_buckets)
-+		bch2_open_buckets_to_text(out, c);
-+
-+	if (attr == &sysfs_open_buckets_partial)
-+		bch2_open_buckets_partial_to_text(out, c);
-+
-+	if (attr == &sysfs_write_points)
-+		bch2_write_points_to_text(out, c);
-+
-+	if (attr == &sysfs_compression_stats)
-+		bch2_compression_stats_to_text(out, c);
-+
-+	if (attr == &sysfs_new_stripes)
-+		bch2_new_stripes_to_text(out, c);
-+
-+	if (attr == &sysfs_io_timers_read)
-+		bch2_io_timers_to_text(out, &c->io_clock[READ]);
-+
-+	if (attr == &sysfs_io_timers_write)
-+		bch2_io_timers_to_text(out, &c->io_clock[WRITE]);
-+
-+	if (attr == &sysfs_moving_ctxts)
-+		bch2_fs_moving_ctxts_to_text(out, c);
-+
-+#ifdef BCH_WRITE_REF_DEBUG
-+	if (attr == &sysfs_write_refs)
-+		bch2_write_refs_to_text(out, c);
-+#endif
-+
-+	if (attr == &sysfs_nocow_lock_table)
-+		bch2_nocow_locks_to_text(out, &c->nocow_locks);
-+
-+	if (attr == &sysfs_disk_groups)
-+		bch2_disk_groups_to_text(out, c);
-+
-+	return 0;
-+}
-+
-+STORE(bch2_fs)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
-+
-+	if (attr == &sysfs_btree_gc_periodic) {
-+		ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
-+			?: (ssize_t) size;
-+
-+		wake_up_process(c->gc_thread);
-+		return ret;
-+	}
-+
-+	if (attr == &sysfs_copy_gc_enabled) {
-+		ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
-+			?: (ssize_t) size;
-+
-+		if (c->copygc_thread)
-+			wake_up_process(c->copygc_thread);
-+		return ret;
-+	}
-+
-+	if (attr == &sysfs_rebalance_enabled) {
-+		ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
-+			?: (ssize_t) size;
-+
-+		rebalance_wakeup(c);
-+		return ret;
-+	}
-+
-+	sysfs_pd_controller_store(rebalance,	&c->rebalance.pd);
-+
-+	sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
-+
-+	/* Debugging: */
-+
-+	if (!test_bit(BCH_FS_STARTED, &c->flags))
-+		return -EPERM;
-+
-+	/* Debugging: */
-+
-+	if (!test_bit(BCH_FS_RW, &c->flags))
-+		return -EROFS;
-+
-+	if (attr == &sysfs_prune_cache) {
-+		struct shrink_control sc;
-+
-+		sc.gfp_mask = GFP_KERNEL;
-+		sc.nr_to_scan = strtoul_or_return(buf);
-+		c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
-+	}
-+
-+	if (attr == &sysfs_btree_wakeup)
-+		bch2_btree_wakeup_all(c);
-+
-+	if (attr == &sysfs_trigger_gc) {
-+		/*
-+		 * Full gc is currently incompatible with btree key cache:
-+		 */
-+#if 0
-+		down_read(&c->state_lock);
-+		bch2_gc(c, false, false);
-+		up_read(&c->state_lock);
-+#else
-+		bch2_gc_gens(c);
-+#endif
-+	}
-+
-+	if (attr == &sysfs_trigger_discards)
-+		bch2_do_discards(c);
-+
-+	if (attr == &sysfs_trigger_invalidates)
-+		bch2_do_invalidates(c);
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+	if (attr == &sysfs_perf_test) {
-+		char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
-+		char *test		= strsep(&p, " \t\n");
-+		char *nr_str		= strsep(&p, " \t\n");
-+		char *threads_str	= strsep(&p, " \t\n");
-+		unsigned threads;
-+		u64 nr;
-+		int ret = -EINVAL;
-+
-+		if (threads_str &&
-+		    !(ret = kstrtouint(threads_str, 10, &threads)) &&
-+		    !(ret = bch2_strtoull_h(nr_str, &nr)))
-+			ret = bch2_btree_perf_test(c, test, nr, threads);
-+		kfree(tmp);
-+
-+		if (ret)
-+			size = ret;
-+	}
-+#endif
-+	return size;
-+}
-+SYSFS_OPS(bch2_fs);
-+
-+struct attribute *bch2_fs_files[] = {
-+	&sysfs_minor,
-+	&sysfs_btree_cache_size,
-+	&sysfs_btree_write_stats,
-+
-+	&sysfs_promote_whole_extents,
-+
-+	&sysfs_compression_stats,
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+	&sysfs_perf_test,
-+#endif
-+	NULL
-+};
-+
-+/* counters dir */
-+
-+SHOW(bch2_fs_counters)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, counters_kobj);
-+	u64 counter = 0;
-+	u64 counter_since_mount = 0;
-+
-+	printbuf_tabstop_push(out, 32);
-+
-+	#define x(t, ...) \
-+		if (attr == &sysfs_##t) {					\
-+			counter             = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
-+			counter_since_mount = counter - c->counters_on_mount[BCH_COUNTER_##t];\
-+			prt_printf(out, "since mount:");			\
-+			prt_tab(out);						\
-+			prt_human_readable_u64(out, counter_since_mount);	\
-+			prt_newline(out);					\
-+										\
-+			prt_printf(out, "since filesystem creation:");		\
-+			prt_tab(out);						\
-+			prt_human_readable_u64(out, counter);			\
-+			prt_newline(out);					\
-+		}
-+	BCH_PERSISTENT_COUNTERS()
-+	#undef x
-+	return 0;
-+}
-+
-+STORE(bch2_fs_counters) {
-+	return 0;
-+}
-+
-+SYSFS_OPS(bch2_fs_counters);
-+
-+struct attribute *bch2_fs_counters_files[] = {
-+#define x(t, ...)						\
-+	&sysfs_##t,
-+	BCH_PERSISTENT_COUNTERS()
-+#undef x
-+	NULL
-+};
-+/* internal dir - just a wrapper */
-+
-+SHOW(bch2_fs_internal)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
-+
-+	return bch2_fs_to_text(out, &c->kobj, attr);
-+}
-+
-+STORE(bch2_fs_internal)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
-+
-+	return bch2_fs_store(&c->kobj, attr, buf, size);
-+}
-+SYSFS_OPS(bch2_fs_internal);
-+
-+struct attribute *bch2_fs_internal_files[] = {
-+	&sysfs_journal_debug,
-+	&sysfs_btree_updates,
-+	&sysfs_btree_cache,
-+	&sysfs_btree_key_cache,
-+	&sysfs_new_stripes,
-+	&sysfs_stripes_heap,
-+	&sysfs_open_buckets,
-+	&sysfs_open_buckets_partial,
-+	&sysfs_write_points,
-+#ifdef BCH_WRITE_REF_DEBUG
-+	&sysfs_write_refs,
-+#endif
-+	&sysfs_nocow_lock_table,
-+	&sysfs_io_timers_read,
-+	&sysfs_io_timers_write,
-+
-+	&sysfs_trigger_gc,
-+	&sysfs_trigger_discards,
-+	&sysfs_trigger_invalidates,
-+	&sysfs_prune_cache,
-+	&sysfs_btree_wakeup,
-+
-+	&sysfs_gc_gens_pos,
-+
-+	&sysfs_copy_gc_enabled,
-+	&sysfs_copy_gc_wait,
-+
-+	&sysfs_rebalance_enabled,
-+	&sysfs_rebalance_status,
-+	sysfs_pd_controller_files(rebalance),
-+
-+	&sysfs_moving_ctxts,
-+
-+	&sysfs_internal_uuid,
-+
-+	&sysfs_disk_groups,
-+	NULL
-+};
-+
-+/* options */
-+
-+SHOW(bch2_fs_opts_dir)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-+	const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-+	int id = opt - bch2_opt_table;
-+	u64 v = bch2_opt_get_by_id(&c->opts, id);
-+
-+	bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST);
-+	prt_char(out, '\n');
-+
-+	return 0;
-+}
-+
-+STORE(bch2_fs_opts_dir)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
-+	const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-+	int ret, id = opt - bch2_opt_table;
-+	char *tmp;
-+	u64 v;
-+
-+	/*
-+	 * We don't need to take c->writes for correctness, but it eliminates an
-+	 * unsightly error message in the dmesg log when we're RO:
-+	 */
-+	if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
-+		return -EROFS;
-+
-+	tmp = kstrdup(buf, GFP_KERNEL);
-+	if (!tmp) {
-+		ret = -ENOMEM;
-+		goto err;
-+	}
-+
-+	ret = bch2_opt_parse(c, opt, strim(tmp), &v, NULL);
-+	kfree(tmp);
-+
-+	if (ret < 0)
-+		goto err;
-+
-+	ret = bch2_opt_check_may_set(c, id, v);
-+	if (ret < 0)
-+		goto err;
-+
-+	bch2_opt_set_sb(c, opt, v);
-+	bch2_opt_set_by_id(&c->opts, id, v);
-+
-+	if ((id == Opt_background_target ||
-+	     id == Opt_background_compression) && v)
-+		bch2_set_rebalance_needs_scan(c, 0);
-+
-+	ret = size;
-+err:
-+	bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
-+	return ret;
-+}
-+SYSFS_OPS(bch2_fs_opts_dir);
-+
-+struct attribute *bch2_fs_opts_dir_files[] = { NULL };
-+
-+int bch2_opts_create_sysfs_files(struct kobject *kobj)
-+{
-+	const struct bch_option *i;
-+	int ret;
-+
-+	for (i = bch2_opt_table;
-+	     i < bch2_opt_table + bch2_opts_nr;
-+	     i++) {
-+		if (!(i->flags & OPT_FS))
-+			continue;
-+
-+		ret = sysfs_create_file(kobj, &i->attr);
-+		if (ret)
-+			return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+/* time stats */
-+
-+SHOW(bch2_fs_time_stats)
-+{
-+	struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
-+
-+#define x(name)						\
-+	if (attr == &sysfs_time_stat_##name)		\
-+		bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]);
-+	BCH_TIME_STATS()
-+#undef x
-+
-+	return 0;
-+}
-+
-+STORE(bch2_fs_time_stats)
-+{
-+	return size;
-+}
-+SYSFS_OPS(bch2_fs_time_stats);
-+
-+struct attribute *bch2_fs_time_stats_files[] = {
-+#define x(name)						\
-+	&sysfs_time_stat_##name,
-+	BCH_TIME_STATS()
-+#undef x
-+	NULL
-+};
-+
-+static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
-+{
-+	struct bch_fs *c = ca->fs;
-+	struct bch_dev_usage stats = bch2_dev_usage_read(ca);
-+	unsigned i, nr[BCH_DATA_NR];
-+
-+	memset(nr, 0, sizeof(nr));
-+
-+	for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
-+		nr[c->open_buckets[i].data_type]++;
-+
-+	printbuf_tabstop_push(out, 8);
-+	printbuf_tabstop_push(out, 16);
-+	printbuf_tabstop_push(out, 16);
-+	printbuf_tabstop_push(out, 16);
-+	printbuf_tabstop_push(out, 16);
-+
-+	prt_tab(out);
-+	prt_str(out, "buckets");
-+	prt_tab_rjust(out);
-+	prt_str(out, "sectors");
-+	prt_tab_rjust(out);
-+	prt_str(out, "fragmented");
-+	prt_tab_rjust(out);
-+	prt_newline(out);
-+
-+	for (i = 0; i < BCH_DATA_NR; i++) {
-+		prt_str(out, bch2_data_types[i]);
-+		prt_tab(out);
-+		prt_u64(out, stats.d[i].buckets);
-+		prt_tab_rjust(out);
-+		prt_u64(out, stats.d[i].sectors);
-+		prt_tab_rjust(out);
-+		prt_u64(out, stats.d[i].fragmented);
-+		prt_tab_rjust(out);
-+		prt_newline(out);
-+	}
-+
-+	prt_str(out, "ec");
-+	prt_tab(out);
-+	prt_u64(out, stats.buckets_ec);
-+	prt_tab_rjust(out);
-+	prt_newline(out);
-+
-+	prt_newline(out);
-+
-+	prt_printf(out, "reserves:");
-+	prt_newline(out);
-+	for (i = 0; i < BCH_WATERMARK_NR; i++) {
-+		prt_str(out, bch2_watermarks[i]);
-+		prt_tab(out);
-+		prt_u64(out, bch2_dev_buckets_reserved(ca, i));
-+		prt_tab_rjust(out);
-+		prt_newline(out);
-+	}
-+
-+	prt_newline(out);
-+
-+	printbuf_tabstops_reset(out);
-+	printbuf_tabstop_push(out, 24);
-+
-+	prt_str(out, "freelist_wait");
-+	prt_tab(out);
-+	prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty");
-+	prt_newline(out);
-+
-+	prt_str(out, "open buckets allocated");
-+	prt_tab(out);
-+	prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
-+	prt_newline(out);
-+
-+	prt_str(out, "open buckets this dev");
-+	prt_tab(out);
-+	prt_u64(out, ca->nr_open_buckets);
-+	prt_newline(out);
-+
-+	prt_str(out, "open buckets total");
-+	prt_tab(out);
-+	prt_u64(out, OPEN_BUCKETS_COUNT);
-+	prt_newline(out);
-+
-+	prt_str(out, "open_buckets_wait");
-+	prt_tab(out);
-+	prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty");
"waiting" : "empty"); -+ prt_newline(out); -+ -+ prt_str(out, "open_buckets_btree"); -+ prt_tab(out); -+ prt_u64(out, nr[BCH_DATA_btree]); -+ prt_newline(out); -+ -+ prt_str(out, "open_buckets_user"); -+ prt_tab(out); -+ prt_u64(out, nr[BCH_DATA_user]); -+ prt_newline(out); -+ -+ prt_str(out, "buckets_to_invalidate"); -+ prt_tab(out); -+ prt_u64(out, should_invalidate_buckets(ca, stats)); -+ prt_newline(out); -+ -+ prt_str(out, "btree reserve cache"); -+ prt_tab(out); -+ prt_u64(out, c->btree_reserve_cache_nr); -+ prt_newline(out); -+} -+ -+static const char * const bch2_rw[] = { -+ "read", -+ "write", -+ NULL -+}; -+ -+static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca) -+{ -+ int rw, i; -+ -+ for (rw = 0; rw < 2; rw++) { -+ prt_printf(out, "%s:\n", bch2_rw[rw]); -+ -+ for (i = 1; i < BCH_DATA_NR; i++) -+ prt_printf(out, "%-12s:%12llu\n", -+ bch2_data_types[i], -+ percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); -+ } -+} -+ -+SHOW(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ -+ sysfs_printf(uuid, "%pU\n", ca->uuid.b); -+ -+ sysfs_print(bucket_size, bucket_bytes(ca)); -+ sysfs_print(first_bucket, ca->mi.first_bucket); -+ sysfs_print(nbuckets, ca->mi.nbuckets); -+ sysfs_print(durability, ca->mi.durability); -+ sysfs_print(discard, ca->mi.discard); -+ -+ if (attr == &sysfs_label) { -+ if (ca->mi.group) -+ bch2_disk_path_to_text(out, c, ca->mi.group - 1); -+ prt_char(out, '\n'); -+ } -+ -+ if (attr == &sysfs_has_data) { -+ prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca)); -+ prt_char(out, '\n'); -+ } -+ -+ if (attr == &sysfs_state_rw) { -+ prt_string_option(out, bch2_member_states, ca->mi.state); -+ prt_char(out, '\n'); -+ } -+ -+ if (attr == &sysfs_io_done) -+ dev_io_done_to_text(out, ca); -+ -+ if (attr == &sysfs_io_errors) -+ bch2_dev_io_errors_to_text(out, ca); -+ -+ sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); -+ sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); -+ -+ if (attr == &sysfs_io_latency_stats_read) -+ bch2_time_stats_to_text(out, &ca->io_latency[READ]); -+ -+ if (attr == &sysfs_io_latency_stats_write) -+ bch2_time_stats_to_text(out, &ca->io_latency[WRITE]); -+ -+ sysfs_printf(congested, "%u%%", -+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) -+ * 100 / CONGESTED_MAX); -+ -+ if (attr == &sysfs_alloc_debug) -+ dev_alloc_debug_to_text(out, ca); -+ -+ return 0; -+} -+ -+STORE(bch2_dev) -+{ -+ struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); -+ struct bch_fs *c = ca->fs; -+ struct bch_member *mi; -+ -+ if (attr == &sysfs_discard) { -+ bool v = strtoul_or_return(buf); -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ -+ if (v != BCH_MEMBER_DISCARD(mi)) { -+ SET_BCH_MEMBER_DISCARD(mi, v); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_durability) { -+ u64 v = strtoul_or_return(buf); -+ -+ mutex_lock(&c->sb_lock); -+ mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); -+ -+ if (v + 1 != BCH_MEMBER_DURABILITY(mi)) { -+ SET_BCH_MEMBER_DURABILITY(mi, v + 1); -+ bch2_write_super(c); -+ } -+ mutex_unlock(&c->sb_lock); -+ } -+ -+ if (attr == &sysfs_label) { -+ char *tmp; -+ int ret; -+ -+ tmp = kstrdup(buf, GFP_KERNEL); -+ if (!tmp) -+ return -ENOMEM; -+ -+ ret = bch2_dev_group_set(c, ca, strim(tmp)); -+ kfree(tmp); -+ if (ret) -+ return ret; -+ } -+ -+ if (attr == &sysfs_io_errors_reset) -+ bch2_dev_errors_reset(ca); -+ -+ return 
size;
-+}
-+SYSFS_OPS(bch2_dev);
-+
-+struct attribute *bch2_dev_files[] = {
-+	&sysfs_uuid,
-+	&sysfs_bucket_size,
-+	&sysfs_first_bucket,
-+	&sysfs_nbuckets,
-+	&sysfs_durability,
-+
-+	/* settings: */
-+	&sysfs_discard,
-+	&sysfs_state_rw,
-+	&sysfs_label,
-+
-+	&sysfs_has_data,
-+	&sysfs_io_done,
-+	&sysfs_io_errors,
-+	&sysfs_io_errors_reset,
-+
-+	&sysfs_io_latency_read,
-+	&sysfs_io_latency_write,
-+	&sysfs_io_latency_stats_read,
-+	&sysfs_io_latency_stats_write,
-+	&sysfs_congested,
-+
-+	/* debug: */
-+	&sysfs_alloc_debug,
-+	NULL
-+};
-+
-+#endif /* NO_BCACHEFS_SYSFS */
-diff --git a/fs/bcachefs/sysfs.h b/fs/bcachefs/sysfs.h
-new file mode 100644
-index 000000000000..222cd5062702
---- /dev/null
-+++ b/fs/bcachefs/sysfs.h
-@@ -0,0 +1,48 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_SYSFS_H_
-+#define _BCACHEFS_SYSFS_H_
-+
-+#include <linux/sysfs.h>
-+
-+#ifndef NO_BCACHEFS_SYSFS
-+
-+struct attribute;
-+struct sysfs_ops;
-+
-+extern struct attribute *bch2_fs_files[];
-+extern struct attribute *bch2_fs_counters_files[];
-+extern struct attribute *bch2_fs_internal_files[];
-+extern struct attribute *bch2_fs_opts_dir_files[];
-+extern struct attribute *bch2_fs_time_stats_files[];
-+extern struct attribute *bch2_dev_files[];
-+
-+extern const struct sysfs_ops bch2_fs_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_counters_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_internal_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
-+extern const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
-+extern const struct sysfs_ops bch2_dev_sysfs_ops;
-+
-+int bch2_opts_create_sysfs_files(struct kobject *);
-+
-+#else
-+
-+static struct attribute *bch2_fs_files[] = {};
-+static struct attribute *bch2_fs_counters_files[] = {};
-+static struct attribute *bch2_fs_internal_files[] = {};
-+static struct attribute *bch2_fs_opts_dir_files[] = {};
-+static struct attribute *bch2_fs_time_stats_files[] = {};
-+static struct attribute *bch2_dev_files[] = {};
-+
-+static const struct sysfs_ops bch2_fs_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_counters_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_internal_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
-+static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
-+static const struct sysfs_ops bch2_dev_sysfs_ops;
-+
-+static inline int bch2_opts_create_sysfs_files(struct kobject *kobj) { return 0; }
-+
-+#endif /* NO_BCACHEFS_SYSFS */
-+
-+#endif /* _BCACHEFS_SYSFS_H_ */
-diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
-new file mode 100644
-index 000000000000..2fc9e60c754b
---- /dev/null
-+++ b/fs/bcachefs/tests.c
-@@ -0,0 +1,919 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#ifdef CONFIG_BCACHEFS_TESTS
-+
-+#include "bcachefs.h"
-+#include "btree_update.h"
-+#include "journal_reclaim.h"
-+#include "snapshot.h"
-+#include "tests.h"
-+
-+#include "linux/kthread.h"
-+#include "linux/random.h"
-+
-+static void delete_test_keys(struct bch_fs *c)
-+{
-+	int ret;
-+
-+	ret = bch2_btree_delete_range(c, BTREE_ID_extents,
-+				      SPOS(0, 0, U32_MAX),
-+				      POS(0, U64_MAX),
-+				      0, NULL);
-+	BUG_ON(ret);
-+
-+	ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
-+				      SPOS(0, 0, U32_MAX),
-+				      POS(0, U64_MAX),
-+				      0, NULL);
-+	BUG_ON(ret);
-+}
-+
-+/* unit tests */
-+
-+static int test_delete(struct bch_fs *c, u64 nr)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_i_cookie k;
-+	int ret;
-+
-+	bkey_cookie_init(&k.k_i);
-+	k.k.p.snapshot = U32_MAX;
-+
-+	
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update(trans, &iter, &k.k_i, 0)); -+ bch_err_msg(c, ret, "update error"); -+ if (ret) -+ goto err; -+ -+ pr_info("deleting once"); -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_delete_at(trans, &iter, 0)); -+ bch_err_msg(c, ret, "delete error (first)"); -+ if (ret) -+ goto err; -+ -+ pr_info("deleting twice"); -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_delete_at(trans, &iter, 0)); -+ bch_err_msg(c, ret, "delete error (second)"); -+ if (ret) -+ goto err; -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int test_delete_written(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_i_cookie k; -+ int ret; -+ -+ bkey_cookie_init(&k.k_i); -+ k.k.p.snapshot = U32_MAX; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p, -+ BTREE_ITER_INTENT); -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: -+ bch2_trans_update(trans, &iter, &k.k_i, 0)); -+ bch_err_msg(c, ret, "update error"); -+ if (ret) -+ goto err; -+ -+ bch2_trans_unlock(trans); -+ bch2_journal_flush_all_pins(&c->journal); -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_iter_traverse(&iter) ?: -+ bch2_btree_delete_at(trans, &iter, 0)); -+ bch_err_msg(c, ret, "delete error"); -+ if (ret) -+ goto err; -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int test_iterate(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c k; -+ u64 i; -+ int ret = 0; -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie ck; -+ -+ bkey_cookie_init(&ck.k_i); -+ ck.k.p.offset = i; -+ ck.k.p.snapshot = U32_MAX; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0); -+ bch_err_msg(c, ret, "insert error"); -+ if (ret) -+ goto err; -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ -+ BUG_ON(k.k->p.offset != i++); -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating forwards"); -+ if (ret) -+ goto err; -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, U64_MAX, U32_MAX), 0, k, -+ ({ -+ BUG_ON(k.k->p.offset != --i); -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating backwards"); -+ if (ret) -+ goto err; -+ -+ BUG_ON(i); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int test_iterate_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c k; -+ u64 i; -+ int ret = 0; -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test extents"); -+ -+ for (i = 0; i < nr; i += 8) { -+ struct bkey_i_cookie ck; -+ -+ bkey_cookie_init(&ck.k_i); -+ ck.k.p.offset = i + 8; -+ ck.k.p.snapshot = U32_MAX; -+ ck.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0); -+ bch_err_msg(c, ret, "insert error"); -+ if (ret) -+ goto err; -+ } -+ -+ 
pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ -+ BUG_ON(bkey_start_offset(k.k) != i); -+ i = k.k->p.offset; -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating forwards"); -+ if (ret) -+ goto err; -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating backwards"); -+ -+ ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, -+ SPOS(0, U64_MAX, U32_MAX), 0, k, -+ ({ -+ BUG_ON(k.k->p.offset != i); -+ i = bkey_start_offset(k.k); -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating backwards"); -+ if (ret) -+ goto err; -+ -+ BUG_ON(i); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int test_iterate_slots(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c k; -+ u64 i; -+ int ret = 0; -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i++) { -+ struct bkey_i_cookie ck; -+ -+ bkey_cookie_init(&ck.k_i); -+ ck.k.p.offset = i * 2; -+ ck.k.p.snapshot = U32_MAX; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0); -+ bch_err_msg(c, ret, "insert error"); -+ if (ret) -+ goto err; -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ -+ BUG_ON(k.k->p.offset != i); -+ i += 2; -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating forwards"); -+ if (ret) -+ goto err; -+ -+ BUG_ON(i != nr * 2); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ BTREE_ITER_SLOTS, k, ({ -+ if (i >= nr * 2) -+ break; -+ -+ BUG_ON(k.k->p.offset != i); -+ BUG_ON(bkey_deleted(k.k) != (i & 1)); -+ -+ i++; -+ 0; -+ })); -+ if (ret < 0) { -+ bch_err_msg(c, ret, "error iterating forwards by slots"); -+ goto err; -+ } -+ ret = 0; -+err: -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter = { NULL }; -+ struct bkey_s_c k; -+ u64 i; -+ int ret = 0; -+ -+ delete_test_keys(c); -+ -+ pr_info("inserting test keys"); -+ -+ for (i = 0; i < nr; i += 16) { -+ struct bkey_i_cookie ck; -+ -+ bkey_cookie_init(&ck.k_i); -+ ck.k.p.offset = i + 16; -+ ck.k.p.snapshot = U32_MAX; -+ ck.k.size = 8; -+ -+ ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0); -+ bch_err_msg(c, ret, "insert error"); -+ if (ret) -+ goto err; -+ } -+ -+ pr_info("iterating forwards"); -+ -+ i = 0; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, ({ -+ BUG_ON(bkey_start_offset(k.k) != i + 8); -+ BUG_ON(k.k->size != 8); -+ i += 16; -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating forwards"); -+ if (ret) -+ goto err; -+ -+ BUG_ON(i != nr); -+ -+ pr_info("iterating forwards by slots"); -+ -+ i = 0; -+ -+ ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ BTREE_ITER_SLOTS, k, ({ -+ if (i == nr) -+ break; -+ BUG_ON(bkey_deleted(k.k) != !(i % 16)); -+ -+ BUG_ON(bkey_start_offset(k.k) != i); -+ BUG_ON(k.k->size != 8); -+ i = k.k->p.offset; -+ 0; -+ })); -+ bch_err_msg(c, ret, "error iterating forwards by slots"); -+ if (ret) -+ goto err; -+ ret = 0; -+err: 
-+	bch2_trans_put(trans);
-+	return ret;
-+}
-+
-+/*
-+ * XXX: we really want to make sure we've got a btree with depth > 0 for these
-+ * tests
-+ */
-+static int test_peek_end(struct bch_fs *c, u64 nr)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
-+			     SPOS(0, 0, U32_MAX), 0);
-+
-+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+	BUG_ON(k.k);
-+
-+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+	BUG_ON(k.k);
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_trans_put(trans);
-+	return 0;
-+}
-+
-+static int test_peek_end_extents(struct bch_fs *c, u64 nr)
-+{
-+	struct btree_trans *trans = bch2_trans_get(c);
-+	struct btree_iter iter;
-+	struct bkey_s_c k;
-+
-+	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-+			     SPOS(0, 0, U32_MAX), 0);
-+
-+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+	BUG_ON(k.k);
-+
-+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX))));
-+	BUG_ON(k.k);
-+
-+	bch2_trans_iter_exit(trans, &iter);
-+	bch2_trans_put(trans);
-+	return 0;
-+}
-+
-+/* extent unit tests */
-+
-+static u64 test_version;
-+
-+static int insert_test_extent(struct bch_fs *c,
-+			      u64 start, u64 end)
-+{
-+	struct bkey_i_cookie k;
-+	int ret;
-+
-+	bkey_cookie_init(&k.k_i);
-+	k.k_i.k.p.offset = end;
-+	k.k_i.k.p.snapshot = U32_MAX;
-+	k.k_i.k.size = end - start;
-+	k.k_i.k.version.lo = test_version++;
-+
-+	ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0);
-+	bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+static int __test_extent_overwrite(struct bch_fs *c,
-+				   u64 e1_start, u64 e1_end,
-+				   u64 e2_start, u64 e2_end)
-+{
-+	int ret;
-+
-+	ret = insert_test_extent(c, e1_start, e1_end) ?:
-+	      insert_test_extent(c, e2_start, e2_end);
-+
-+	delete_test_keys(c);
-+	return ret;
-+}
-+
-+static int test_extent_overwrite_front(struct bch_fs *c, u64 nr)
-+{
-+	return __test_extent_overwrite(c, 0, 64, 0, 32) ?:
-+	       __test_extent_overwrite(c, 8, 64, 0, 32);
-+}
-+
-+static int test_extent_overwrite_back(struct bch_fs *c, u64 nr)
-+{
-+	return __test_extent_overwrite(c, 0, 64, 32, 64) ?:
-+	       __test_extent_overwrite(c, 0, 64, 32, 72);
-+}
-+
-+static int test_extent_overwrite_middle(struct bch_fs *c, u64 nr)
-+{
-+	return __test_extent_overwrite(c, 0, 64, 32, 40);
-+}
-+
-+static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
-+{
-+	return __test_extent_overwrite(c, 32, 64, 0, 64) ?:
-+	       __test_extent_overwrite(c, 32, 64, 0, 128) ?:
-+	       __test_extent_overwrite(c, 32, 64, 32, 64) ?:
-+	       __test_extent_overwrite(c, 32, 64, 32, 128);
-+}
-+
-+static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
-+{
-+	struct bkey_i_cookie k;
-+	int ret;
-+
-+	bkey_cookie_init(&k.k_i);
-+	k.k_i.k.p.inode	= inum;
-+	k.k_i.k.p.offset = start + len;
-+	k.k_i.k.p.snapshot = snapid;
-+	k.k_i.k.size = len;
-+
-+	ret = bch2_trans_do(c, NULL, NULL, 0,
-+		bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i,
-+					    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
-+	bch_err_fn(c, ret);
-+	return ret;
-+}
-+
-+static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
-+{
-+	return insert_test_overlapping_extent(c, inum,  0, 16, U32_MAX - 2) ?: /* overwrite entire */
-+	       insert_test_overlapping_extent(c, inum,  2,  8, U32_MAX - 2) ?:
-+	       insert_test_overlapping_extent(c, inum,  4,  4, U32_MAX) ?:
-+	       
insert_test_overlapping_extent(c, inum, 32, 8, U32_MAX - 2) ?: /* overwrite front/back */ -+ insert_test_overlapping_extent(c, inum, 36, 8, U32_MAX) ?: -+ insert_test_overlapping_extent(c, inum, 60, 8, U32_MAX - 2) ?: -+ insert_test_overlapping_extent(c, inum, 64, 8, U32_MAX); -+} -+ -+/* snapshot unit tests */ -+ -+/* Test skipping over keys in unrelated snapshots: */ -+static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) -+{ -+ struct btree_trans *trans; -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_i_cookie cookie; -+ int ret; -+ -+ bkey_cookie_init(&cookie.k_i); -+ cookie.k.p.snapshot = snapid_hi; -+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0); -+ if (ret) -+ return ret; -+ -+ trans = bch2_trans_get(c); -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -+ SPOS(0, 0, snapid_lo), 0); -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); -+ -+ BUG_ON(k.k->p.snapshot != U32_MAX); -+ -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int test_snapshots(struct bch_fs *c, u64 nr) -+{ -+ struct bkey_i_cookie cookie; -+ u32 snapids[2]; -+ u32 snapid_subvols[2] = { 1, 1 }; -+ int ret; -+ -+ bkey_cookie_init(&cookie.k_i); -+ cookie.k.p.snapshot = U32_MAX; -+ ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0); -+ if (ret) -+ return ret; -+ -+ ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_snapshot_node_create(trans, U32_MAX, -+ snapids, -+ snapid_subvols, -+ 2)); -+ if (ret) -+ return ret; -+ -+ if (snapids[0] > snapids[1]) -+ swap(snapids[0], snapids[1]); -+ -+ ret = test_snapshot_filter(c, snapids[0], snapids[1]); -+ bch_err_msg(c, ret, "from test_snapshot_filter"); -+ return ret; -+} -+ -+/* perf tests */ -+ -+static u64 test_rand(void) -+{ -+ u64 v; -+ -+ get_random_bytes(&v, sizeof(v)); -+ return v; -+} -+ -+static int rand_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct bkey_i_cookie k; -+ int ret = 0; -+ u64 i; -+ -+ for (i = 0; i < nr; i++) { -+ bkey_cookie_init(&k.k_i); -+ k.k.p.offset = test_rand(); -+ k.k.p.snapshot = U32_MAX; -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0)); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int rand_insert_multi(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct bkey_i_cookie k[8]; -+ int ret = 0; -+ unsigned j; -+ u64 i; -+ -+ for (i = 0; i < nr; i += ARRAY_SIZE(k)) { -+ for (j = 0; j < ARRAY_SIZE(k); j++) { -+ bkey_cookie_init(&k[j].k_i); -+ k[j].k.p.offset = test_rand(); -+ k[j].k.p.snapshot = U32_MAX; -+ } -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: -+ bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0)); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int rand_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct 
bkey_s_c k; -+ int ret = 0; -+ u64 i; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), 0); -+ -+ for (i = 0; i < nr; i++) { -+ bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX)); -+ -+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter))); -+ ret = bkey_err(k); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int rand_mixed_trans(struct btree_trans *trans, -+ struct btree_iter *iter, -+ struct bkey_i_cookie *cookie, -+ u64 i, u64 pos) -+{ -+ struct bkey_s_c k; -+ int ret; -+ -+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX)); -+ -+ k = bch2_btree_iter_peek(iter); -+ ret = bkey_err(k); -+ bch_err_msg(trans->c, ret, "lookup error"); -+ if (ret) -+ return ret; -+ -+ if (!(i & 3) && k.k) { -+ bkey_cookie_init(&cookie->k_i); -+ cookie->k.p = iter->pos; -+ ret = bch2_trans_update(trans, iter, &cookie->k_i, 0); -+ } -+ -+ return ret; -+} -+ -+static int rand_mixed(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_i_cookie cookie; -+ int ret = 0; -+ u64 i, rand; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), 0); -+ -+ for (i = 0; i < nr; i++) { -+ rand = test_rand(); -+ ret = commit_do(trans, NULL, NULL, 0, -+ rand_mixed_trans(trans, &iter, &cookie, i, rand)); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_iter_exit(trans, &iter); -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int __do_delete(struct btree_trans *trans, struct bpos pos) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ int ret = 0; -+ -+ bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, -+ BTREE_ITER_INTENT); -+ k = bch2_btree_iter_peek(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err; -+ -+ if (!k.k) -+ goto err; -+ -+ ret = bch2_btree_delete_at(trans, &iter, 0); -+err: -+ bch2_trans_iter_exit(trans, &iter); -+ return ret; -+} -+ -+static int rand_delete(struct bch_fs *c, u64 nr) -+{ -+ struct btree_trans *trans = bch2_trans_get(c); -+ int ret = 0; -+ u64 i; -+ -+ for (i = 0; i < nr; i++) { -+ struct bpos pos = SPOS(0, test_rand(), U32_MAX); -+ -+ ret = commit_do(trans, NULL, NULL, 0, -+ __do_delete(trans, pos)); -+ if (ret) -+ break; -+ } -+ -+ bch2_trans_put(trans); -+ return ret; -+} -+ -+static int seq_insert(struct bch_fs *c, u64 nr) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct bkey_i_cookie insert; -+ -+ bkey_cookie_init(&insert.k_i); -+ -+ return bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), -+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, -+ NULL, NULL, 0, ({ -+ if (iter.pos.offset >= nr) -+ break; -+ insert.k.p = iter.pos; -+ bch2_trans_update(trans, &iter, &insert.k_i, 0); -+ }))); -+} -+ -+static int seq_lookup(struct bch_fs *c, u64 nr) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ -+ return bch2_trans_run(c, -+ for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), POS(0, U64_MAX), -+ 0, k, -+ 0)); -+} -+ -+static int seq_overwrite(struct bch_fs *c, u64 nr) -+{ -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ -+ return bch2_trans_run(c, -+ for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), -+ BTREE_ITER_INTENT, k, -+ NULL, NULL, 0, ({ -+ struct bkey_i_cookie u; -+ -+ bkey_reassemble(&u.k_i, k); -+ bch2_trans_update(trans, &iter, &u.k_i, 0); -+ }))); -+} -+ -+static int seq_delete(struct bch_fs *c, u64 nr) -+{ -+ return 
bch2_btree_delete_range(c, BTREE_ID_xattrs, -+ SPOS(0, 0, U32_MAX), -+ POS(0, U64_MAX), -+ 0, NULL); -+} -+ -+typedef int (*perf_test_fn)(struct bch_fs *, u64); -+ -+struct test_job { -+ struct bch_fs *c; -+ u64 nr; -+ unsigned nr_threads; -+ perf_test_fn fn; -+ -+ atomic_t ready; -+ wait_queue_head_t ready_wait; -+ -+ atomic_t done; -+ struct completion done_completion; -+ -+ u64 start; -+ u64 finish; -+ int ret; -+}; -+ -+static int btree_perf_test_thread(void *data) -+{ -+ struct test_job *j = data; -+ int ret; -+ -+ if (atomic_dec_and_test(&j->ready)) { -+ wake_up(&j->ready_wait); -+ j->start = sched_clock(); -+ } else { -+ wait_event(j->ready_wait, !atomic_read(&j->ready)); -+ } -+ -+ ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads)); -+ if (ret) { -+ bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret)); -+ j->ret = ret; -+ } -+ -+ if (atomic_dec_and_test(&j->done)) { -+ j->finish = sched_clock(); -+ complete(&j->done_completion); -+ } -+ -+ return 0; -+} -+ -+int bch2_btree_perf_test(struct bch_fs *c, const char *testname, -+ u64 nr, unsigned nr_threads) -+{ -+ struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; -+ char name_buf[20]; -+ struct printbuf nr_buf = PRINTBUF; -+ struct printbuf per_sec_buf = PRINTBUF; -+ unsigned i; -+ u64 time; -+ -+ atomic_set(&j.ready, nr_threads); -+ init_waitqueue_head(&j.ready_wait); -+ -+ atomic_set(&j.done, nr_threads); -+ init_completion(&j.done_completion); -+ -+#define perf_test(_test) \ -+ if (!strcmp(testname, #_test)) j.fn = _test -+ -+ perf_test(rand_insert); -+ perf_test(rand_insert_multi); -+ perf_test(rand_lookup); -+ perf_test(rand_mixed); -+ perf_test(rand_delete); -+ -+ perf_test(seq_insert); -+ perf_test(seq_lookup); -+ perf_test(seq_overwrite); -+ perf_test(seq_delete); -+ -+ /* a unit test, not a perf test: */ -+ perf_test(test_delete); -+ perf_test(test_delete_written); -+ perf_test(test_iterate); -+ perf_test(test_iterate_extents); -+ perf_test(test_iterate_slots); -+ perf_test(test_iterate_slots_extents); -+ perf_test(test_peek_end); -+ perf_test(test_peek_end_extents); -+ -+ perf_test(test_extent_overwrite_front); -+ perf_test(test_extent_overwrite_back); -+ perf_test(test_extent_overwrite_middle); -+ perf_test(test_extent_overwrite_all); -+ perf_test(test_extent_create_overlapping); -+ -+ perf_test(test_snapshots); -+ -+ if (!j.fn) { -+ pr_err("unknown test %s", testname); -+ return -EINVAL; -+ } -+ -+ //pr_info("running test %s:", testname); -+ -+ if (nr_threads == 1) -+ btree_perf_test_thread(&j); -+ else -+ for (i = 0; i < nr_threads; i++) -+ kthread_run(btree_perf_test_thread, &j, -+ "bcachefs perf test[%u]", i); -+ -+ while (wait_for_completion_interruptible(&j.done_completion)) -+ ; -+ -+ time = j.finish - j.start; -+ -+ scnprintf(name_buf, sizeof(name_buf), "%s:", testname); -+ prt_human_readable_u64(&nr_buf, nr); -+ prt_human_readable_u64(&per_sec_buf, div64_u64(nr * NSEC_PER_SEC, time)); -+ printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", -+ name_buf, nr_buf.buf, nr_threads, -+ div_u64(time, NSEC_PER_SEC), -+ div_u64(time * nr_threads, nr), -+ per_sec_buf.buf); -+ printbuf_exit(&per_sec_buf); -+ printbuf_exit(&nr_buf); -+ return j.ret; -+} -+ -+#endif /* CONFIG_BCACHEFS_TESTS */ -diff --git a/fs/bcachefs/tests.h b/fs/bcachefs/tests.h -new file mode 100644 -index 000000000000..c73b18aea7e0 ---- /dev/null -+++ b/fs/bcachefs/tests.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_TEST_H -+#define _BCACHEFS_TEST_H -+ -+struct 
bch_fs;
-+
-+#ifdef CONFIG_BCACHEFS_TESTS
-+
-+int bch2_btree_perf_test(struct bch_fs *, const char *, u64, unsigned);
-+
-+#else
-+
-+#endif /* CONFIG_BCACHEFS_TESTS */
-+
-+#endif /* _BCACHEFS_TEST_H */
-diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c
-new file mode 100644
-index 000000000000..dc48b52b01b4
---- /dev/null
-+++ b/fs/bcachefs/trace.c
-@@ -0,0 +1,17 @@
-+// SPDX-License-Identifier: GPL-2.0
-+#include "bcachefs.h"
-+#include "alloc_types.h"
-+#include "buckets.h"
-+#include "btree_cache.h"
-+#include "btree_iter.h"
-+#include "btree_locking.h"
-+#include "btree_update_interior.h"
-+#include "keylist.h"
-+#include "move_types.h"
-+#include "opts.h"
-+#include "six.h"
-+
-+#include <linux/blktrace_api.h>
-+
-+#define CREATE_TRACE_POINTS
-+#include "trace.h"
-diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
-new file mode 100644
-index 000000000000..893304a1f06e
---- /dev/null
-+++ b/fs/bcachefs/trace.h
-@@ -0,0 +1,1334 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#undef TRACE_SYSTEM
-+#define TRACE_SYSTEM bcachefs
-+
-+#if !defined(_TRACE_BCACHEFS_H) || defined(TRACE_HEADER_MULTI_READ)
-+#define _TRACE_BCACHEFS_H
-+
-+#include <linux/tracepoint.h>
-+
-+#define TRACE_BPOS_entries(name)			\
-+	__field(u64,	name##_inode	)		\
-+	__field(u64,	name##_offset	)		\
-+	__field(u32,	name##_snapshot	)
-+
-+#define TRACE_BPOS_assign(dst, src)			\
-+	__entry->dst##_inode	= (src).inode;		\
-+	__entry->dst##_offset	= (src).offset;		\
-+	__entry->dst##_snapshot	= (src).snapshot
-+
-+DECLARE_EVENT_CLASS(bpos,
-+	TP_PROTO(const struct bpos *p),
-+	TP_ARGS(p),
-+
-+	TP_STRUCT__entry(
-+		TRACE_BPOS_entries(p)
-+	),
-+
-+	TP_fast_assign(
-+		TRACE_BPOS_assign(p, *p);
-+	),
-+
-+	TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
-+);
-+
-+DECLARE_EVENT_CLASS(bkey,
-+	TP_PROTO(struct bch_fs *c, const char *k),
-+	TP_ARGS(c, k),
-+
-+	TP_STRUCT__entry(
-+		__string(k, k	)
-+	),
-+
-+	TP_fast_assign(
-+		__assign_str(k, k);
-+	),
-+
-+	TP_printk("%s", __get_str(k))
-+);
-+
-+DECLARE_EVENT_CLASS(btree_node,
-+	TP_PROTO(struct bch_fs *c, struct btree *b),
-+	TP_ARGS(c, b),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev		)
-+		__field(u8,	level		)
-+		__field(u8,	btree_id	)
-+		TRACE_BPOS_entries(pos)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= c->dev;
-+		__entry->level		= b->c.level;
-+		__entry->btree_id	= b->c.btree_id;
-+		TRACE_BPOS_assign(pos, b->key.k.p);
-+	),
-+
-+	TP_printk("%d,%d %u %s %llu:%llu:%u",
-+		  MAJOR(__entry->dev), MINOR(__entry->dev),
-+		  __entry->level,
-+		  bch2_btree_id_str(__entry->btree_id),
-+		  __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot)
-+);
-+
-+DECLARE_EVENT_CLASS(bch_fs,
-+	TP_PROTO(struct bch_fs *c),
-+	TP_ARGS(c),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev	)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= c->dev;
-+	),
-+
-+	TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
-+);
-+
-+DECLARE_EVENT_CLASS(bio,
-+	TP_PROTO(struct bio *bio),
-+	TP_ARGS(bio),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,		dev		)
-+		__field(sector_t,	sector		)
-+		__field(unsigned int,	nr_sector	)
-+		__array(char,		rwbs,	6	)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= bio->bi_bdev ? 
bio_dev(bio) : 0; -+ __entry->sector = bio->bi_iter.bi_sector; -+ __entry->nr_sector = bio->bi_iter.bi_size >> 9; -+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf); -+ ), -+ -+ TP_printk("%d,%d %s %llu + %u", -+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, -+ (unsigned long long)__entry->sector, __entry->nr_sector) -+); -+ -+/* super-io.c: */ -+TRACE_EVENT(write_super, -+ TP_PROTO(struct bch_fs *c, unsigned long ip), -+ TP_ARGS(c, ip), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(unsigned long, ip ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->ip = ip; -+ ), -+ -+ TP_printk("%d,%d for %pS", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ (void *) __entry->ip) -+); -+ -+/* io.c: */ -+ -+DEFINE_EVENT(bio, read_promote, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+TRACE_EVENT(read_nopromote, -+ TP_PROTO(struct bch_fs *c, int ret), -+ TP_ARGS(c, ret), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __array(char, ret, 32 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret)); -+ ), -+ -+ TP_printk("%d,%d ret %s", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->ret) -+); -+ -+DEFINE_EVENT(bio, read_bounce, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_split, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_retry, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+DEFINE_EVENT(bio, read_reuse_race, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+/* Journal */ -+ -+DEFINE_EVENT(bch_fs, journal_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, journal_entry_full, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bio, journal_write, -+ TP_PROTO(struct bio *bio), -+ TP_ARGS(bio) -+); -+ -+TRACE_EVENT(journal_reclaim_start, -+ TP_PROTO(struct bch_fs *c, bool direct, bool kicked, -+ u64 min_nr, u64 min_key_cache, -+ u64 prereserved, u64 prereserved_total, -+ u64 btree_cache_dirty, u64 btree_cache_total, -+ u64 btree_key_cache_dirty, u64 btree_key_cache_total), -+ TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total, -+ btree_cache_dirty, btree_cache_total, -+ btree_key_cache_dirty, btree_key_cache_total), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(bool, direct ) -+ __field(bool, kicked ) -+ __field(u64, min_nr ) -+ __field(u64, min_key_cache ) -+ __field(u64, prereserved ) -+ __field(u64, prereserved_total ) -+ __field(u64, btree_cache_dirty ) -+ __field(u64, btree_cache_total ) -+ __field(u64, btree_key_cache_dirty ) -+ __field(u64, btree_key_cache_total ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->direct = direct; -+ __entry->kicked = kicked; -+ __entry->min_nr = min_nr; -+ __entry->min_key_cache = min_key_cache; -+ __entry->prereserved = prereserved; -+ __entry->prereserved_total = prereserved_total; -+ __entry->btree_cache_dirty = btree_cache_dirty; -+ __entry->btree_cache_total = btree_cache_total; -+ __entry->btree_key_cache_dirty = btree_key_cache_dirty; -+ __entry->btree_key_cache_total = btree_key_cache_total; -+ ), -+ -+ TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->direct, -+ __entry->kicked, -+ __entry->min_nr, -+ __entry->min_key_cache, -+ __entry->prereserved, -+ __entry->prereserved_total, -+ __entry->btree_cache_dirty, -+ 
__entry->btree_cache_total, -+ __entry->btree_key_cache_dirty, -+ __entry->btree_key_cache_total) -+); -+ -+TRACE_EVENT(journal_reclaim_finish, -+ TP_PROTO(struct bch_fs *c, u64 nr_flushed), -+ TP_ARGS(c, nr_flushed), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(u64, nr_flushed ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->nr_flushed = nr_flushed; -+ ), -+ -+ TP_printk("%d,%d flushed %llu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->nr_flushed) -+); -+ -+/* bset.c: */ -+ -+DEFINE_EVENT(bpos, bkey_pack_pos_fail, -+ TP_PROTO(const struct bpos *p), -+ TP_ARGS(p) -+); -+ -+/* Btree cache: */ -+ -+TRACE_EVENT(btree_cache_scan, -+ TP_PROTO(long nr_to_scan, long can_free, long ret), -+ TP_ARGS(nr_to_scan, can_free, ret), -+ -+ TP_STRUCT__entry( -+ __field(long, nr_to_scan ) -+ __field(long, can_free ) -+ __field(long, ret ) -+ ), -+ -+ TP_fast_assign( -+ __entry->nr_to_scan = nr_to_scan; -+ __entry->can_free = can_free; -+ __entry->ret = ret; -+ ), -+ -+ TP_printk("scanned for %li nodes, can free %li, ret %li", -+ __entry->nr_to_scan, __entry->can_free, __entry->ret) -+); -+ -+DEFINE_EVENT(btree_node, btree_cache_reap, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock_fail, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_cache_cannibalize_lock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_cache_cannibalize, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, btree_cache_cannibalize_unlock, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+/* Btree */ -+ -+DEFINE_EVENT(btree_node, btree_node_read, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_node_write, -+ TP_PROTO(struct btree *b, unsigned bytes, unsigned sectors), -+ TP_ARGS(b, bytes, sectors), -+ -+ TP_STRUCT__entry( -+ __field(enum btree_node_type, type) -+ __field(unsigned, bytes ) -+ __field(unsigned, sectors ) -+ ), -+ -+ TP_fast_assign( -+ __entry->type = btree_node_type(b); -+ __entry->bytes = bytes; -+ __entry->sectors = sectors; -+ ), -+ -+ TP_printk("bkey type %u bytes %u sectors %u", -+ __entry->type , __entry->bytes, __entry->sectors) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_alloc, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_free, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_reserve_get_fail, -+ TP_PROTO(const char *trans_fn, -+ unsigned long caller_ip, -+ size_t required, -+ int ret), -+ TP_ARGS(trans_fn, caller_ip, required, ret), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(size_t, required ) -+ __array(char, ret, 32 ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->required = required; -+ strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret)); -+ ), -+ -+ TP_printk("%s %pS required %zu ret %s", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ __entry->required, -+ __entry->ret) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_compact, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_merge, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_split, -+ 
TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_rewrite, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+DEFINE_EVENT(btree_node, btree_node_set_root, -+ TP_PROTO(struct bch_fs *c, struct btree *b), -+ TP_ARGS(c, b) -+); -+ -+TRACE_EVENT(btree_path_relock_fail, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path, -+ unsigned level), -+ TP_ARGS(trans, caller_ip, path, level), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(u8, btree_id ) -+ __field(u8, level ) -+ TRACE_BPOS_entries(pos) -+ __array(char, node, 24 ) -+ __field(u8, self_read_count ) -+ __field(u8, self_intent_count) -+ __field(u8, read_count ) -+ __field(u8, intent_count ) -+ __field(u32, iter_lock_seq ) -+ __field(u32, node_lock_seq ) -+ ), -+ -+ TP_fast_assign( -+ struct btree *b = btree_path_node(path, level); -+ struct six_lock_count c; -+ -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->btree_id = path->btree_id; -+ __entry->level = path->level; -+ TRACE_BPOS_assign(pos, path->pos); -+ -+ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level), -+ __entry->self_read_count = c.n[SIX_LOCK_read]; -+ __entry->self_intent_count = c.n[SIX_LOCK_intent]; -+ -+ if (IS_ERR(b)) { -+ strscpy(__entry->node, bch2_err_str(PTR_ERR(b)), sizeof(__entry->node)); -+ } else { -+ c = six_lock_counts(&path->l[level].b->c.lock); -+ __entry->read_count = c.n[SIX_LOCK_read]; -+ __entry->intent_count = c.n[SIX_LOCK_intent]; -+ scnprintf(__entry->node, sizeof(__entry->node), "%px", b); -+ } -+ __entry->iter_lock_seq = path->l[level].lock_seq; -+ __entry->node_lock_seq = is_btree_node(path, level) -+ ? 
six_lock_seq(&path->l[level].b->c.lock) -+ : 0; -+ ), -+ -+ TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s held %u:%u lock count %u:%u iter seq %u lock seq %u", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ bch2_btree_id_str(__entry->btree_id), -+ __entry->pos_inode, -+ __entry->pos_offset, -+ __entry->pos_snapshot, -+ __entry->level, -+ __entry->node, -+ __entry->self_read_count, -+ __entry->self_intent_count, -+ __entry->read_count, -+ __entry->intent_count, -+ __entry->iter_lock_seq, -+ __entry->node_lock_seq) -+); -+ -+TRACE_EVENT(btree_path_upgrade_fail, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path, -+ unsigned level), -+ TP_ARGS(trans, caller_ip, path, level), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(u8, btree_id ) -+ __field(u8, level ) -+ TRACE_BPOS_entries(pos) -+ __field(u8, locked ) -+ __field(u8, self_read_count ) -+ __field(u8, self_intent_count) -+ __field(u8, read_count ) -+ __field(u8, intent_count ) -+ __field(u32, iter_lock_seq ) -+ __field(u32, node_lock_seq ) -+ ), -+ -+ TP_fast_assign( -+ struct six_lock_count c; -+ -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->btree_id = path->btree_id; -+ __entry->level = level; -+ TRACE_BPOS_assign(pos, path->pos); -+ __entry->locked = btree_node_locked(path, level); -+ -+ c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level), -+ __entry->self_read_count = c.n[SIX_LOCK_read]; -+ __entry->self_intent_count = c.n[SIX_LOCK_intent]; -+ c = six_lock_counts(&path->l[level].b->c.lock); -+ __entry->read_count = c.n[SIX_LOCK_read]; -+ __entry->intent_count = c.n[SIX_LOCK_intent]; -+ __entry->iter_lock_seq = path->l[level].lock_seq; -+ __entry->node_lock_seq = is_btree_node(path, level) -+ ? 
six_lock_seq(&path->l[level].b->c.lock) -+ : 0; -+ ), -+ -+ TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ bch2_btree_id_str(__entry->btree_id), -+ __entry->pos_inode, -+ __entry->pos_offset, -+ __entry->pos_snapshot, -+ __entry->level, -+ __entry->locked, -+ __entry->self_read_count, -+ __entry->self_intent_count, -+ __entry->read_count, -+ __entry->intent_count, -+ __entry->iter_lock_seq, -+ __entry->node_lock_seq) -+); -+ -+/* Garbage collection */ -+ -+DEFINE_EVENT(bch_fs, gc_gens_start, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+DEFINE_EVENT(bch_fs, gc_gens_end, -+ TP_PROTO(struct bch_fs *c), -+ TP_ARGS(c) -+); -+ -+/* Allocator */ -+ -+DECLARE_EVENT_CLASS(bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, -+ u64 bucket, -+ u64 free, -+ u64 avail, -+ u64 copygc_wait_amount, -+ s64 copygc_waiting_for, -+ struct bucket_alloc_state *s, -+ bool nonblocking, -+ const char *err), -+ TP_ARGS(ca, alloc_reserve, bucket, free, avail, -+ copygc_wait_amount, copygc_waiting_for, -+ s, nonblocking, err), -+ -+ TP_STRUCT__entry( -+ __field(u8, dev ) -+ __array(char, reserve, 16 ) -+ __field(u64, bucket ) -+ __field(u64, free ) -+ __field(u64, avail ) -+ __field(u64, copygc_wait_amount ) -+ __field(s64, copygc_waiting_for ) -+ __field(u64, seen ) -+ __field(u64, open ) -+ __field(u64, need_journal_commit ) -+ __field(u64, nouse ) -+ __field(bool, nonblocking ) -+ __field(u64, nocow ) -+ __array(char, err, 32 ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = ca->dev_idx; -+ strscpy(__entry->reserve, alloc_reserve, sizeof(__entry->reserve)); -+ __entry->bucket = bucket; -+ __entry->free = free; -+ __entry->avail = avail; -+ __entry->copygc_wait_amount = copygc_wait_amount; -+ __entry->copygc_waiting_for = copygc_waiting_for; -+ __entry->seen = s->buckets_seen; -+ __entry->open = s->skipped_open; -+ __entry->need_journal_commit = s->skipped_need_journal_commit; -+ __entry->nouse = s->skipped_nouse; -+ __entry->nonblocking = nonblocking; -+ __entry->nocow = s->skipped_nocow; -+ strscpy(__entry->err, err, sizeof(__entry->err)); -+ ), -+ -+ TP_printk("reserve %s bucket %u:%llu free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nocow %llu nonblocking %u err %s", -+ __entry->reserve, -+ __entry->dev, -+ __entry->bucket, -+ __entry->free, -+ __entry->avail, -+ __entry->copygc_wait_amount, -+ __entry->copygc_waiting_for, -+ __entry->seen, -+ __entry->open, -+ __entry->need_journal_commit, -+ __entry->nouse, -+ __entry->nocow, -+ __entry->nonblocking, -+ __entry->err) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc, -+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, -+ u64 bucket, -+ u64 free, -+ u64 avail, -+ u64 copygc_wait_amount, -+ s64 copygc_waiting_for, -+ struct bucket_alloc_state *s, -+ bool nonblocking, -+ const char *err), -+ TP_ARGS(ca, alloc_reserve, bucket, free, avail, -+ copygc_wait_amount, copygc_waiting_for, -+ s, nonblocking, err) -+); -+ -+DEFINE_EVENT(bucket_alloc, bucket_alloc_fail, -+ TP_PROTO(struct bch_dev *ca, const char *alloc_reserve, -+ u64 bucket, -+ u64 free, -+ u64 avail, -+ u64 copygc_wait_amount, -+ s64 copygc_waiting_for, -+ struct bucket_alloc_state *s, -+ bool nonblocking, -+ const char *err), -+ TP_ARGS(ca, alloc_reserve, bucket, free, avail, -+ copygc_wait_amount, copygc_waiting_for, -+ s, nonblocking, err) -+); -+ -+TRACE_EVENT(discard_buckets, -+ 
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
-+		 u64 need_journal_commit, u64 discarded, const char *err),
-+	TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev			)
-+		__field(u64,	seen			)
-+		__field(u64,	open			)
-+		__field(u64,	need_journal_commit	)
-+		__field(u64,	discarded		)
-+		__array(char,	err,	16		)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev			= c->dev;
-+		__entry->seen			= seen;
-+		__entry->open			= open;
-+		__entry->need_journal_commit	= need_journal_commit;
-+		__entry->discarded		= discarded;
-+		strscpy(__entry->err, err, sizeof(__entry->err));
-+	),
-+
-+	TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu err %s",
-+		  MAJOR(__entry->dev), MINOR(__entry->dev),
-+		  __entry->seen,
-+		  __entry->open,
-+		  __entry->need_journal_commit,
-+		  __entry->discarded,
-+		  __entry->err)
-+);
-+
-+TRACE_EVENT(bucket_invalidate,
-+	TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
-+	TP_ARGS(c, dev, bucket, sectors),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev	)
-+		__field(u32,	dev_idx	)
-+		__field(u32,	sectors	)
-+		__field(u64,	bucket	)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= c->dev;
-+		__entry->dev_idx	= dev;
-+		__entry->sectors	= sectors;
-+		__entry->bucket		= bucket;
-+	),
-+
-+	TP_printk("%d:%d invalidated %u:%llu cached sectors %u",
-+		  MAJOR(__entry->dev), MINOR(__entry->dev),
-+		  __entry->dev_idx, __entry->bucket,
-+		  __entry->sectors)
-+);
-+
-+/* Moving IO */
-+
-+TRACE_EVENT(bucket_evacuate,
-+	TP_PROTO(struct bch_fs *c, struct bpos *bucket),
-+	TP_ARGS(c, bucket),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev	)
-+		__field(u32,	dev_idx	)
-+		__field(u64,	bucket	)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= c->dev;
-+		__entry->dev_idx	= bucket->inode;
-+		__entry->bucket		= bucket->offset;
-+	),
-+
-+	TP_printk("%d:%d %u:%llu",
-+		  MAJOR(__entry->dev), MINOR(__entry->dev),
-+		  __entry->dev_idx, __entry->bucket)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent,
-+	TP_PROTO(struct bch_fs *c, const char *k),
-+	TP_ARGS(c, k)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_read,
-+	TP_PROTO(struct bch_fs *c, const char *k),
-+	TP_ARGS(c, k)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_write,
-+	TP_PROTO(struct bch_fs *c, const char *k),
-+	TP_ARGS(c, k)
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_finish,
-+	TP_PROTO(struct bch_fs *c, const char *k),
-+	TP_ARGS(c, k)
-+);
-+
-+TRACE_EVENT(move_extent_fail,
-+	TP_PROTO(struct bch_fs *c, const char *msg),
-+	TP_ARGS(c, msg),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev	)
-+		__string(msg,	msg	)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= c->dev;
-+		__assign_str(msg, msg);
-+	),
-+
-+	TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
-+);
-+
-+DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
-+	TP_PROTO(struct bch_fs *c, const char *k),
-+	TP_ARGS(c, k)
-+);
-+
-+TRACE_EVENT(move_data,
-+	TP_PROTO(struct bch_fs *c,
-+		 struct bch_move_stats *stats),
-+	TP_ARGS(c, stats),
-+
-+	TP_STRUCT__entry(
-+		__field(dev_t,	dev		)
-+		__field(u64,	keys_moved	)
-+		__field(u64,	keys_raced	)
-+		__field(u64,	sectors_seen	)
-+		__field(u64,	sectors_moved	)
-+		__field(u64,	sectors_raced	)
-+	),
-+
-+	TP_fast_assign(
-+		__entry->dev		= c->dev;
-+		__entry->keys_moved	= atomic64_read(&stats->keys_moved);
-+		__entry->keys_raced	= atomic64_read(&stats->keys_raced);
-+		__entry->sectors_seen	= atomic64_read(&stats->sectors_seen);
-+		__entry->sectors_moved	= atomic64_read(&stats->sectors_moved);
-+		__entry->sectors_raced	= atomic64_read(&stats->sectors_raced);
-+	),
-+
TP_printk("%d,%d keys moved %llu raced %llu" -+ "sectors seen %llu moved %llu raced %llu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->keys_moved, -+ __entry->keys_raced, -+ __entry->sectors_seen, -+ __entry->sectors_moved, -+ __entry->sectors_raced) -+); -+ -+TRACE_EVENT(evacuate_bucket, -+ TP_PROTO(struct bch_fs *c, struct bpos *bucket, -+ unsigned sectors, unsigned bucket_size, -+ u64 fragmentation, int ret), -+ TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(u64, member ) -+ __field(u64, bucket ) -+ __field(u32, sectors ) -+ __field(u32, bucket_size ) -+ __field(u64, fragmentation ) -+ __field(int, ret ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->member = bucket->inode; -+ __entry->bucket = bucket->offset; -+ __entry->sectors = sectors; -+ __entry->bucket_size = bucket_size; -+ __entry->fragmentation = fragmentation; -+ __entry->ret = ret; -+ ), -+ -+ TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->member, __entry->bucket, -+ __entry->sectors, __entry->bucket_size, -+ __entry->fragmentation, __entry->ret) -+); -+ -+TRACE_EVENT(copygc, -+ TP_PROTO(struct bch_fs *c, -+ u64 sectors_moved, u64 sectors_not_moved, -+ u64 buckets_moved, u64 buckets_not_moved), -+ TP_ARGS(c, -+ sectors_moved, sectors_not_moved, -+ buckets_moved, buckets_not_moved), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(u64, sectors_moved ) -+ __field(u64, sectors_not_moved ) -+ __field(u64, buckets_moved ) -+ __field(u64, buckets_not_moved ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->sectors_moved = sectors_moved; -+ __entry->sectors_not_moved = sectors_not_moved; -+ __entry->buckets_moved = buckets_moved; -+ __entry->buckets_not_moved = buckets_moved; -+ ), -+ -+ TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->sectors_moved, __entry->sectors_not_moved, -+ __entry->buckets_moved, __entry->buckets_not_moved) -+); -+ -+TRACE_EVENT(copygc_wait, -+ TP_PROTO(struct bch_fs *c, -+ u64 wait_amount, u64 until), -+ TP_ARGS(c, wait_amount, until), -+ -+ TP_STRUCT__entry( -+ __field(dev_t, dev ) -+ __field(u64, wait_amount ) -+ __field(u64, until ) -+ ), -+ -+ TP_fast_assign( -+ __entry->dev = c->dev; -+ __entry->wait_amount = wait_amount; -+ __entry->until = until; -+ ), -+ -+ TP_printk("%d,%u waiting for %llu sectors until %llu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ __entry->wait_amount, __entry->until) -+); -+ -+/* btree transactions: */ -+ -+DECLARE_EVENT_CLASS(transaction_event, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ ), -+ -+ TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, transaction_commit, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_injected, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+TRACE_EVENT(trans_restart_split_race, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ 
struct btree *b), -+ TP_ARGS(trans, caller_ip, b), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(u8, level ) -+ __field(u16, written ) -+ __field(u16, blocks ) -+ __field(u16, u64s_remaining ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->level = b->c.level; -+ __entry->written = b->written; -+ __entry->blocks = btree_blocks(trans->c); -+ __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b); -+ ), -+ -+ TP_printk("%s %pS l=%u written %u/%u u64s remaining %u", -+ __entry->trans_fn, (void *) __entry->caller_ip, -+ __entry->level, -+ __entry->written, __entry->blocks, -+ __entry->u64s_remaining) -+); -+ -+DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+TRACE_EVENT(trans_restart_journal_preres_get, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ unsigned flags), -+ TP_ARGS(trans, caller_ip, flags), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(unsigned, flags ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->flags = flags; -+ ), -+ -+ TP_printk("%s %pS %x", __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ __entry->flags) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_fault_inject, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_traverse_all, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_too_many_iters, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DECLARE_EVENT_CLASS(transaction_restart_iter, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(u8, btree_id ) -+ TRACE_BPOS_entries(pos) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->btree_id = path->btree_id; -+ TRACE_BPOS_assign(pos, path->pos) -+ ), -+ -+ TP_printk("%s %pS btree %s pos %llu:%llu:%u", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ bch2_btree_id_str(__entry->btree_id), -+ __entry->pos_inode, -+ __entry->pos_offset, -+ __entry->pos_snapshot) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+struct get_locks_fail; -+ -+TRACE_EVENT(trans_restart_upgrade, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path, -+ unsigned 
old_locks_want, -+ unsigned new_locks_want, -+ struct get_locks_fail *f), -+ TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(u8, btree_id ) -+ __field(u8, old_locks_want ) -+ __field(u8, new_locks_want ) -+ __field(u8, level ) -+ __field(u32, path_seq ) -+ __field(u32, node_seq ) -+ __field(u32, path_alloc_seq ) -+ __field(u32, downgrade_seq) -+ TRACE_BPOS_entries(pos) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->btree_id = path->btree_id; -+ __entry->old_locks_want = old_locks_want; -+ __entry->new_locks_want = new_locks_want; -+ __entry->level = f->l; -+ __entry->path_seq = path->l[f->l].lock_seq; -+ __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq; -+ __entry->path_alloc_seq = path->alloc_seq; -+ __entry->downgrade_seq = path->downgrade_seq; -+ TRACE_BPOS_assign(pos, path->pos) -+ ), -+ -+ TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ bch2_btree_id_str(__entry->btree_id), -+ __entry->pos_inode, -+ __entry->pos_offset, -+ __entry->pos_snapshot, -+ __entry->old_locks_want, -+ __entry->new_locks_want, -+ __entry->level, -+ __entry->path_seq, -+ __entry->node_seq, -+ __entry->path_alloc_seq, -+ __entry->downgrade_seq) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_parent_for_fill, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_after_fill, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_key_cache_upgrade, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_key_cache_fill, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_path_intent, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path 
*path), -+ TP_ARGS(trans, caller_ip, path) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_would_deadlock, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+TRACE_EVENT(trans_restart_would_deadlock_write, -+ TP_PROTO(struct btree_trans *trans), -+ TP_ARGS(trans), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ ), -+ -+ TP_printk("%s", __entry->trans_fn) -+); -+ -+TRACE_EVENT(trans_restart_mem_realloced, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ unsigned long bytes), -+ TP_ARGS(trans, caller_ip, bytes), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(unsigned long, bytes ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ __entry->bytes = bytes; -+ ), -+ -+ TP_printk("%s %pS bytes %lu", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ __entry->bytes) -+); -+ -+TRACE_EVENT(trans_restart_key_cache_key_realloced, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path, -+ unsigned old_u64s, -+ unsigned new_u64s), -+ TP_ARGS(trans, caller_ip, path, old_u64s, new_u64s), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ __field(enum btree_id, btree_id ) -+ TRACE_BPOS_entries(pos) -+ __field(u32, old_u64s ) -+ __field(u32, new_u64s ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ -+ __entry->btree_id = path->btree_id; -+ TRACE_BPOS_assign(pos, path->pos); -+ __entry->old_u64s = old_u64s; -+ __entry->new_u64s = new_u64s; -+ ), -+ -+ TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip, -+ bch2_btree_id_str(__entry->btree_id), -+ __entry->pos_inode, -+ __entry->pos_offset, -+ __entry->pos_snapshot, -+ __entry->old_u64s, -+ __entry->new_u64s) -+); -+ -+TRACE_EVENT(path_downgrade, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip, -+ struct btree_path *path), -+ TP_ARGS(trans, caller_ip, path), -+ -+ TP_STRUCT__entry( -+ __array(char, trans_fn, 32 ) -+ __field(unsigned long, caller_ip ) -+ ), -+ -+ TP_fast_assign( -+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); -+ __entry->caller_ip = caller_ip; -+ ), -+ -+ TP_printk("%s %pS", -+ __entry->trans_fn, -+ (void *) __entry->caller_ip) -+); -+ -+DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, -+ TP_PROTO(struct btree_trans *trans, -+ unsigned long caller_ip), -+ TP_ARGS(trans, caller_ip) -+); -+ -+TRACE_EVENT(write_buffer_flush, -+ TP_PROTO(struct btree_trans *trans, size_t nr, size_t skipped, size_t fast, size_t size), -+ TP_ARGS(trans, nr, skipped, fast, size), -+ -+ TP_STRUCT__entry( -+ __field(size_t, nr ) -+ __field(size_t, skipped ) -+ __field(size_t, fast ) -+ __field(size_t, size ) -+ ), -+ -+ TP_fast_assign( -+ __entry->nr = nr; -+ __entry->skipped = skipped; -+ __entry->fast = fast; -+ __entry->size = size; -+ ), -+ -+ TP_printk("%zu/%zu skipped %zu fast %zu", -+ __entry->nr, __entry->size, __entry->skipped, __entry->fast) -+); 
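For context: each TRACE_EVENT()/DEFINE_EVENT() above expands into a generated trace_<name>() helper that the filesystem code calls to emit the event. A minimal sketch of a call site follows (illustrative only -- this function is not part of the patch; trace_write_buffer_flush() is the helper generated from the event defined just above):

	/* Some .c file includes this header once with CREATE_TRACE_POINTS
	 * defined; call sites then reduce to a static-branch no-op unless
	 * the tracepoint is enabled:
	 */
	static void note_write_buffer_flush(struct btree_trans *trans,
					    size_t nr, size_t skipped,
					    size_t fast, size_t size)
	{
		trace_write_buffer_flush(trans, nr, skipped, fast, size);
	}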
-+
-+TRACE_EVENT(write_buffer_flush_slowpath,
-+ TP_PROTO(struct btree_trans *trans, size_t nr, size_t size),
-+ TP_ARGS(trans, nr, size),
-+
-+ TP_STRUCT__entry(
-+ __field(size_t, nr )
-+ __field(size_t, size )
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->nr = nr;
-+ __entry->size = size;
-+ ),
-+
-+ TP_printk("%zu/%zu", __entry->nr, __entry->size)
-+);
-+
-+#endif /* _TRACE_BCACHEFS_H */
-+
-+/* This part must be outside protection */
-+#undef TRACE_INCLUDE_PATH
-+#define TRACE_INCLUDE_PATH ../../fs/bcachefs
-+
-+#undef TRACE_INCLUDE_FILE
-+#define TRACE_INCLUDE_FILE trace
-+
-+#include <trace/define_trace.h>
-diff --git a/fs/bcachefs/two_state_shared_lock.c b/fs/bcachefs/two_state_shared_lock.c
-new file mode 100644
-index 000000000000..9764c2e6a910
---- /dev/null
-+++ b/fs/bcachefs/two_state_shared_lock.c
-@@ -0,0 +1,8 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include "two_state_shared_lock.h"
-+
-+void __bch2_two_state_lock(two_state_lock_t *lock, int s)
-+{
-+ __wait_event(lock->wait, bch2_two_state_trylock(lock, s));
-+}
-diff --git a/fs/bcachefs/two_state_shared_lock.h b/fs/bcachefs/two_state_shared_lock.h
-new file mode 100644
-index 000000000000..905801772002
---- /dev/null
-+++ b/fs/bcachefs/two_state_shared_lock.h
-@@ -0,0 +1,59 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_TWO_STATE_LOCK_H
-+#define _BCACHEFS_TWO_STATE_LOCK_H
-+
-+#include
-+#include
-+#include
-+
-+#include "util.h"
-+
-+/*
-+ * Two-state lock - can be taken for add or block - both states are shared,
-+ * like read side of rwsem, but conflict with other state:
-+ */
-+typedef struct {
-+ atomic_long_t v;
-+ wait_queue_head_t wait;
-+} two_state_lock_t;
-+
-+static inline void two_state_lock_init(two_state_lock_t *lock)
-+{
-+ atomic_long_set(&lock->v, 0);
-+ init_waitqueue_head(&lock->wait);
-+}
-+
-+static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s)
-+{
-+ long i = s ? 1 : -1;
-+
-+ EBUG_ON(atomic_long_read(&lock->v) == 0);
-+
-+ if (atomic_long_sub_return_release(i, &lock->v) == 0)
-+ wake_up_all(&lock->wait);
-+}
-+
-+static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
-+{
-+ long i = s ? 1 : -1;
-+ long v = atomic_long_read(&lock->v), old;
-+
-+ do {
-+ old = v;
-+
-+ if (i > 0 ? v < 0 : v > 0)
-+ return false;
-+ } while ((v = atomic_long_cmpxchg_acquire(&lock->v,
-+ old, old + i)) != old);
-+ return true;
-+}
-+
-+void __bch2_two_state_lock(two_state_lock_t *, int);
-+
-+static inline void bch2_two_state_lock(two_state_lock_t *lock, int s)
-+{
-+ if (!bch2_two_state_trylock(lock, s))
-+ __bch2_two_state_lock(lock, s);
-+}
-+
-+#endif /* _BCACHEFS_TWO_STATE_LOCK_H */
-diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
-new file mode 100644
-index 000000000000..84b142fcc3df
---- /dev/null
-+++ b/fs/bcachefs/util.c
-@@ -0,0 +1,1159 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * random utility code, for bcache but in theory not specific to bcache
-+ *
-+ * Copyright 2010, 2011 Kent Overstreet
-+ * Copyright 2012 Google, Inc.
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "eytzinger.h" -+#include "mean_and_variance.h" -+#include "util.h" -+ -+static const char si_units[] = "?kMGTPEZY"; -+ -+/* string_get_size units: */ -+static const char *const units_2[] = { -+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" -+}; -+static const char *const units_10[] = { -+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" -+}; -+ -+static int parse_u64(const char *cp, u64 *res) -+{ -+ const char *start = cp; -+ u64 v = 0; -+ -+ if (!isdigit(*cp)) -+ return -EINVAL; -+ -+ do { -+ if (v > U64_MAX / 10) -+ return -ERANGE; -+ v *= 10; -+ if (v > U64_MAX - (*cp - '0')) -+ return -ERANGE; -+ v += *cp - '0'; -+ cp++; -+ } while (isdigit(*cp)); -+ -+ *res = v; -+ return cp - start; -+} -+ -+static int bch2_pow(u64 n, u64 p, u64 *res) -+{ -+ *res = 1; -+ -+ while (p--) { -+ if (*res > div_u64(U64_MAX, n)) -+ return -ERANGE; -+ *res *= n; -+ } -+ return 0; -+} -+ -+static int parse_unit_suffix(const char *cp, u64 *res) -+{ -+ const char *start = cp; -+ u64 base = 1024; -+ unsigned u; -+ int ret; -+ -+ if (*cp == ' ') -+ cp++; -+ -+ for (u = 1; u < strlen(si_units); u++) -+ if (*cp == si_units[u]) { -+ cp++; -+ goto got_unit; -+ } -+ -+ for (u = 0; u < ARRAY_SIZE(units_2); u++) -+ if (!strncmp(cp, units_2[u], strlen(units_2[u]))) { -+ cp += strlen(units_2[u]); -+ goto got_unit; -+ } -+ -+ for (u = 0; u < ARRAY_SIZE(units_10); u++) -+ if (!strncmp(cp, units_10[u], strlen(units_10[u]))) { -+ cp += strlen(units_10[u]); -+ base = 1000; -+ goto got_unit; -+ } -+ -+ *res = 1; -+ return 0; -+got_unit: -+ ret = bch2_pow(base, u, res); -+ if (ret) -+ return ret; -+ -+ return cp - start; -+} -+ -+#define parse_or_ret(cp, _f) \ -+do { \ -+ int _ret = _f; \ -+ if (_ret < 0) \ -+ return _ret; \ -+ cp += _ret; \ -+} while (0) -+ -+static int __bch2_strtou64_h(const char *cp, u64 *res) -+{ -+ const char *start = cp; -+ u64 v = 0, b, f_n = 0, f_d = 1; -+ int ret; -+ -+ parse_or_ret(cp, parse_u64(cp, &v)); -+ -+ if (*cp == '.') { -+ cp++; -+ ret = parse_u64(cp, &f_n); -+ if (ret < 0) -+ return ret; -+ cp += ret; -+ -+ ret = bch2_pow(10, ret, &f_d); -+ if (ret) -+ return ret; -+ } -+ -+ parse_or_ret(cp, parse_unit_suffix(cp, &b)); -+ -+ if (v > div_u64(U64_MAX, b)) -+ return -ERANGE; -+ v *= b; -+ -+ if (f_n > div_u64(U64_MAX, b)) -+ return -ERANGE; -+ -+ f_n = div_u64(f_n * b, f_d); -+ if (v + f_n < v) -+ return -ERANGE; -+ v += f_n; -+ -+ *res = v; -+ return cp - start; -+} -+ -+static int __bch2_strtoh(const char *cp, u64 *res, -+ u64 t_max, bool t_signed) -+{ -+ bool positive = *cp != '-'; -+ u64 v = 0; -+ -+ if (*cp == '+' || *cp == '-') -+ cp++; -+ -+ parse_or_ret(cp, __bch2_strtou64_h(cp, &v)); -+ -+ if (*cp == '\n') -+ cp++; -+ if (*cp) -+ return -EINVAL; -+ -+ if (positive) { -+ if (v > t_max) -+ return -ERANGE; -+ } else { -+ if (v && !t_signed) -+ return -ERANGE; -+ -+ if (v > t_max + 1) -+ return -ERANGE; -+ v = -v; -+ } -+ -+ *res = v; -+ return 0; -+} -+ -+#define STRTO_H(name, type) \ -+int bch2_ ## name ## _h(const char *cp, type *res) \ -+{ \ -+ u64 v = 0; \ -+ int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ -+ ANYSINT_MAX(type) != ((type) ~0ULL)); \ -+ *res = v; \ -+ return ret; \ -+} -+ -+STRTO_H(strtoint, int) -+STRTO_H(strtouint, unsigned int) -+STRTO_H(strtoll, long long) -+STRTO_H(strtoull, unsigned long long) -+STRTO_H(strtou64, u64) -+ -+u64 
bch2_read_flag_list(char *opt, const char * const list[]) -+{ -+ u64 ret = 0; -+ char *p, *s, *d = kstrdup(opt, GFP_KERNEL); -+ -+ if (!d) -+ return -ENOMEM; -+ -+ s = strim(d); -+ -+ while ((p = strsep(&s, ","))) { -+ int flag = match_string(list, -1, p); -+ -+ if (flag < 0) { -+ ret = -1; -+ break; -+ } -+ -+ ret |= 1 << flag; -+ } -+ -+ kfree(d); -+ -+ return ret; -+} -+ -+bool bch2_is_zero(const void *_p, size_t n) -+{ -+ const char *p = _p; -+ size_t i; -+ -+ for (i = 0; i < n; i++) -+ if (p[i]) -+ return false; -+ return true; -+} -+ -+void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) -+{ -+ while (nr_bits) -+ prt_char(out, '0' + ((v >> --nr_bits) & 1)); -+} -+ -+void bch2_print_string_as_lines(const char *prefix, const char *lines) -+{ -+ const char *p; -+ -+ if (!lines) { -+ printk("%s (null)\n", prefix); -+ return; -+ } -+ -+ console_lock(); -+ while (1) { -+ p = strchrnul(lines, '\n'); -+ printk("%s%.*s\n", prefix, (int) (p - lines), lines); -+ if (!*p) -+ break; -+ lines = p + 1; -+ } -+ console_unlock(); -+} -+ -+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) -+{ -+#ifdef CONFIG_STACKTRACE -+ unsigned nr_entries = 0; -+ int ret = 0; -+ -+ stack->nr = 0; -+ ret = darray_make_room(stack, 32); -+ if (ret) -+ return ret; -+ -+ if (!down_read_trylock(&task->signal->exec_update_lock)) -+ return -1; -+ -+ do { -+ nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0); -+ } while (nr_entries == stack->size && -+ !(ret = darray_make_room(stack, stack->size * 2))); -+ -+ stack->nr = nr_entries; -+ up_read(&task->signal->exec_update_lock); -+ -+ return ret; -+#else -+ return 0; -+#endif -+} -+ -+void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) -+{ -+ unsigned long *i; -+ -+ darray_for_each(*stack, i) { -+ prt_printf(out, "[<0>] %pB", (void *) *i); -+ prt_newline(out); -+ } -+} -+ -+int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task) -+{ -+ bch_stacktrace stack = { 0 }; -+ int ret = bch2_save_backtrace(&stack, task); -+ -+ bch2_prt_backtrace(out, &stack); -+ darray_exit(&stack); -+ return ret; -+} -+ -+/* time stats: */ -+ -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v) -+{ -+ unsigned i = 0; -+ -+ while (i < ARRAY_SIZE(q->entries)) { -+ struct bch2_quantile_entry *e = q->entries + i; -+ -+ if (unlikely(!e->step)) { -+ e->m = v; -+ e->step = max_t(unsigned, v / 2, 1024); -+ } else if (e->m > v) { -+ e->m = e->m >= e->step -+ ? e->m - e->step -+ : 0; -+ } else if (e->m < v) { -+ e->m = e->m + e->step > e->m -+ ? e->m + e->step -+ : U32_MAX; -+ } -+ -+ if ((e->m > v ? 
e->m - v : v - e->m) < e->step) -+ e->step = max_t(unsigned, e->step / 2, 1); -+ -+ if (v >= e->m) -+ break; -+ -+ i = eytzinger0_child(i, v > e->m); -+ } -+} -+ -+static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, -+ u64 start, u64 end) -+{ -+ u64 duration, freq; -+ -+ if (time_after64(end, start)) { -+ duration = end - start; -+ mean_and_variance_update(&stats->duration_stats, duration); -+ mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration); -+ stats->max_duration = max(stats->max_duration, duration); -+ stats->min_duration = min(stats->min_duration, duration); -+ bch2_quantiles_update(&stats->quantiles, duration); -+ } -+ -+ if (time_after64(end, stats->last_event)) { -+ freq = end - stats->last_event; -+ mean_and_variance_update(&stats->freq_stats, freq); -+ mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq); -+ stats->max_freq = max(stats->max_freq, freq); -+ stats->min_freq = min(stats->min_freq, freq); -+ stats->last_event = end; -+ } -+} -+ -+static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, -+ struct bch2_time_stat_buffer *b) -+{ -+ struct bch2_time_stat_buffer_entry *i; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&stats->lock, flags); -+ for (i = b->entries; -+ i < b->entries + ARRAY_SIZE(b->entries); -+ i++) -+ bch2_time_stats_update_one(stats, i->start, i->end); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ -+ b->nr = 0; -+} -+ -+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) -+{ -+ unsigned long flags; -+ -+ WARN_RATELIMIT(!stats->min_duration || !stats->min_freq, -+ "time_stats: min_duration = %llu, min_freq = %llu", -+ stats->min_duration, stats->min_freq); -+ -+ if (!stats->buffer) { -+ spin_lock_irqsave(&stats->lock, flags); -+ bch2_time_stats_update_one(stats, start, end); -+ -+ if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 && -+ stats->duration_stats.n > 1024) -+ stats->buffer = -+ alloc_percpu_gfp(struct bch2_time_stat_buffer, -+ GFP_ATOMIC); -+ spin_unlock_irqrestore(&stats->lock, flags); -+ } else { -+ struct bch2_time_stat_buffer *b; -+ -+ preempt_disable(); -+ b = this_cpu_ptr(stats->buffer); -+ -+ BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); -+ b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) { -+ .start = start, -+ .end = end -+ }; -+ -+ if (unlikely(b->nr == ARRAY_SIZE(b->entries))) -+ bch2_time_stats_clear_buffer(stats, b); -+ preempt_enable(); -+ } -+} -+#endif -+ -+static const struct time_unit { -+ const char *name; -+ u64 nsecs; -+} time_units[] = { -+ { "ns", 1 }, -+ { "us", NSEC_PER_USEC }, -+ { "ms", NSEC_PER_MSEC }, -+ { "s", NSEC_PER_SEC }, -+ { "m", (u64) NSEC_PER_SEC * 60}, -+ { "h", (u64) NSEC_PER_SEC * 3600}, -+ { "eon", U64_MAX }, -+}; -+ -+static const struct time_unit *pick_time_units(u64 ns) -+{ -+ const struct time_unit *u; -+ -+ for (u = time_units; -+ u + 1 < time_units + ARRAY_SIZE(time_units) && -+ ns >= u[1].nsecs << 1; -+ u++) -+ ; -+ -+ return u; -+} -+ -+void bch2_pr_time_units(struct printbuf *out, u64 ns) -+{ -+ const struct time_unit *u = pick_time_units(ns); -+ -+ prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); -+} -+ -+static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) -+{ -+ const struct time_unit *u = pick_time_units(ns); -+ -+ prt_printf(out, "%llu ", div64_u64(ns, u->nsecs)); -+ prt_tab_rjust(out); -+ prt_printf(out, "%s", u->name); -+} -+ -+#ifndef __KERNEL__ -+#include -+void bch2_prt_datetime(struct printbuf *out, 
time64_t sec) -+{ -+ time_t t = sec; -+ char buf[64]; -+ ctime_r(&t, buf); -+ prt_str(out, buf); -+} -+#else -+void bch2_prt_datetime(struct printbuf *out, time64_t sec) -+{ -+ char buf[64]; -+ snprintf(buf, sizeof(buf), "%ptT", &sec); -+ prt_u64(out, sec); -+} -+#endif -+ -+#define TABSTOP_SIZE 12 -+ -+static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) -+{ -+ prt_str(out, name); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, ns); -+ prt_newline(out); -+} -+ -+void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) -+{ -+ const struct time_unit *u; -+ s64 f_mean = 0, d_mean = 0; -+ u64 q, last_q = 0, f_stddev = 0, d_stddev = 0; -+ int i; -+ /* -+ * avoid divide by zero -+ */ -+ if (stats->freq_stats.n) { -+ f_mean = mean_and_variance_get_mean(stats->freq_stats); -+ f_stddev = mean_and_variance_get_stddev(stats->freq_stats); -+ d_mean = mean_and_variance_get_mean(stats->duration_stats); -+ d_stddev = mean_and_variance_get_stddev(stats->duration_stats); -+ } -+ -+ printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE); -+ prt_printf(out, "count:"); -+ prt_tab(out); -+ prt_printf(out, "%llu ", -+ stats->duration_stats.n); -+ printbuf_tabstop_pop(out); -+ prt_newline(out); -+ -+ printbuf_tabstops_reset(out); -+ -+ printbuf_tabstop_push(out, out->indent + 20); -+ printbuf_tabstop_push(out, TABSTOP_SIZE + 2); -+ printbuf_tabstop_push(out, 0); -+ printbuf_tabstop_push(out, TABSTOP_SIZE + 2); -+ -+ prt_tab(out); -+ prt_printf(out, "since mount"); -+ prt_tab_rjust(out); -+ prt_tab(out); -+ prt_printf(out, "recent"); -+ prt_tab_rjust(out); -+ prt_newline(out); -+ -+ printbuf_tabstops_reset(out); -+ printbuf_tabstop_push(out, out->indent + 20); -+ printbuf_tabstop_push(out, TABSTOP_SIZE); -+ printbuf_tabstop_push(out, 2); -+ printbuf_tabstop_push(out, TABSTOP_SIZE); -+ -+ prt_printf(out, "duration of events"); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ pr_name_and_units(out, "min:", stats->min_duration); -+ pr_name_and_units(out, "max:", stats->max_duration); -+ -+ prt_printf(out, "mean:"); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, d_mean); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted)); -+ prt_newline(out); -+ -+ prt_printf(out, "stddev:"); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, d_stddev); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted)); -+ -+ printbuf_indent_sub(out, 2); -+ prt_newline(out); -+ -+ prt_printf(out, "time between events"); -+ prt_newline(out); -+ printbuf_indent_add(out, 2); -+ -+ pr_name_and_units(out, "min:", stats->min_freq); -+ pr_name_and_units(out, "max:", stats->max_freq); -+ -+ prt_printf(out, "mean:"); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, f_mean); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted)); -+ prt_newline(out); -+ -+ prt_printf(out, "stddev:"); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, f_stddev); -+ prt_tab(out); -+ bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted)); -+ -+ printbuf_indent_sub(out, 2); -+ prt_newline(out); -+ -+ printbuf_tabstops_reset(out); -+ -+ i = eytzinger0_first(NR_QUANTILES); -+ u = pick_time_units(stats->quantiles.entries[i].m); -+ -+ prt_printf(out, "quantiles (%s):\t", u->name); -+ eytzinger0_for_each(i, NR_QUANTILES) { -+ bool is_last = eytzinger0_next(i, 
NR_QUANTILES) == -1;
-+
-+ q = max(stats->quantiles.entries[i].m, last_q);
-+ prt_printf(out, "%llu ",
-+ div_u64(q, u->nsecs));
-+ if (is_last)
-+ prt_newline(out);
-+ last_q = q;
-+ }
-+}
-+
-+void bch2_time_stats_exit(struct bch2_time_stats *stats)
-+{
-+ free_percpu(stats->buffer);
-+}
-+
-+void bch2_time_stats_init(struct bch2_time_stats *stats)
-+{
-+ memset(stats, 0, sizeof(*stats));
-+ stats->duration_stats_weighted.weight = 8;
-+ stats->freq_stats_weighted.weight = 8;
-+ stats->min_duration = U64_MAX;
-+ stats->min_freq = U64_MAX;
-+ spin_lock_init(&stats->lock);
-+}
-+
-+/* ratelimit: */
-+
-+/**
-+ * bch2_ratelimit_delay() - return how long to delay until the next time to do
-+ * some work
-+ * @d: the struct bch_ratelimit to update
-+ * Returns: the amount of time to delay by, in jiffies
-+ */
-+u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
-+{
-+ u64 now = local_clock();
-+
-+ return time_after64(d->next, now)
-+ ? nsecs_to_jiffies(d->next - now)
-+ : 0;
-+}
-+
-+/**
-+ * bch2_ratelimit_increment() - increment @d by the amount of work done
-+ * @d: the struct bch_ratelimit to update
-+ * @done: the amount of work done, in arbitrary units
-+ */
-+void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
-+{
-+ u64 now = local_clock();
-+
-+ d->next += div_u64(done * NSEC_PER_SEC, d->rate);
-+
-+ if (time_before64(now + NSEC_PER_SEC, d->next))
-+ d->next = now + NSEC_PER_SEC;
-+
-+ if (time_after64(now - NSEC_PER_SEC * 2, d->next))
-+ d->next = now - NSEC_PER_SEC * 2;
-+}
-+
-+/* pd controller: */
-+
-+/*
-+ * Updates pd_controller. Attempts to scale input values to units per second.
-+ * @target: desired value
-+ * @actual: current value
-+ *
-+ * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
-+ * it makes actual go down.
-+ */ -+void bch2_pd_controller_update(struct bch_pd_controller *pd, -+ s64 target, s64 actual, int sign) -+{ -+ s64 proportional, derivative, change; -+ -+ unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ; -+ -+ if (seconds_since_update == 0) -+ return; -+ -+ pd->last_update = jiffies; -+ -+ proportional = actual - target; -+ proportional *= seconds_since_update; -+ proportional = div_s64(proportional, pd->p_term_inverse); -+ -+ derivative = actual - pd->last_actual; -+ derivative = div_s64(derivative, seconds_since_update); -+ derivative = ewma_add(pd->smoothed_derivative, derivative, -+ (pd->d_term / seconds_since_update) ?: 1); -+ derivative = derivative * pd->d_term; -+ derivative = div_s64(derivative, pd->p_term_inverse); -+ -+ change = proportional + derivative; -+ -+ /* Don't increase rate if not keeping up */ -+ if (change > 0 && -+ pd->backpressure && -+ time_after64(local_clock(), -+ pd->rate.next + NSEC_PER_MSEC)) -+ change = 0; -+ -+ change *= (sign * -1); -+ -+ pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change, -+ 1, UINT_MAX); -+ -+ pd->last_actual = actual; -+ pd->last_derivative = derivative; -+ pd->last_proportional = proportional; -+ pd->last_change = change; -+ pd->last_target = target; -+} -+ -+void bch2_pd_controller_init(struct bch_pd_controller *pd) -+{ -+ pd->rate.rate = 1024; -+ pd->last_update = jiffies; -+ pd->p_term_inverse = 6000; -+ pd->d_term = 30; -+ pd->d_smooth = pd->d_term; -+ pd->backpressure = 1; -+} -+ -+void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) -+{ -+ if (!out->nr_tabstops) -+ printbuf_tabstop_push(out, 20); -+ -+ prt_printf(out, "rate:"); -+ prt_tab(out); -+ prt_human_readable_s64(out, pd->rate.rate); -+ prt_newline(out); -+ -+ prt_printf(out, "target:"); -+ prt_tab(out); -+ prt_human_readable_u64(out, pd->last_target); -+ prt_newline(out); -+ -+ prt_printf(out, "actual:"); -+ prt_tab(out); -+ prt_human_readable_u64(out, pd->last_actual); -+ prt_newline(out); -+ -+ prt_printf(out, "proportional:"); -+ prt_tab(out); -+ prt_human_readable_s64(out, pd->last_proportional); -+ prt_newline(out); -+ -+ prt_printf(out, "derivative:"); -+ prt_tab(out); -+ prt_human_readable_s64(out, pd->last_derivative); -+ prt_newline(out); -+ -+ prt_printf(out, "change:"); -+ prt_tab(out); -+ prt_human_readable_s64(out, pd->last_change); -+ prt_newline(out); -+ -+ prt_printf(out, "next io:"); -+ prt_tab(out); -+ prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC)); -+ prt_newline(out); -+} -+ -+/* misc: */ -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t size) -+{ -+ while (size) { -+ struct page *page = is_vmalloc_addr(base) -+ ? 
vmalloc_to_page(base) -+ : virt_to_page(base); -+ unsigned offset = offset_in_page(base); -+ unsigned len = min_t(size_t, PAGE_SIZE - offset, size); -+ -+ BUG_ON(!bio_add_page(bio, page, len, offset)); -+ size -= len; -+ base += len; -+ } -+} -+ -+int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) -+{ -+ while (size) { -+ struct page *page = alloc_pages(gfp_mask, 0); -+ unsigned len = min_t(size_t, PAGE_SIZE, size); -+ -+ if (!page) -+ return -ENOMEM; -+ -+ if (unlikely(!bio_add_page(bio, page, len, 0))) { -+ __free_page(page); -+ break; -+ } -+ -+ size -= len; -+ } -+ -+ return 0; -+} -+ -+size_t bch2_rand_range(size_t max) -+{ -+ size_t rand; -+ -+ if (!max) -+ return 0; -+ -+ do { -+ rand = get_random_long(); -+ rand &= roundup_pow_of_two(max) - 1; -+ } while (rand >= max); -+ -+ return rand; -+} -+ -+void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, dst, iter, dst_iter) { -+ void *dstp = kmap_local_page(bv.bv_page); -+ -+ memcpy(dstp + bv.bv_offset, src, bv.bv_len); -+ kunmap_local(dstp); -+ -+ src += bv.bv_len; -+ } -+} -+ -+void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) -+{ -+ struct bio_vec bv; -+ struct bvec_iter iter; -+ -+ __bio_for_each_segment(bv, src, iter, src_iter) { -+ void *srcp = kmap_local_page(bv.bv_page); -+ -+ memcpy(dst, srcp + bv.bv_offset, bv.bv_len); -+ kunmap_local(srcp); -+ -+ dst += bv.bv_len; -+ } -+} -+ -+static int alignment_ok(const void *base, size_t align) -+{ -+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -+ ((unsigned long)base & (align - 1)) == 0; -+} -+ -+static void u32_swap(void *a, void *b, size_t size) -+{ -+ u32 t = *(u32 *)a; -+ *(u32 *)a = *(u32 *)b; -+ *(u32 *)b = t; -+} -+ -+static void u64_swap(void *a, void *b, size_t size) -+{ -+ u64 t = *(u64 *)a; -+ *(u64 *)a = *(u64 *)b; -+ *(u64 *)b = t; -+} -+ -+static void generic_swap(void *a, void *b, size_t size) -+{ -+ char t; -+ -+ do { -+ t = *(char *)a; -+ *(char *)a++ = *(char *)b; -+ *(char *)b++ = t; -+ } while (--size > 0); -+} -+ -+static inline int do_cmp(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ size_t l, size_t r) -+{ -+ return cmp_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+static inline void do_swap(void *base, size_t n, size_t size, -+ void (*swap_func)(void *, void *, size_t), -+ size_t l, size_t r) -+{ -+ swap_func(base + inorder_to_eytzinger0(l, n) * size, -+ base + inorder_to_eytzinger0(r, n) * size, -+ size); -+} -+ -+void eytzinger0_sort(void *base, size_t n, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)) -+{ -+ int i, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for (i = n / 2 - 1; i >= 0; --i) { -+ for (r = i; r * 2 + 1 < n; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < n && -+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, c); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - 1; i > 0; --i) { -+ do_swap(base, n, size, swap_func, 0, i); -+ -+ for (r = 0; r * 2 + 1 < i; r = c) { -+ c = r * 2 + 1; -+ -+ if (c + 1 < i && 
-+ do_cmp(base, n, size, cmp_func, c, c + 1) < 0) -+ c++; -+ -+ if (do_cmp(base, n, size, cmp_func, r, c) >= 0) -+ break; -+ -+ do_swap(base, n, size, swap_func, r, c); -+ } -+ } -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t size)) -+{ -+ /* pre-scale counters for performance */ -+ int i = (num/2 - 1) * size, n = num * size, c, r; -+ -+ if (!swap_func) { -+ if (size == 4 && alignment_ok(base, 4)) -+ swap_func = u32_swap; -+ else if (size == 8 && alignment_ok(base, 8)) -+ swap_func = u64_swap; -+ else -+ swap_func = generic_swap; -+ } -+ -+ /* heapify */ -+ for ( ; i >= 0; i -= size) { -+ for (r = i; r * 2 + size < n; r = c) { -+ c = r * 2 + size; -+ if (c < n - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+ -+ /* sort */ -+ for (i = n - size; i > 0; i -= size) { -+ swap_func(base, base + i, size); -+ for (r = 0; r * 2 + size < i; r = c) { -+ c = r * 2 + size; -+ if (c < i - size && -+ cmp_func(base + c, base + c + size, size) < 0) -+ c += size; -+ if (cmp_func(base + r, base + c, size) >= 0) -+ break; -+ swap_func(base + r, base + c, size); -+ } -+ } -+} -+ -+static void mempool_free_vp(void *element, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ vpfree(element, size); -+} -+ -+static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data) -+{ -+ size_t size = (size_t) pool_data; -+ -+ return vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size) -+{ -+ return size < PAGE_SIZE -+ ? mempool_init_kmalloc_pool(pool, min_nr, size) -+ : mempool_init(pool, min_nr, mempool_alloc_vp, -+ mempool_free_vp, (void *) size); -+} -+ -+#if 0 -+void eytzinger1_test(void) -+{ -+ unsigned inorder, eytz, size; -+ -+ pr_info("1 based eytzinger test:"); -+ -+ for (size = 2; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger1_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size)); -+ BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size)); -+ -+ BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0); -+ BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0); -+ -+ inorder = 1; -+ eytzinger1_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger1_last(size) && -+ eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz); -+ -+ inorder++; -+ } -+ } -+} -+ -+void eytzinger0_test(void) -+{ -+ -+ unsigned inorder, eytz, size; -+ -+ pr_info("0 based eytzinger test:"); -+ -+ for (size = 1; -+ size < 65536; -+ size++) { -+ unsigned extra = eytzinger0_extra(size); -+ -+ if (!(size % 4096)) -+ pr_info("tree size %u", size); -+ -+ BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size)); -+ BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size)); -+ -+ BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); -+ BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); -+ -+ inorder = 0; -+ eytzinger0_for_each(eytz, size) { -+ BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); -+ BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); -+ BUG_ON(eytz != eytzinger0_last(size) && -+ eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); -+ -+ 
inorder++;
-+ }
-+ }
-+}
-+
-+static inline int cmp_u16(const void *_l, const void *_r, size_t size)
-+{
-+ const u16 *l = _l, *r = _r;
-+
-+ return (*l > *r) - (*l < *r);
-+}
-+
-+static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
-+{
-+ int i, c1 = -1, c2 = -1;
-+ ssize_t r;
-+
-+ r = eytzinger0_find_le(test_array, nr,
-+ sizeof(test_array[0]),
-+ cmp_u16, &search);
-+ if (r >= 0)
-+ c1 = test_array[r];
-+
-+ for (i = 0; i < nr; i++)
-+ if (test_array[i] <= search && test_array[i] > c2)
-+ c2 = test_array[i];
-+
-+ if (c1 != c2) {
-+ eytzinger0_for_each(i, nr)
-+ pr_info("[%3u] = %12u", i, test_array[i]);
-+ pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
-+ search, r, c1, c2);
-+ }
-+}
-+
-+void eytzinger0_find_test(void)
-+{
-+ unsigned i, nr, allocated = 1 << 12;
-+ u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);
-+
-+ for (nr = 1; nr < allocated; nr++) {
-+ pr_info("testing %u elems", nr);
-+
-+ get_random_bytes(test_array, nr * sizeof(test_array[0]));
-+ eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);
-+
-+ /* verify array is sorted correctly: */
-+ eytzinger0_for_each(i, nr)
-+ BUG_ON(i != eytzinger0_last(nr) &&
-+ test_array[i] > test_array[eytzinger0_next(i, nr)]);
-+
-+ for (i = 0; i < U16_MAX; i += 1 << 12)
-+ eytzinger0_find_test_val(test_array, nr, i);
-+
-+ for (i = 0; i < nr; i++) {
-+ eytzinger0_find_test_val(test_array, nr, test_array[i] - 1);
-+ eytzinger0_find_test_val(test_array, nr, test_array[i]);
-+ eytzinger0_find_test_val(test_array, nr, test_array[i] + 1);
-+ }
-+ }
-+
-+ kfree(test_array);
-+}
-+#endif
-+
-+/*
-+ * Accumulate percpu counters onto one cpu's copy - only valid when access
-+ * against any percpu counter is guarded against
-+ */
-+u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
-+{
-+ u64 *ret;
-+ int cpu;
-+
-+ /* access to pcpu vars has to be blocked by other locking */
-+ preempt_disable();
-+ ret = this_cpu_ptr(p);
-+ preempt_enable();
-+
-+ for_each_possible_cpu(cpu) {
-+ u64 *i = per_cpu_ptr(p, cpu);
-+
-+ if (i != ret) {
-+ acc_u64s(ret, i, nr);
-+ memset(i, 0, nr * sizeof(u64));
-+ }
-+ }
-+
-+ return ret;
-+}
-diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
-new file mode 100644
-index 000000000000..2984b57b2958
---- /dev/null
-+++ b/fs/bcachefs/util.h
-@@ -0,0 +1,833 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+#ifndef _BCACHEFS_UTIL_H
-+#define _BCACHEFS_UTIL_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#include "mean_and_variance.h"
-+
-+#include "darray.h"
-+
-+struct closure;
-+
-+#ifdef CONFIG_BCACHEFS_DEBUG
-+#define EBUG_ON(cond) BUG_ON(cond)
-+#else
-+#define EBUG_ON(cond)
-+#endif
-+
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+#define CPU_BIG_ENDIAN 0
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+#define CPU_BIG_ENDIAN 1
-+#endif
-+
-+/* type hackery */
-+
-+#define type_is_exact(_val, _type) \
-+ __builtin_types_compatible_p(typeof(_val), _type)
-+
-+#define type_is(_val, _type) \
-+ (__builtin_types_compatible_p(typeof(_val), _type) || \
-+ __builtin_types_compatible_p(typeof(_val), const _type))
-+
-+/* Userspace doesn't align allocations as nicely as the kernel allocators: */
-+static inline size_t buf_pages(void *p, size_t len)
-+{
-+ return DIV_ROUND_UP(len +
-+ ((unsigned long) p & (PAGE_SIZE - 1)),
-+ PAGE_SIZE);
-+}
-+
-+static inline void vpfree(void *p, size_t size)
-+{
-+ if
(is_vmalloc_addr(p)) -+ vfree(p); -+ else -+ free_pages((unsigned long) p, get_order(size)); -+} -+ -+static inline void *vpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, -+ get_order(size)) ?: -+ __vmalloc(size, gfp_mask); -+} -+ -+static inline void kvpfree(void *p, size_t size) -+{ -+ if (size < PAGE_SIZE) -+ kfree(p); -+ else -+ vpfree(p, size); -+} -+ -+static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) -+{ -+ return size < PAGE_SIZE -+ ? kmalloc(size, gfp_mask) -+ : vpmalloc(size, gfp_mask); -+} -+ -+int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t); -+ -+#define HEAP(type) \ -+struct { \ -+ size_t size, used; \ -+ type *data; \ -+} -+ -+#define DECLARE_HEAP(type, name) HEAP(type) name -+ -+#define init_heap(heap, _size, gfp) \ -+({ \ -+ (heap)->used = 0; \ -+ (heap)->size = (_size); \ -+ (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\ -+ (gfp)); \ -+}) -+ -+#define free_heap(heap) \ -+do { \ -+ kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0])); \ -+ (heap)->data = NULL; \ -+} while (0) -+ -+#define heap_set_backpointer(h, i, _fn) \ -+do { \ -+ void (*fn)(typeof(h), size_t) = _fn; \ -+ if (fn) \ -+ fn(h, i); \ -+} while (0) -+ -+#define heap_swap(h, i, j, set_backpointer) \ -+do { \ -+ swap((h)->data[i], (h)->data[j]); \ -+ heap_set_backpointer(h, i, set_backpointer); \ -+ heap_set_backpointer(h, j, set_backpointer); \ -+} while (0) -+ -+#define heap_peek(h) \ -+({ \ -+ EBUG_ON(!(h)->used); \ -+ (h)->data[0]; \ -+}) -+ -+#define heap_full(h) ((h)->used == (h)->size) -+ -+#define heap_sift_down(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _c, _j = i; \ -+ \ -+ for (; _j * 2 + 1 < (h)->used; _j = _c) { \ -+ _c = _j * 2 + 1; \ -+ if (_c + 1 < (h)->used && \ -+ cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ -+ _c++; \ -+ \ -+ if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ -+ break; \ -+ heap_swap(h, _c, _j, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_sift_up(h, i, cmp, set_backpointer) \ -+do { \ -+ while (i) { \ -+ size_t p = (i - 1) / 2; \ -+ if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ -+ break; \ -+ heap_swap(h, i, p, set_backpointer); \ -+ i = p; \ -+ } \ -+} while (0) -+ -+#define __heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ size_t _i = (h)->used++; \ -+ (h)->data[_i] = d; \ -+ heap_set_backpointer(h, _i, set_backpointer); \ -+ \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ _i; \ -+}) -+ -+#define heap_add(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = !heap_full(h); \ -+ if (_r) \ -+ __heap_add(h, d, cmp, set_backpointer); \ -+ _r; \ -+}) -+ -+#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -+do { \ -+ if (!heap_add(h, new, cmp, set_backpointer) && \ -+ cmp(h, new, heap_peek(h)) >= 0) { \ -+ (h)->data[0] = new; \ -+ heap_set_backpointer(h, 0, set_backpointer); \ -+ heap_sift_down(h, 0, cmp, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_del(h, i, cmp, set_backpointer) \ -+do { \ -+ size_t _i = (i); \ -+ \ -+ BUG_ON(_i >= (h)->used); \ -+ (h)->used--; \ -+ if ((_i) < (h)->used) { \ -+ heap_swap(h, _i, (h)->used, set_backpointer); \ -+ heap_sift_up(h, _i, cmp, set_backpointer); \ -+ heap_sift_down(h, _i, cmp, set_backpointer); \ -+ } \ -+} while (0) -+ -+#define heap_pop(h, d, cmp, set_backpointer) \ -+({ \ -+ bool _r = (h)->used; \ -+ if (_r) { \ -+ (d) = (h)->data[0]; \ -+ heap_del(h, 0, cmp, set_backpointer); \ -+ } \ -+ _r; \ -+}) -+ -+#define heap_resort(heap, cmp, set_backpointer) \ -+do { \ -+ ssize_t _i; \ -+ for 
(_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ -+ heap_sift_down(heap, _i, cmp, set_backpointer); \ -+} while (0) -+ -+#define ANYSINT_MAX(t) \ -+ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -+ -+#include "printbuf.h" -+ -+#define prt_vprintf(_out, ...) bch2_prt_vprintf(_out, __VA_ARGS__) -+#define prt_printf(_out, ...) bch2_prt_printf(_out, __VA_ARGS__) -+#define printbuf_str(_buf) bch2_printbuf_str(_buf) -+#define printbuf_exit(_buf) bch2_printbuf_exit(_buf) -+ -+#define printbuf_tabstops_reset(_buf) bch2_printbuf_tabstops_reset(_buf) -+#define printbuf_tabstop_pop(_buf) bch2_printbuf_tabstop_pop(_buf) -+#define printbuf_tabstop_push(_buf, _n) bch2_printbuf_tabstop_push(_buf, _n) -+ -+#define printbuf_indent_add(_out, _n) bch2_printbuf_indent_add(_out, _n) -+#define printbuf_indent_sub(_out, _n) bch2_printbuf_indent_sub(_out, _n) -+ -+#define prt_newline(_out) bch2_prt_newline(_out) -+#define prt_tab(_out) bch2_prt_tab(_out) -+#define prt_tab_rjust(_out) bch2_prt_tab_rjust(_out) -+ -+#define prt_bytes_indented(...) bch2_prt_bytes_indented(__VA_ARGS__) -+#define prt_u64(_out, _v) prt_printf(_out, "%llu", (u64) (_v)) -+#define prt_human_readable_u64(...) bch2_prt_human_readable_u64(__VA_ARGS__) -+#define prt_human_readable_s64(...) bch2_prt_human_readable_s64(__VA_ARGS__) -+#define prt_units_u64(...) bch2_prt_units_u64(__VA_ARGS__) -+#define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__) -+#define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__) -+#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__) -+ -+void bch2_pr_time_units(struct printbuf *, u64); -+void bch2_prt_datetime(struct printbuf *, time64_t); -+ -+#ifdef __KERNEL__ -+static inline void uuid_unparse_lower(u8 *uuid, char *out) -+{ -+ sprintf(out, "%pUb", uuid); -+} -+#else -+#include -+#endif -+ -+static inline void pr_uuid(struct printbuf *out, u8 *uuid) -+{ -+ char uuid_str[40]; -+ -+ uuid_unparse_lower(uuid, uuid_str); -+ prt_printf(out, "%s", uuid_str); -+} -+ -+int bch2_strtoint_h(const char *, int *); -+int bch2_strtouint_h(const char *, unsigned int *); -+int bch2_strtoll_h(const char *, long long *); -+int bch2_strtoull_h(const char *, unsigned long long *); -+int bch2_strtou64_h(const char *, u64 *); -+ -+static inline int bch2_strtol_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtoint_h(cp, (int *) res); -+#else -+ return bch2_strtoll_h(cp, (long long *) res); -+#endif -+} -+ -+static inline int bch2_strtoul_h(const char *cp, long *res) -+{ -+#if BITS_PER_LONG == 32 -+ return bch2_strtouint_h(cp, (unsigned int *) res); -+#else -+ return bch2_strtoull_h(cp, (unsigned long long *) res); -+#endif -+} -+ -+#define strtoi_h(cp, res) \ -+ ( type_is(*res, int) ? bch2_strtoint_h(cp, (void *) res)\ -+ : type_is(*res, long) ? bch2_strtol_h(cp, (void *) res)\ -+ : type_is(*res, long long) ? bch2_strtoll_h(cp, (void *) res)\ -+ : type_is(*res, unsigned) ? bch2_strtouint_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long) ? bch2_strtoul_h(cp, (void *) res)\ -+ : type_is(*res, unsigned long long) ? 
bch2_strtoull_h(cp, (void *) res)\ -+ : -EINVAL) -+ -+#define strtoul_safe(cp, var) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = _v; \ -+ _r; \ -+}) -+ -+#define strtoul_safe_clamp(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r) \ -+ var = clamp_t(typeof(var), _v, min, max); \ -+ _r; \ -+}) -+ -+#define strtoul_safe_restrict(cp, var, min, max) \ -+({ \ -+ unsigned long _v; \ -+ int _r = kstrtoul(cp, 10, &_v); \ -+ if (!_r && _v >= min && _v <= max) \ -+ var = _v; \ -+ else \ -+ _r = -EINVAL; \ -+ _r; \ -+}) -+ -+#define snprint(out, var) \ -+ prt_printf(out, \ -+ type_is(var, int) ? "%i\n" \ -+ : type_is(var, unsigned) ? "%u\n" \ -+ : type_is(var, long) ? "%li\n" \ -+ : type_is(var, unsigned long) ? "%lu\n" \ -+ : type_is(var, s64) ? "%lli\n" \ -+ : type_is(var, u64) ? "%llu\n" \ -+ : type_is(var, char *) ? "%s\n" \ -+ : "%i\n", var) -+ -+bool bch2_is_zero(const void *, size_t); -+ -+u64 bch2_read_flag_list(char *, const char * const[]); -+ -+void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); -+ -+void bch2_print_string_as_lines(const char *prefix, const char *lines); -+ -+typedef DARRAY(unsigned long) bch_stacktrace; -+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *); -+void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); -+int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *); -+ -+#define NR_QUANTILES 15 -+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) -+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) -+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES) -+ -+struct bch2_quantiles { -+ struct bch2_quantile_entry { -+ u64 m; -+ u64 step; -+ } entries[NR_QUANTILES]; -+}; -+ -+struct bch2_time_stat_buffer { -+ unsigned nr; -+ struct bch2_time_stat_buffer_entry { -+ u64 start; -+ u64 end; -+ } entries[32]; -+}; -+ -+struct bch2_time_stats { -+ spinlock_t lock; -+ /* all fields are in nanoseconds */ -+ u64 max_duration; -+ u64 min_duration; -+ u64 max_freq; -+ u64 min_freq; -+ u64 last_event; -+ struct bch2_quantiles quantiles; -+ -+ struct mean_and_variance duration_stats; -+ struct mean_and_variance_weighted duration_stats_weighted; -+ struct mean_and_variance freq_stats; -+ struct mean_and_variance_weighted freq_stats_weighted; -+ struct bch2_time_stat_buffer __percpu *buffer; -+}; -+ -+#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64); -+#else -+static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {} -+#endif -+ -+static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) -+{ -+ __bch2_time_stats_update(stats, start, local_clock()); -+} -+ -+void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *); -+ -+void bch2_time_stats_exit(struct bch2_time_stats *); -+void bch2_time_stats_init(struct bch2_time_stats *); -+ -+#define ewma_add(ewma, val, weight) \ -+({ \ -+ typeof(ewma) _ewma = (ewma); \ -+ typeof(weight) _weight = (weight); \ -+ \ -+ (((_ewma << _weight) - _ewma) + (val)) >> _weight; \ -+}) -+ -+struct bch_ratelimit { -+ /* Next time we want to do some work, in nanoseconds */ -+ u64 next; -+ -+ /* -+ * Rate at which we want to do work, in units per nanosecond -+ * The units here correspond to the units passed to -+ * bch2_ratelimit_increment() -+ */ -+ unsigned rate; -+}; -+ -+static inline void bch2_ratelimit_reset(struct bch_ratelimit *d) -+{ -+ d->next = 
local_clock(); -+} -+ -+u64 bch2_ratelimit_delay(struct bch_ratelimit *); -+void bch2_ratelimit_increment(struct bch_ratelimit *, u64); -+ -+struct bch_pd_controller { -+ struct bch_ratelimit rate; -+ unsigned long last_update; -+ -+ s64 last_actual; -+ s64 smoothed_derivative; -+ -+ unsigned p_term_inverse; -+ unsigned d_smooth; -+ unsigned d_term; -+ -+ /* for exporting to sysfs (no effect on behavior) */ -+ s64 last_derivative; -+ s64 last_proportional; -+ s64 last_change; -+ s64 last_target; -+ -+ /* -+ * If true, the rate will not increase if bch2_ratelimit_delay() -+ * is not being called often enough. -+ */ -+ bool backpressure; -+}; -+ -+void bch2_pd_controller_update(struct bch_pd_controller *, s64, s64, int); -+void bch2_pd_controller_init(struct bch_pd_controller *); -+void bch2_pd_controller_debug_to_text(struct printbuf *, struct bch_pd_controller *); -+ -+#define sysfs_pd_controller_attribute(name) \ -+ rw_attribute(name##_rate); \ -+ rw_attribute(name##_rate_bytes); \ -+ rw_attribute(name##_rate_d_term); \ -+ rw_attribute(name##_rate_p_term_inverse); \ -+ read_attribute(name##_rate_debug) -+ -+#define sysfs_pd_controller_files(name) \ -+ &sysfs_##name##_rate, \ -+ &sysfs_##name##_rate_bytes, \ -+ &sysfs_##name##_rate_d_term, \ -+ &sysfs_##name##_rate_p_term_inverse, \ -+ &sysfs_##name##_rate_debug -+ -+#define sysfs_pd_controller_show(name, var) \ -+do { \ -+ sysfs_hprint(name##_rate, (var)->rate.rate); \ -+ sysfs_print(name##_rate_bytes, (var)->rate.rate); \ -+ sysfs_print(name##_rate_d_term, (var)->d_term); \ -+ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \ -+ \ -+ if (attr == &sysfs_##name##_rate_debug) \ -+ bch2_pd_controller_debug_to_text(out, var); \ -+} while (0) -+ -+#define sysfs_pd_controller_store(name, var) \ -+do { \ -+ sysfs_strtoul_clamp(name##_rate, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul_clamp(name##_rate_bytes, \ -+ (var)->rate.rate, 1, UINT_MAX); \ -+ sysfs_strtoul(name##_rate_d_term, (var)->d_term); \ -+ sysfs_strtoul_clamp(name##_rate_p_term_inverse, \ -+ (var)->p_term_inverse, 1, INT_MAX); \ -+} while (0) -+ -+#define container_of_or_null(ptr, type, member) \ -+({ \ -+ typeof(ptr) _ptr = ptr; \ -+ _ptr ? 
container_of(_ptr, type, member) : NULL; \ -+}) -+ -+/* Does linear interpolation between powers of two */ -+static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) -+{ -+ unsigned fract = x & ~(~0 << fract_bits); -+ -+ x >>= fract_bits; -+ x = 1 << x; -+ x += (x * fract) >> fract_bits; -+ -+ return x; -+} -+ -+void bch2_bio_map(struct bio *bio, void *base, size_t); -+int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); -+ -+static inline sector_t bdev_sectors(struct block_device *bdev) -+{ -+ return bdev->bd_inode->i_size >> 9; -+} -+ -+#define closure_bio_submit(bio, cl) \ -+do { \ -+ closure_get(cl); \ -+ submit_bio(bio); \ -+} while (0) -+ -+#define kthread_wait(cond) \ -+({ \ -+ int _ret = 0; \ -+ \ -+ while (1) { \ -+ set_current_state(TASK_INTERRUPTIBLE); \ -+ if (kthread_should_stop()) { \ -+ _ret = -1; \ -+ break; \ -+ } \ -+ \ -+ if (cond) \ -+ break; \ -+ \ -+ schedule(); \ -+ } \ -+ set_current_state(TASK_RUNNING); \ -+ _ret; \ -+}) -+ -+#define kthread_wait_freezable(cond) \ -+({ \ -+ int _ret = 0; \ -+ while (1) { \ -+ set_current_state(TASK_INTERRUPTIBLE); \ -+ if (kthread_should_stop()) { \ -+ _ret = -1; \ -+ break; \ -+ } \ -+ \ -+ if (cond) \ -+ break; \ -+ \ -+ schedule(); \ -+ try_to_freeze(); \ -+ } \ -+ set_current_state(TASK_RUNNING); \ -+ _ret; \ -+}) -+ -+size_t bch2_rand_range(size_t); -+ -+void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); -+void memcpy_from_bio(void *, struct bio *, struct bvec_iter); -+ -+static inline void memcpy_u64s_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+} -+ -+static inline void __memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ -+ asm volatile("rep ; movsq" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" (u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ u64 *d = dst; -+ const u64 *s = src; -+ -+ while (u64s--) -+ *d++ = *s++; -+#endif -+} -+ -+static inline void memcpy_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(!(dst >= src + u64s * sizeof(u64) || -+ dst + u64s * sizeof(u64) <= src)); -+ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_down(void *dst, const void *src, -+ unsigned u64s) -+{ -+ __memcpy_u64s(dst, src, u64s); -+} -+ -+static inline void memmove_u64s_down(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst > src); -+ -+ __memmove_u64s_down(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_down_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ memcpy_u64s_small(dst, src, u64s); -+} -+ -+static inline void memmove_u64s_down_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst > src); -+ -+ __memmove_u64s_down_small(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up_small(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s; -+ u64 *src = (u64 *) _src + u64s; -+ -+ while (u64s--) -+ *--dst = *--src; -+} -+ -+static inline void memmove_u64s_up_small(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up_small(dst, src, u64s); -+} -+ -+static inline void __memmove_u64s_up(void *_dst, const void *_src, -+ unsigned u64s) -+{ -+ u64 *dst = (u64 *) _dst + u64s - 1; -+ u64 *src = (u64 *) _src + u64s - 1; -+ -+#ifdef CONFIG_X86_64 -+ long d0, d1, d2; -+ -+ asm volatile("std ;\n" -+ "rep ; movsq\n" -+ "cld ;\n" -+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) -+ : "0" 
(u64s), "1" (dst), "2" (src) -+ : "memory"); -+#else -+ while (u64s--) -+ *dst-- = *src--; -+#endif -+} -+ -+static inline void memmove_u64s_up(void *dst, const void *src, -+ unsigned u64s) -+{ -+ EBUG_ON(dst < src); -+ -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+static inline void memmove_u64s(void *dst, const void *src, -+ unsigned u64s) -+{ -+ if (dst < src) -+ __memmove_u64s_down(dst, src, u64s); -+ else -+ __memmove_u64s_up(dst, src, u64s); -+} -+ -+/* Set the last few bytes up to a u64 boundary given an offset into a buffer. */ -+static inline void memset_u64s_tail(void *s, int c, unsigned bytes) -+{ -+ unsigned rem = round_up(bytes, sizeof(u64)) - bytes; -+ -+ memset(s + bytes, c, rem); -+} -+ -+void sort_cmp_size(void *base, size_t num, size_t size, -+ int (*cmp_func)(const void *, const void *, size_t), -+ void (*swap_func)(void *, void *, size_t)); -+ -+/* just the memmove, doesn't update @_nr */ -+#define __array_insert_item(_array, _nr, _pos) \ -+ memmove(&(_array)[(_pos) + 1], \ -+ &(_array)[(_pos)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))) -+ -+#define array_insert_item(_array, _nr, _pos, _new_item) \ -+do { \ -+ __array_insert_item(_array, _nr, _pos); \ -+ (_nr)++; \ -+ (_array)[(_pos)] = (_new_item); \ -+} while (0) -+ -+#define array_remove_items(_array, _nr, _pos, _nr_to_remove) \ -+do { \ -+ (_nr) -= (_nr_to_remove); \ -+ memmove(&(_array)[(_pos)], \ -+ &(_array)[(_pos) + (_nr_to_remove)], \ -+ sizeof((_array)[0]) * ((_nr) - (_pos))); \ -+} while (0) -+ -+#define array_remove_item(_array, _nr, _pos) \ -+ array_remove_items(_array, _nr, _pos, 1) -+ -+static inline void __move_gap(void *array, size_t element_size, -+ size_t nr, size_t size, -+ size_t old_gap, size_t new_gap) -+{ -+ size_t gap_end = old_gap + size - nr; -+ -+ if (new_gap < old_gap) { -+ size_t move = old_gap - new_gap; -+ -+ memmove(array + element_size * (gap_end - move), -+ array + element_size * (old_gap - move), -+ element_size * move); -+ } else if (new_gap > old_gap) { -+ size_t move = new_gap - old_gap; -+ -+ memmove(array + element_size * old_gap, -+ array + element_size * gap_end, -+ element_size * move); -+ } -+} -+ -+/* Move the gap in a gap buffer: */ -+#define move_gap(_array, _nr, _size, _old_gap, _new_gap) \ -+ __move_gap(_array, sizeof(_array[0]), _nr, _size, _old_gap, _new_gap) -+ -+#define bubble_sort(_base, _nr, _cmp) \ -+do { \ -+ ssize_t _i, _last; \ -+ bool _swapped = true; \ -+ \ -+ for (_last= (ssize_t) (_nr) - 1; _last > 0 && _swapped; --_last) {\ -+ _swapped = false; \ -+ for (_i = 0; _i < _last; _i++) \ -+ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \ -+ swap((_base)[_i], (_base)[_i + 1]); \ -+ _swapped = true; \ -+ } \ -+ } \ -+} while (0) -+ -+static inline u64 percpu_u64_get(u64 __percpu *src) -+{ -+ u64 ret = 0; -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ ret += *per_cpu_ptr(src, cpu); -+ return ret; -+} -+ -+static inline void percpu_u64_set(u64 __percpu *dst, u64 src) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ *per_cpu_ptr(dst, cpu) = 0; -+ this_cpu_write(*dst, src); -+} -+ -+static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr) -+{ -+ unsigned i; -+ -+ for (i = 0; i < nr; i++) -+ acc[i] += src[i]; -+} -+ -+static inline void acc_u64s_percpu(u64 *acc, const u64 __percpu *src, -+ unsigned nr) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ acc_u64s(acc, per_cpu_ptr(src, cpu), nr); -+} -+ -+static inline void percpu_memset(void __percpu *p, int c, size_t bytes) -+{ -+ int cpu; -+ -+ for_each_possible_cpu(cpu) -+ memset(per_cpu_ptr(p, 
cpu), c, bytes);
-+}
-+
-+u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
-+
-+#define cmp_int(l, r) ((l > r) - (l < r))
-+
-+static inline int u8_cmp(u8 l, u8 r)
-+{
-+ return cmp_int(l, r);
-+}
-+
-+static inline int cmp_le32(__le32 l, __le32 r)
-+{
-+ return cmp_int(le32_to_cpu(l), le32_to_cpu(r));
-+}
-+
-+#include
-+
-+#endif /* _BCACHEFS_UTIL_H */
-diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c
-new file mode 100644
-index 000000000000..cb4f33ed9ab3
---- /dev/null
-+++ b/fs/bcachefs/varint.c
-@@ -0,0 +1,129 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef CONFIG_VALGRIND
-+#include
-+#endif
-+
-+#include "varint.h"
-+
-+/**
-+ * bch2_varint_encode - encode a variable length integer
-+ * @out: destination to encode to
-+ * @v: unsigned integer to encode
-+ * Returns: size in bytes of the encoded integer - at most 9 bytes
-+ */
-+int bch2_varint_encode(u8 *out, u64 v)
-+{
-+ unsigned bits = fls64(v|1);
-+ unsigned bytes = DIV_ROUND_UP(bits, 7);
-+ __le64 v_le;
-+
-+ if (likely(bytes < 9)) {
-+ v <<= bytes;
-+ v |= ~(~0 << (bytes - 1));
-+ v_le = cpu_to_le64(v);
-+ memcpy(out, &v_le, bytes);
-+ } else {
-+ *out++ = 255;
-+ bytes = 9;
-+ put_unaligned_le64(v, out);
-+ }
-+
-+ return bytes;
-+}
-+
-+/**
-+ * bch2_varint_decode - decode a variable length integer
-+ * @in: varint to decode
-+ * @end: end of buffer to decode from
-+ * @out: on success, decoded integer
-+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
-+ * have read past the end of the buffer)
-+ */
-+int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out)
-+{
-+ unsigned bytes = likely(in < end)
-+ ? ffz(*in & 255) + 1
-+ : 1;
-+ u64 v;
-+
-+ if (unlikely(in + bytes > end))
-+ return -1;
-+
-+ if (likely(bytes < 9)) {
-+ __le64 v_le = 0;
-+
-+ memcpy(&v_le, in, bytes);
-+ v = le64_to_cpu(v_le);
-+ v >>= bytes;
-+ } else {
-+ v = get_unaligned_le64(++in);
-+ }
-+
-+ *out = v;
-+ return bytes;
-+}
-+
-+/**
-+ * bch2_varint_encode_fast - fast version of bch2_varint_encode
-+ * @out: destination to encode to
-+ * @v: unsigned integer to encode
-+ * Returns: size in bytes of the encoded integer - at most 9 bytes
-+ *
-+ * This version assumes it's always safe to write 8 bytes to @out, even if the
-+ * encoded integer would be smaller.
-+ */
-+int bch2_varint_encode_fast(u8 *out, u64 v)
-+{
-+ unsigned bits = fls64(v|1);
-+ unsigned bytes = DIV_ROUND_UP(bits, 7);
-+
-+ if (likely(bytes < 9)) {
-+ v <<= bytes;
-+ v |= ~(~0 << (bytes - 1));
-+ } else {
-+ *out++ = 255;
-+ bytes = 9;
-+ }
-+
-+ put_unaligned_le64(v, out);
-+ return bytes;
-+}
-+
-+/**
-+ * bch2_varint_decode_fast - fast version of bch2_varint_decode
-+ * @in: varint to decode
-+ * @end: end of buffer to decode from
-+ * @out: on success, decoded integer
-+ * Returns: size in bytes of the decoded integer - or -1 on failure (would
-+ * have read past the end of the buffer)
-+ *
-+ * This version assumes that it is safe to read at most 8 bytes past the end of
-+ * @end (we still return an error if the varint extends past @end).
-+ */ -+int bch2_varint_decode_fast(const u8 *in, const u8 *end, u64 *out) -+{ -+#ifdef CONFIG_VALGRIND -+ VALGRIND_MAKE_MEM_DEFINED(in, 8); -+#endif -+ u64 v = get_unaligned_le64(in); -+ unsigned bytes = ffz(*in) + 1; -+ -+ if (unlikely(in + bytes > end)) -+ return -1; -+ -+ if (likely(bytes < 9)) { -+ v >>= bytes; -+ v &= ~(~0ULL << (7 * bytes)); -+ } else { -+ v = get_unaligned_le64(++in); -+ } -+ -+ *out = v; -+ return bytes; -+} -diff --git a/fs/bcachefs/varint.h b/fs/bcachefs/varint.h -new file mode 100644 -index 000000000000..92a182fb3d7a ---- /dev/null -+++ b/fs/bcachefs/varint.h -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_VARINT_H -+#define _BCACHEFS_VARINT_H -+ -+int bch2_varint_encode(u8 *, u64); -+int bch2_varint_decode(const u8 *, const u8 *, u64 *); -+ -+int bch2_varint_encode_fast(u8 *, u64); -+int bch2_varint_decode_fast(const u8 *, const u8 *, u64 *); -+ -+#endif /* _BCACHEFS_VARINT_H */ -diff --git a/fs/bcachefs/vstructs.h b/fs/bcachefs/vstructs.h -new file mode 100644 -index 000000000000..a6561b4b36a6 ---- /dev/null -+++ b/fs/bcachefs/vstructs.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _VSTRUCTS_H -+#define _VSTRUCTS_H -+ -+#include "util.h" -+ -+/* -+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this -+ * assumes u64 is little endian: -+ */ -+#define __vstruct_u64s(_s) \ -+({ \ -+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((__force __le64) (_s)->u64s) \ -+ : type_is((_s)->u64s, u32) ? le32_to_cpu((__force __le32) (_s)->u64s) \ -+ : type_is((_s)->u64s, u16) ? le16_to_cpu((__force __le16) (_s)->u64s) \ -+ : ((__force u8) ((_s)->u64s))); \ -+}) -+ -+#define __vstruct_bytes(_type, _u64s) \ -+({ \ -+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \ -+ \ -+ (size_t) (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \ -+}) -+ -+#define vstruct_bytes(_s) \ -+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s)) -+ -+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \ -+ (round_up(__vstruct_bytes(_type, _u64s), \ -+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits))) -+ -+#define vstruct_blocks(_s, _sector_block_bits) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s)) -+ -+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \ -+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \ -+ __vstruct_u64s(_s) + (_u64s)) -+ -+#define vstruct_sectors(_s, _sector_block_bits) \ -+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9) -+ -+#define vstruct_next(_s) \ -+ ((typeof(_s)) ((u64 *) (_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_last(_s) \ -+ ((typeof(&(_s)->start[0])) ((u64 *) (_s)->_data + __vstruct_u64s(_s))) -+#define vstruct_end(_s) \ -+ ((void *) ((u64 *) (_s)->_data + __vstruct_u64s(_s))) -+ -+#define vstruct_for_each(_s, _i) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s); \ -+ _i = vstruct_next(_i)) -+ -+#define vstruct_for_each_safe(_s, _i, _t) \ -+ for (_i = (_s)->start; \ -+ _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \ -+ _i = _t) -+ -+#define vstruct_idx(_s, _idx) \ -+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx))) -+ -+#endif /* _VSTRUCTS_H */ -diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c -new file mode 100644 -index 000000000000..a39ff0c296ec ---- /dev/null -+++ b/fs/bcachefs/xattr.c -@@ -0,0 +1,643 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include "bcachefs.h" -+#include "acl.h" -+#include "bkey_methods.h" -+#include "btree_update.h" -+#include "extents.h" -+#include "fs.h" 
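
[Editor's illustration, not part of either patch: the varint format in fs/bcachefs/varint.c above stores the encoded length in the low bits of the first byte - a value needing N bytes (N < 9) is shifted left by N and ORed with N-1 one bits, so the decoder recovers N by counting bits before the first zero; anything larger gets a 0xff prefix followed by a full little-endian u64. A minimal standalone userspace sketch of the same scheme; the demo_* names are invented and the kernel code above is the authoritative version.]

    #include <assert.h>
    #include <stdint.h>

    /* Encode v; returns bytes written (at most 9), mirroring bch2_varint_encode. */
    static unsigned demo_varint_encode(uint8_t *out, uint64_t v)
    {
        unsigned bits = 64 - __builtin_clzll(v | 1);   /* fls64(v|1) */
        unsigned bytes = (bits + 6) / 7;               /* DIV_ROUND_UP(bits, 7) */

        if (bytes < 9) {
            v = (v << bytes) | ((1ULL << (bytes - 1)) - 1);
            for (unsigned i = 0; i < bytes; i++)
                out[i] = v >> (8 * i);                 /* little endian */
        } else {
            out[0] = 0xff;
            bytes = 9;
            for (unsigned i = 0; i < 8; i++)
                out[1 + i] = v >> (8 * i);
        }
        return bytes;
    }

    /* Decode from [in, end); returns bytes consumed, or -1 on truncated input. */
    static int demo_varint_decode(const uint8_t *in, const uint8_t *end, uint64_t *out)
    {
        if (in >= end)
            return -1;

        unsigned bytes = __builtin_ctz(~*in | 0x100) + 1;  /* ffz(*in) + 1, capped at 9 */
        if (in + bytes > end)
            return -1;

        uint64_t v = 0;
        if (bytes < 9) {
            for (unsigned i = 0; i < bytes; i++)
                v |= (uint64_t)in[i] << (8 * i);
            v >>= bytes;                               /* drop the length bits */
        } else {
            for (unsigned i = 0; i < 8; i++)
                v |= (uint64_t)in[1 + i] << (8 * i);
        }
        *out = v;
        return bytes;
    }

    int main(void)
    {
        uint8_t buf[9];
        uint64_t vals[] = { 0, 127, 128, 1ULL << 56, UINT64_MAX }, got;

        for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
            unsigned n = demo_varint_encode(buf, vals[i]);
            assert(demo_varint_decode(buf, buf + n, &got) == (int)n);
            assert(got == vals[i]);
        }
        return 0;
    }
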
-+#include "rebalance.h" -+#include "str_hash.h" -+#include "xattr.h" -+ -+#include -+#include -+#include -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned); -+ -+static u64 bch2_xattr_hash(const struct bch_hash_info *info, -+ const struct xattr_search_key *key) -+{ -+ struct bch_str_hash_ctx ctx; -+ -+ bch2_str_hash_init(&ctx, info); -+ bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type)); -+ bch2_str_hash_update(&ctx, info, key->name.name, key->name.len); -+ -+ return bch2_str_hash_end(&ctx, info); -+} -+ -+static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key) -+{ -+ return bch2_xattr_hash(info, key); -+} -+ -+static u64 xattr_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) -+{ -+ struct bkey_s_c_xattr x = bkey_s_c_to_xattr(k); -+ -+ return bch2_xattr_hash(info, -+ &X_SEARCH(x.v->x_type, x.v->x_name, x.v->x_name_len)); -+} -+ -+static bool xattr_cmp_key(struct bkey_s_c _l, const void *_r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ const struct xattr_search_key *r = _r; -+ -+ return l.v->x_type != r->type || -+ l.v->x_name_len != r->name.len || -+ memcmp(l.v->x_name, r->name.name, r->name.len); -+} -+ -+static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) -+{ -+ struct bkey_s_c_xattr l = bkey_s_c_to_xattr(_l); -+ struct bkey_s_c_xattr r = bkey_s_c_to_xattr(_r); -+ -+ return l.v->x_type != r.v->x_type || -+ l.v->x_name_len != r.v->x_name_len || -+ memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len); -+} -+ -+const struct bch_hash_desc bch2_xattr_hash_desc = { -+ .btree_id = BTREE_ID_xattrs, -+ .key_type = KEY_TYPE_xattr, -+ .hash_key = xattr_hash_key, -+ .hash_bkey = xattr_hash_bkey, -+ .cmp_key = xattr_cmp_key, -+ .cmp_bkey = xattr_cmp_bkey, -+}; -+ -+int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, -+ enum bkey_invalid_flags flags, -+ struct printbuf *err) -+{ -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len)); -+ int ret = 0; -+ -+ bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, -+ xattr_val_size_too_small, -+ "value too small (%zu < %u)", -+ bkey_val_u64s(k.k), val_u64s); -+ -+ /* XXX why +4 ? 
*/ -+ val_u64s = xattr_val_u64s(xattr.v->x_name_len, -+ le16_to_cpu(xattr.v->x_val_len) + 4); -+ -+ bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, -+ xattr_val_size_too_big, -+ "value too big (%zu > %u)", -+ bkey_val_u64s(k.k), val_u64s); -+ -+ bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, -+ xattr_invalid_type, -+ "invalid type (%u)", xattr.v->x_type); -+ -+ bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, -+ xattr_name_invalid_chars, -+ "xattr name has invalid characters"); -+fsck_err: -+ return ret; -+} -+ -+void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, -+ struct bkey_s_c k) -+{ -+ const struct xattr_handler *handler; -+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); -+ -+ handler = bch2_xattr_type_to_handler(xattr.v->x_type); -+ if (handler && handler->prefix) -+ prt_printf(out, "%s", handler->prefix); -+ else if (handler) -+ prt_printf(out, "(type %u)", xattr.v->x_type); -+ else -+ prt_printf(out, "(unknown type %u)", xattr.v->x_type); -+ -+ prt_printf(out, "%.*s:%.*s", -+ xattr.v->x_name_len, -+ xattr.v->x_name, -+ le16_to_cpu(xattr.v->x_val_len), -+ (char *) xattr_val(xattr.v)); -+ -+ if (xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS || -+ xattr.v->x_type == KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT) { -+ prt_char(out, ' '); -+ bch2_acl_to_text(out, xattr_val(xattr.v), -+ le16_to_cpu(xattr.v->x_val_len)); -+ } -+} -+ -+static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode, -+ const char *name, void *buffer, size_t size, int type) -+{ -+ struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode); -+ struct xattr_search_key search = X_SEARCH(type, name, strlen(name)); -+ struct btree_iter iter; -+ struct bkey_s_c_xattr xattr; -+ struct bkey_s_c k; -+ int ret; -+ -+ ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, -+ inode_inum(inode), &search, 0); -+ if (ret) -+ goto err1; -+ -+ k = bch2_btree_iter_peek_slot(&iter); -+ ret = bkey_err(k); -+ if (ret) -+ goto err2; -+ -+ xattr = bkey_s_c_to_xattr(k); -+ ret = le16_to_cpu(xattr.v->x_val_len); -+ if (buffer) { -+ if (ret > size) -+ ret = -ERANGE; -+ else -+ memcpy(buffer, xattr_val(xattr.v), ret); -+ } -+err2: -+ bch2_trans_iter_exit(trans, &iter); -+err1: -+ return ret < 0 && bch2_err_matches(ret, ENOENT) ? 
-ENODATA : ret; -+} -+ -+int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, -+ struct bch_inode_unpacked *inode_u, -+ const struct bch_hash_info *hash_info, -+ const char *name, const void *value, size_t size, -+ int type, int flags) -+{ -+ struct bch_fs *c = trans->c; -+ struct btree_iter inode_iter = { NULL }; -+ int ret; -+ -+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); -+ if (ret) -+ return ret; -+ -+ inode_u->bi_ctime = bch2_current_time(c); -+ -+ ret = bch2_inode_write(trans, &inode_iter, inode_u); -+ bch2_trans_iter_exit(trans, &inode_iter); -+ -+ if (ret) -+ return ret; -+ -+ if (value) { -+ struct bkey_i_xattr *xattr; -+ unsigned namelen = strlen(name); -+ unsigned u64s = BKEY_U64s + -+ xattr_val_u64s(namelen, size); -+ -+ if (u64s > U8_MAX) -+ return -ERANGE; -+ -+ xattr = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); -+ if (IS_ERR(xattr)) -+ return PTR_ERR(xattr); -+ -+ bkey_xattr_init(&xattr->k_i); -+ xattr->k.u64s = u64s; -+ xattr->v.x_type = type; -+ xattr->v.x_name_len = namelen; -+ xattr->v.x_val_len = cpu_to_le16(size); -+ memcpy(xattr->v.x_name, name, namelen); -+ memcpy(xattr_val(&xattr->v), value, size); -+ -+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, -+ inum, &xattr->k_i, -+ (flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)| -+ (flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0)); -+ } else { -+ struct xattr_search_key search = -+ X_SEARCH(type, name, strlen(name)); -+ -+ ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, -+ hash_info, inum, &search); -+ } -+ -+ if (bch2_err_matches(ret, ENOENT)) -+ ret = flags & XATTR_REPLACE ? -ENODATA : 0; -+ -+ return ret; -+} -+ -+struct xattr_buf { -+ char *buf; -+ size_t len; -+ size_t used; -+}; -+ -+static int __bch2_xattr_emit(const char *prefix, -+ const char *name, size_t name_len, -+ struct xattr_buf *buf) -+{ -+ const size_t prefix_len = strlen(prefix); -+ const size_t total_len = prefix_len + name_len + 1; -+ -+ if (buf->buf) { -+ if (buf->used + total_len > buf->len) -+ return -ERANGE; -+ -+ memcpy(buf->buf + buf->used, prefix, prefix_len); -+ memcpy(buf->buf + buf->used + prefix_len, -+ name, name_len); -+ buf->buf[buf->used + prefix_len + name_len] = '\0'; -+ } -+ -+ buf->used += total_len; -+ return 0; -+} -+ -+static int bch2_xattr_emit(struct dentry *dentry, -+ const struct bch_xattr *xattr, -+ struct xattr_buf *buf) -+{ -+ const struct xattr_handler *handler = -+ bch2_xattr_type_to_handler(xattr->x_type); -+ -+ return handler && (!handler->list || handler->list(dentry)) -+ ? __bch2_xattr_emit(handler->prefix ?: handler->name, -+ xattr->x_name, xattr->x_name_len, buf) -+ : 0; -+} -+ -+static int bch2_xattr_list_bcachefs(struct bch_fs *c, -+ struct bch_inode_unpacked *inode, -+ struct xattr_buf *buf, -+ bool all) -+{ -+ const char *prefix = all ? "bcachefs_effective." 
: "bcachefs."; -+ unsigned id; -+ int ret = 0; -+ u64 v; -+ -+ for (id = 0; id < Inode_opt_nr; id++) { -+ v = bch2_inode_opt_get(inode, id); -+ if (!v) -+ continue; -+ -+ if (!all && -+ !(inode->bi_fields_set & (1 << id))) -+ continue; -+ -+ ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], -+ strlen(bch2_inode_opts[id]), buf); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) -+{ -+ struct bch_fs *c = dentry->d_sb->s_fs_info; -+ struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); -+ struct btree_trans *trans = bch2_trans_get(c); -+ struct btree_iter iter; -+ struct bkey_s_c k; -+ struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; -+ u64 offset = 0, inum = inode->ei_inode.bi_inum; -+ u32 snapshot; -+ int ret; -+retry: -+ bch2_trans_begin(trans); -+ iter = (struct btree_iter) { NULL }; -+ -+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot); -+ if (ret) -+ goto err; -+ -+ for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs, -+ SPOS(inum, offset, snapshot), -+ POS(inum, U64_MAX), 0, k, ret) { -+ if (k.k->type != KEY_TYPE_xattr) -+ continue; -+ -+ ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); -+ if (ret) -+ break; -+ } -+ -+ offset = iter.pos.offset; -+ bch2_trans_iter_exit(trans, &iter); -+err: -+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -+ goto retry; -+ -+ bch2_trans_put(trans); -+ -+ if (ret) -+ goto out; -+ -+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false); -+ if (ret) -+ goto out; -+ -+ ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); -+ if (ret) -+ goto out; -+ -+ return buf.used; -+out: -+ return bch2_err_class(ret); -+} -+ -+static int bch2_xattr_get_handler(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ int ret = bch2_trans_do(c, NULL, NULL, 0, -+ bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); -+ -+ return bch2_err_class(ret); -+} -+ -+static int bch2_xattr_set_handler(const struct xattr_handler *handler, -+ struct mnt_idmap *idmap, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); -+ struct bch_inode_unpacked inode_u; -+ int ret; -+ -+ ret = bch2_trans_run(c, -+ commit_do(trans, NULL, NULL, 0, -+ bch2_xattr_set(trans, inode_inum(inode), &inode_u, -+ &hash, name, value, size, -+ handler->flags, flags)) ?: -+ (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0)); -+ -+ return bch2_err_class(ret); -+} -+ -+static const struct xattr_handler bch_xattr_user_handler = { -+ .prefix = XATTR_USER_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_USER, -+}; -+ -+static bool bch2_xattr_trusted_list(struct dentry *dentry) -+{ -+ return capable(CAP_SYS_ADMIN); -+} -+ -+static const struct xattr_handler bch_xattr_trusted_handler = { -+ .prefix = XATTR_TRUSTED_PREFIX, -+ .list = bch2_xattr_trusted_list, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_TRUSTED, -+}; -+ -+static const struct xattr_handler 
bch_xattr_security_handler = { -+ .prefix = XATTR_SECURITY_PREFIX, -+ .get = bch2_xattr_get_handler, -+ .set = bch2_xattr_set_handler, -+ .flags = KEY_TYPE_XATTR_INDEX_SECURITY, -+}; -+ -+#ifndef NO_BCACHEFS_FS -+ -+static int opt_to_inode_opt(int id) -+{ -+ switch (id) { -+#define x(name, ...) \ -+ case Opt_##name: return Inode_opt_##name; -+ BCH_INODE_OPTS() -+#undef x -+ default: -+ return -1; -+ } -+} -+ -+static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size, -+ bool all) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ struct bch_opts opts = -+ bch2_inode_opts_to_opts(&inode->ei_inode); -+ const struct bch_option *opt; -+ int id, inode_opt_id; -+ struct printbuf out = PRINTBUF; -+ int ret; -+ u64 v; -+ -+ id = bch2_opt_lookup(name); -+ if (id < 0 || !bch2_opt_is_inode_opt(id)) -+ return -EINVAL; -+ -+ inode_opt_id = opt_to_inode_opt(id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + id; -+ -+ if (!bch2_opt_defined_by_id(&opts, id)) -+ return -ENODATA; -+ -+ if (!all && -+ !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id))) -+ return -ENODATA; -+ -+ v = bch2_opt_get_by_id(&opts, id); -+ bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0); -+ -+ ret = out.pos; -+ -+ if (out.allocation_failure) { -+ ret = -ENOMEM; -+ } else if (buffer) { -+ if (out.pos > size) -+ ret = -ERANGE; -+ else -+ memcpy(buffer, out.buf, out.pos); -+ } -+ -+ printbuf_exit(&out); -+ return ret; -+} -+ -+static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, false); -+} -+ -+struct inode_opt_set { -+ int id; -+ u64 v; -+ bool defined; -+}; -+ -+static int inode_opt_set_fn(struct btree_trans *trans, -+ struct bch_inode_info *inode, -+ struct bch_inode_unpacked *bi, -+ void *p) -+{ -+ struct inode_opt_set *s = p; -+ -+ if (s->defined) -+ bi->bi_fields_set |= 1U << s->id; -+ else -+ bi->bi_fields_set &= ~(1U << s->id); -+ -+ bch2_inode_opt_set(bi, s->id, s->v); -+ -+ return 0; -+} -+ -+static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, -+ struct mnt_idmap *idmap, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct bch_inode_info *inode = to_bch_ei(vinode); -+ struct bch_fs *c = inode->v.i_sb->s_fs_info; -+ const struct bch_option *opt; -+ char *buf; -+ struct inode_opt_set s; -+ int opt_id, inode_opt_id, ret; -+ -+ opt_id = bch2_opt_lookup(name); -+ if (opt_id < 0) -+ return -EINVAL; -+ -+ opt = bch2_opt_table + opt_id; -+ -+ inode_opt_id = opt_to_inode_opt(opt_id); -+ if (inode_opt_id < 0) -+ return -EINVAL; -+ -+ s.id = inode_opt_id; -+ -+ if (value) { -+ u64 v = 0; -+ -+ buf = kmalloc(size + 1, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ memcpy(buf, value, size); -+ buf[size] = '\0'; -+ -+ ret = bch2_opt_parse(c, opt, buf, &v, NULL); -+ kfree(buf); -+ -+ if (ret < 0) -+ return ret; -+ -+ ret = bch2_opt_check_may_set(c, opt_id, v); -+ if (ret < 0) -+ return ret; -+ -+ s.v = v + 1; -+ s.defined = true; -+ } else { -+ if (!IS_ROOT(dentry)) { -+ struct bch_inode_info *dir = -+ to_bch_ei(d_inode(dentry->d_parent)); -+ -+ s.v = bch2_inode_opt_get(&dir->ei_inode, inode_opt_id); -+ } else { -+ s.v = 0; -+ } -+ -+ s.defined 
= false; -+ } -+ -+ mutex_lock(&inode->ei_update_lock); -+ if (inode_opt_id == Inode_opt_project) { -+ /* -+ * inode fields accessible via the xattr interface are stored -+ * with a +1 bias, so that 0 means unset: -+ */ -+ ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0); -+ if (ret) -+ goto err; -+ } -+ -+ ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); -+err: -+ mutex_unlock(&inode->ei_update_lock); -+ -+ if (value && -+ (opt_id == Opt_background_compression || -+ opt_id == Opt_background_target)) -+ bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); -+ -+ return bch2_err_class(ret); -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_handler = { -+ .prefix = "bcachefs.", -+ .get = bch2_xattr_bcachefs_get, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+static int bch2_xattr_bcachefs_get_effective( -+ const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *vinode, -+ const char *name, void *buffer, size_t size) -+{ -+ return __bch2_xattr_bcachefs_get(handler, dentry, vinode, -+ name, buffer, size, true); -+} -+ -+static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { -+ .prefix = "bcachefs_effective.", -+ .get = bch2_xattr_bcachefs_get_effective, -+ .set = bch2_xattr_bcachefs_set, -+}; -+ -+#endif /* NO_BCACHEFS_FS */ -+ -+const struct xattr_handler *bch2_xattr_handlers[] = { -+ &bch_xattr_user_handler, -+#ifdef CONFIG_BCACHEFS_POSIX_ACL -+ &nop_posix_acl_access, -+ &nop_posix_acl_default, -+#endif -+ &bch_xattr_trusted_handler, -+ &bch_xattr_security_handler, -+#ifndef NO_BCACHEFS_FS -+ &bch_xattr_bcachefs_handler, -+ &bch_xattr_bcachefs_effective_handler, -+#endif -+ NULL -+}; -+ -+static const struct xattr_handler *bch_xattr_handler_map[] = { -+ [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] = -+ &nop_posix_acl_access, -+ [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] = -+ &nop_posix_acl_default, -+ [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, -+ [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, -+}; -+ -+static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type) -+{ -+ return type < ARRAY_SIZE(bch_xattr_handler_map) -+ ? 
bch_xattr_handler_map[type] -+ : NULL; -+} -diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h -new file mode 100644 -index 000000000000..1337f31a5c49 ---- /dev/null -+++ b/fs/bcachefs/xattr.h -@@ -0,0 +1,50 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _BCACHEFS_XATTR_H -+#define _BCACHEFS_XATTR_H -+ -+#include "str_hash.h" -+ -+extern const struct bch_hash_desc bch2_xattr_hash_desc; -+ -+int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, -+ enum bkey_invalid_flags, struct printbuf *); -+void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -+ -+#define bch2_bkey_ops_xattr ((struct bkey_ops) { \ -+ .key_invalid = bch2_xattr_invalid, \ -+ .val_to_text = bch2_xattr_to_text, \ -+ .min_val_size = 8, \ -+}) -+ -+static inline unsigned xattr_val_u64s(unsigned name_len, unsigned val_len) -+{ -+ return DIV_ROUND_UP(offsetof(struct bch_xattr, x_name) + -+ name_len + val_len, sizeof(u64)); -+} -+ -+#define xattr_val(_xattr) \ -+ ((void *) (_xattr)->x_name + (_xattr)->x_name_len) -+ -+struct xattr_search_key { -+ u8 type; -+ struct qstr name; -+}; -+ -+#define X_SEARCH(_type, _name, _len) ((struct xattr_search_key) \ -+ { .type = _type, .name = QSTR_INIT(_name, _len) }) -+ -+struct dentry; -+struct xattr_handler; -+struct bch_hash_info; -+struct bch_inode_info; -+ -+/* Exported for cmd_migrate.c in tools: */ -+int bch2_xattr_set(struct btree_trans *, subvol_inum, -+ struct bch_inode_unpacked *, const struct bch_hash_info *, -+ const char *, const void *, size_t, int, int); -+ -+ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -+ -+extern const struct xattr_handler *bch2_xattr_handlers[]; -+ -+#endif /* _BCACHEFS_XATTR_H */ -diff --git a/fs/dcache.c b/fs/dcache.c -index 25ac74d30bff..796e23761ba0 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -3246,11 +3246,10 @@ void d_genocide(struct dentry *parent) - d_walk(parent, parent, d_genocide_kill); - } - --void d_tmpfile(struct file *file, struct inode *inode) -+void d_mark_tmpfile(struct file *file, struct inode *inode) - { - struct dentry *dentry = file->f_path.dentry; - -- inode_dec_link_count(inode); - BUG_ON(dentry->d_name.name != dentry->d_iname || - !hlist_unhashed(&dentry->d_u.d_alias) || - !d_unlinked(dentry)); -@@ -3260,6 +3259,15 @@ void d_tmpfile(struct file *file, struct inode *inode) - (unsigned long long)inode->i_ino); - spin_unlock(&dentry->d_lock); - spin_unlock(&dentry->d_parent->d_lock); -+} -+EXPORT_SYMBOL(d_mark_tmpfile); -+ -+void d_tmpfile(struct file *file, struct inode *inode) -+{ -+ struct dentry *dentry = file->f_path.dentry; -+ -+ inode_dec_link_count(inode); -+ d_mark_tmpfile(file, inode); - d_instantiate(dentry, inode); - } - EXPORT_SYMBOL(d_tmpfile); -diff --git a/drivers/md/bcache/closure.h b/include/linux/closure.h -similarity index 91% -rename from drivers/md/bcache/closure.h -rename to include/linux/closure.h -index c88cdc4ae4ec..de7bb47d8a46 100644 ---- a/drivers/md/bcache/closure.h -+++ b/include/linux/closure.h -@@ -154,8 +154,9 @@ struct closure { - struct closure *parent; - - atomic_t remaining; -+ bool closure_get_happened; - --#ifdef CONFIG_BCACHE_CLOSURES_DEBUG -+#ifdef CONFIG_DEBUG_CLOSURES - #define CLOSURE_MAGIC_DEAD 0xc054dead - #define CLOSURE_MAGIC_ALIVE 0xc054a11e - -@@ -172,6 +173,11 @@ void __closure_wake_up(struct closure_waitlist *list); - bool closure_wait(struct closure_waitlist *list, struct closure *cl); - void __closure_sync(struct closure *cl); - -+static inline unsigned closure_nr_remaining(struct closure *cl) -+{ -+ return 
atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK;
-+}
-+
- /**
-  * closure_sync - sleep until a closure has nothing left to wait on
-  *
-@@ -180,19 +186,21 @@ void __closure_sync(struct closure *cl);
-  */
- static inline void closure_sync(struct closure *cl)
- {
--	if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1)
-+#ifdef CONFIG_DEBUG_CLOSURES
-+	BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
-+#endif
-+
-+	if (cl->closure_get_happened)
- 		__closure_sync(cl);
- }
- 
---#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- 
---void closure_debug_init(void);
- void closure_debug_create(struct closure *cl);
- void closure_debug_destroy(struct closure *cl);
- 
- #else
- 
---static inline void closure_debug_init(void) {}
- static inline void closure_debug_create(struct closure *cl) {}
- static inline void closure_debug_destroy(struct closure *cl) {}
- 
-@@ -200,21 +208,21 @@ static inline void closure_debug_destroy(struct closure *cl) {}
- 
- static inline void closure_set_ip(struct closure *cl)
- {
---#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- 	cl->ip = _THIS_IP_;
- #endif
- }
- 
- static inline void closure_set_ret_ip(struct closure *cl)
- {
---#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- 	cl->ip = _RET_IP_;
- #endif
- }
- 
- static inline void closure_set_waiting(struct closure *cl, unsigned long f)
- {
---#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef CONFIG_DEBUG_CLOSURES
- 	cl->waiting_on = f;
- #endif
- }
-@@ -230,8 +238,6 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
- 	closure_set_ip(cl);
- 	cl->fn = fn;
- 	cl->wq = wq;
---	/* between atomic_dec() in closure_put() */
---	smp_mb__before_atomic();
- }
- 
- static inline void closure_queue(struct closure *cl)
-@@ -243,6 +249,7 @@ static inline void closure_queue(struct closure *cl)
- 	 */
- 	BUILD_BUG_ON(offsetof(struct closure, fn)
- 		     != offsetof(struct work_struct, func));
-+
- 	if (wq) {
- 		INIT_WORK(&cl->work, cl->work.func);
- 		BUG_ON(!queue_work(wq, &cl->work));
-@@ -255,7 +262,9 @@
-  */
- static inline void closure_get(struct closure *cl)
- {
---#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+	cl->closure_get_happened = true;
-+
-+#ifdef CONFIG_DEBUG_CLOSURES
- 	BUG_ON((atomic_inc_return(&cl->remaining) &
- 		CLOSURE_REMAINING_MASK) <= 1);
- #else
-@@ -271,12 +280,13 @@ static inline void closure_get(struct closure *cl)
-  */
- static inline void closure_init(struct closure *cl, struct closure *parent)
- {
---	memset(cl, 0, sizeof(struct closure));
-+	cl->fn = NULL;
- 	cl->parent = parent;
- 	if (parent)
- 		closure_get(parent);
- 
- 	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
-+	cl->closure_get_happened = false;
- 
- 	closure_debug_create(cl);
- 	closure_set_ip(cl);
-@@ -375,4 +385,26 @@ static inline void closure_call(struct closure *cl, closure_fn fn,
- 	continue_at_nobarrier(cl, fn, wq);
- }
- 
-+#define __closure_wait_event(waitlist, _cond)				\
-+do {									\
-+	struct closure cl;						\
-+									\
-+	closure_init_stack(&cl);					\
-+									\
-+	while (1) {							\
-+		closure_wait(waitlist, &cl);				\
-+		if (_cond)						\
-+			break;						\
-+		closure_sync(&cl);					\
-+	}								\
-+	closure_wake_up(waitlist);					\
-+	closure_sync(&cl);						\
-+} while (0)
-+
-+#define closure_wait_event(waitlist, _cond)				\
-+do {									\
-+	if (!(_cond))							\
-+		__closure_wait_event(waitlist, _cond);			\
-+} while (0)
-+
- #endif /* _LINUX_CLOSURE_H */
-diff --git a/include/linux/dcache.h b/include/linux/dcache.h
-index 6b351e009f59..3da2f0545d5d 100644
---- 
a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -251,6 +251,7 @@ extern struct dentry * d_make_root(struct inode *); - /* - the ramfs-type tree */ - extern void d_genocide(struct dentry *); - -+extern void d_mark_tmpfile(struct file *, struct inode *); - extern void d_tmpfile(struct file *, struct inode *); - - extern struct dentry *d_find_alias(struct inode *); -diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h -index 11fbd0ee1370..f75e0914d40d 100644 ---- a/include/linux/exportfs.h -+++ b/include/linux/exportfs.h -@@ -104,6 +104,12 @@ enum fid_type { - */ - FILEID_LUSTRE = 0x97, - -+ /* -+ * 64 bit inode number, 32 bit subvolume, 32 bit generation number: -+ */ -+ FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1, -+ FILEID_BCACHEFS_WITH_PARENT = 0xb2, -+ - /* - * 64 bit unique kernfs id - */ -diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h -index 107613f7d792..847413164738 100644 ---- a/include/linux/generic-radix-tree.h -+++ b/include/linux/generic-radix-tree.h -@@ -116,6 +117,11 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) - - #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) - #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) -+#define __genradix_objs_per_page(_radix) \ -+ (PAGE_SIZE / sizeof((_radix)->type[0])) -+#define __genradix_page_remainder(_radix) \ -+ (PAGE_SIZE % sizeof((_radix)->type[0])) -+ - #define __genradix_idx_to_offset(_radix, _idx) \ - __idx_to_offset(_idx, __genradix_obj_size(_radix)) - -@@ -185,7 +185,25 @@ - #define genradix_iter_peek(_iter, _radix) \ - (__genradix_cast(_radix) \ - __genradix_iter_peek(_iter, &(_radix)->tree, \ -- PAGE_SIZE / __genradix_obj_size(_radix))) -+ __genradix_objs_per_page(_radix))) -+ -+void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *, -+ size_t, size_t); -+ -+/** -+ * genradix_iter_peek_prev - get first entry at or below iterator's current -+ * position -+ * @_iter: a genradix_iter -+ * @_radix: genradix being iterated over -+ * -+ * If no more entries exist at or below @_iter's current position, returns NULL -+ */ -+#define genradix_iter_peek_prev(_iter, _radix) \ -+ (__genradix_cast(_radix) \ -+ __genradix_iter_peek_prev(_iter, &(_radix)->tree, \ -+ __genradix_objs_per_page(_radix), \ -+ __genradix_obj_size(_radix) + \ -+ __genradix_page_remainder(_radix))) - - static inline void __genradix_iter_advance(struct genradix_iter *iter, - size_t obj_size) -@@ -196,6 +226,25 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, - #define genradix_iter_advance(_iter, _radix) \ - __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) - -+static inline void __genradix_iter_rewind(struct genradix_iter *iter, -+ size_t obj_size) -+{ -+ if (iter->offset == 0 || -+ iter->offset == SIZE_MAX) { -+ iter->offset = SIZE_MAX; -+ return; -+ } -+ -+ if ((iter->offset & (PAGE_SIZE - 1)) == 0) -+ iter->offset -= PAGE_SIZE % obj_size; -+ -+ iter->offset -= obj_size; -+ iter->pos--; -+} -+ -+#define genradix_iter_rewind(_iter, _radix) \ -+ __genradix_iter_rewind(_iter, __genradix_obj_size(_radix)) -+ - #define genradix_for_each_from(_radix, _iter, _p, _start) \ - for (_iter = genradix_iter_init(_radix, _start); \ - (_p = genradix_iter_peek(&_iter, _radix)) != NULL; \ -@@ -213,6 +262,23 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, - #define genradix_for_each(_radix, _iter, _p) \ - genradix_for_each_from(_radix, _iter, _p, 0) - -+#define genradix_last_pos(_radix) \ -+ (SIZE_MAX / 
PAGE_SIZE * __genradix_objs_per_page(_radix) - 1) -+ -+/** -+ * genradix_for_each_reverse - iterate over entry in a genradix, reverse order -+ * @_radix: genradix to iterate over -+ * @_iter: a genradix_iter to track current position -+ * @_p: pointer to genradix entry type -+ * -+ * On every iteration, @_p will point to the current entry, and @_iter.pos -+ * will be the current entry's index. -+ */ -+#define genradix_for_each_reverse(_radix, _iter, _p) \ -+ for (_iter = genradix_iter_init(_radix, genradix_last_pos(_radix));\ -+ (_p = genradix_iter_peek_prev(&_iter, _radix)) != NULL;\ -+ genradix_iter_rewind(&_iter, _radix)) -+ - int __genradix_prealloc(struct __genradix *, size_t, gfp_t); - - /** -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 77f01ac385f7..d5951e99706a 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -875,6 +875,7 @@ struct task_struct { - - struct mm_struct *mm; - struct mm_struct *active_mm; -+ struct address_space *faults_disabled_mapping; - - int exit_state; - int exit_code; -diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h -index 9d1f5bb74dd5..58fb1f90eda5 100644 ---- a/include/linux/string_helpers.h -+++ b/include/linux/string_helpers.h -@@ -24,8 +24,8 @@ enum string_size_units { - STRING_UNITS_2, /* use binary powers of 2^10 */ - }; - --void string_get_size(u64 size, u64 blk_size, enum string_size_units units, -- char *buf, int len); -+int string_get_size(u64 size, u64 blk_size, enum string_size_units units, -+ char *buf, int len); - - int parse_int_array_user(const char __user *from, size_t count, int **array); - -diff --git a/init/init_task.c b/init/init_task.c -index ff6c4b9bfe6b..f703116e0523 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -85,6 +85,7 @@ struct task_struct init_task - .nr_cpus_allowed= NR_CPUS, - .mm = NULL, - .active_mm = &init_mm, -+ .faults_disabled_mapping = NULL, - .restart_block = { - .fn = do_no_restart_syscall, - }, -diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c -index d973fe6041bf..2deeeca3e71b 100644 ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -1126,6 +1126,9 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible); - #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ - #endif /* !CONFIG_PREEMPT_RT */ - -+EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin); -+EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end); -+ - /** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 - * @cnt: the atomic which we are to dec -diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c -index 9ed5ce989415..4f65824879ab 100644 ---- a/kernel/stacktrace.c -+++ b/kernel/stacktrace.c -@@ -151,6 +151,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store, - put_task_stack(tsk); - return c.len; - } -+EXPORT_SYMBOL_GPL(stack_trace_save_tsk); - - /** - * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array -@@ -301,6 +302,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task, - save_stack_trace_tsk(task, &trace); - return trace.nr_entries; - } -+EXPORT_SYMBOL_GPL(stack_trace_save_tsk); - - /** - * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array -diff --git a/lib/Kconfig b/lib/Kconfig -index c686f4adc124..263aa6ae8d7c 100644 ---- a/lib/Kconfig -+++ b/lib/Kconfig -@@ -506,6 +506,9 @@ config ASSOCIATIVE_ARRAY - - for more information. 
- 
-+config CLOSURES
-+	bool
-+
- config HAS_IOMEM
- 	bool
- 	depends on !NO_IOMEM
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index fa307f93fa2e..ce3a4abf40f8 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -1720,6 +1720,15 @@ config DEBUG_NOTIFIERS
- 	  This is a relatively cheap check but if you care about maximum
- 	  performance, say N.
- 
-+config DEBUG_CLOSURES
-+	bool "Debug closures (bcache async widgets)"
-+	depends on CLOSURES
-+	select DEBUG_FS
-+	help
-+	  Keeps all active closures in a linked list and provides a debugfs
-+	  interface to list them, which makes it possible to see asynchronous
-+	  operations that get stuck.
-+
- config DEBUG_MAPLE_TREE
- 	bool "Debug maple trees"
- 	depends on DEBUG_KERNEL
-diff --git a/lib/Makefile b/lib/Makefile
-index 740109b6e2c8..57d394575919 100644
---- a/lib/Makefile
-+++ b/lib/Makefile
-@@ -255,6 +255,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
- 
- obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
- 
-+obj-$(CONFIG_CLOSURES) += closure.o
-+
- obj-$(CONFIG_DQL) += dynamic_queue_limits.o
- 
- obj-$(CONFIG_GLOB) += glob.o
-diff --git a/drivers/md/bcache/closure.c b/lib/closure.c
-similarity index 83%
-rename from drivers/md/bcache/closure.c
-rename to lib/closure.c
-index d8d9394a6beb..f86c9eeafb35 100644
---- a/drivers/md/bcache/closure.c
-+++ b/lib/closure.c
-@@ -6,13 +6,13 @@
-  * Copyright 2012 Google, Inc.
-  */
- 
-+#include <linux/closure.h>
- #include <linux/debugfs.h>
---#include <linux/module.h>
-+#include <linux/export.h>
-+#include <linux/rcupdate.h>
- #include <linux/seq_file.h>
- #include <linux/sched/debug.h>
- 
---#include "closure.h"
---
- static inline void closure_put_after_sub(struct closure *cl, int flags)
- {
- 	int r = flags & CLOSURE_REMAINING_MASK;
-@@ -21,6 +21,10 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
- 	BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
- 
- 	if (!r) {
-+		smp_acquire__after_ctrl_dep();
-+
-+		cl->closure_get_happened = false;
-+
- 		if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
- 			atomic_set(&cl->remaining,
- 				   CLOSURE_REMAINING_INITIALIZER);
-@@ -43,16 +47,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
- /* For clearing flags with the same atomic op as a put */
- void closure_sub(struct closure *cl, int v)
- {
---	closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
-+	closure_put_after_sub(cl, atomic_sub_return_release(v, &cl->remaining));
- }
-+EXPORT_SYMBOL(closure_sub);
- 
- /*
-  * closure_put - decrement a closure's refcount
-  */
- void closure_put(struct closure *cl)
- {
---	closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
-+	closure_put_after_sub(cl, atomic_dec_return_release(&cl->remaining));
- }
-+EXPORT_SYMBOL(closure_put);
- 
- /*
-  * closure_wake_up - wake up all closures on a wait list, without memory barrier
-@@ -74,6 +80,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
- 		closure_sub(cl, CLOSURE_WAITING + 1);
- 	}
- }
-+EXPORT_SYMBOL(__closure_wake_up);
- 
- /**
-  * closure_wait - add a closure to a waitlist
-@@ -87,12 +94,14 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
- 	if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
- 		return false;
- 
-+	cl->closure_get_happened = true;
- 	closure_set_waiting(cl, _RET_IP_);
- 	atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
- 	llist_add(&cl->list, &waitlist->list);
- 
- 	return true;
- }
-+EXPORT_SYMBOL(closure_wait);
- 
- struct closure_syncer {
- 	struct task_struct	*task;
-@@ -127,8 +136,9 @@ void __sched __closure_sync(struct closure *cl)
- 
- 	__set_current_state(TASK_RUNNING);
- }
-+EXPORT_SYMBOL(__closure_sync);
- 
---#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-+#ifdef 
CONFIG_DEBUG_CLOSURES - - static LIST_HEAD(closure_list); - static DEFINE_SPINLOCK(closure_list_lock); -@@ -144,6 +154,7 @@ void closure_debug_create(struct closure *cl) - list_add(&cl->all, &closure_list); - spin_unlock_irqrestore(&closure_list_lock, flags); - } -+EXPORT_SYMBOL(closure_debug_create); - - void closure_debug_destroy(struct closure *cl) - { -@@ -156,8 +167,7 @@ void closure_debug_destroy(struct closure *cl) - list_del(&cl->all); - spin_unlock_irqrestore(&closure_list_lock, flags); - } -- --static struct dentry *closure_debug; -+EXPORT_SYMBOL(closure_debug_destroy); - - static int debug_show(struct seq_file *f, void *data) - { -@@ -181,7 +191,7 @@ static int debug_show(struct seq_file *f, void *data) - seq_printf(f, " W %pS\n", - (void *) cl->waiting_on); - -- seq_printf(f, "\n"); -+ seq_puts(f, "\n"); - } - - spin_unlock_irq(&closure_list_lock); -@@ -190,18 +200,11 @@ static int debug_show(struct seq_file *f, void *data) - - DEFINE_SHOW_ATTRIBUTE(debug); - --void __init closure_debug_init(void) -+static int __init closure_debug_init(void) - { -- if (!IS_ERR_OR_NULL(bcache_debug)) -- /* -- * it is unnecessary to check return value of -- * debugfs_create_file(), we should not care -- * about this. -- */ -- closure_debug = debugfs_create_file( -- "closures", 0400, bcache_debug, NULL, &debug_fops); -+ debugfs_create_file("closures", 0400, NULL, NULL, &debug_fops); -+ return 0; - } --#endif -+late_initcall(closure_debug_init) - --MODULE_AUTHOR("Kent Overstreet "); --MODULE_LICENSE("GPL"); -+#endif -diff --git a/lib/errname.c b/lib/errname.c -index 67739b174a8c..dd1b998552cd 100644 ---- a/lib/errname.c -+++ b/lib/errname.c -@@ -228,3 +228,4 @@ const char *errname(int err) - - return err > 0 ? name + 1 : name; - } -+EXPORT_SYMBOL(errname); -diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c -index f25eb111c051..41f1bcdc4488 100644 ---- a/lib/generic-radix-tree.c -+++ b/lib/generic-radix-tree.c -@@ -1,4 +1,5 @@ - -+#include - #include - #include - #include -@@ -201,6 +213,64 @@ void *__genradix_iter_peek(struct genradix_iter *iter, - } - EXPORT_SYMBOL(__genradix_iter_peek); - -+void *__genradix_iter_peek_prev(struct genradix_iter *iter, -+ struct __genradix *radix, -+ size_t objs_per_page, -+ size_t obj_size_plus_page_remainder) -+{ -+ struct genradix_root *r; -+ struct genradix_node *n; -+ unsigned level, i; -+ -+ if (iter->offset == SIZE_MAX) -+ return NULL; -+ -+restart: -+ r = READ_ONCE(radix->root); -+ if (!r) -+ return NULL; -+ -+ n = genradix_root_to_node(r); -+ level = genradix_root_to_depth(r); -+ -+ if (ilog2(iter->offset) >= genradix_depth_shift(level)) { -+ iter->offset = genradix_depth_size(level); -+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; -+ -+ iter->offset -= obj_size_plus_page_remainder; -+ iter->pos--; -+ } -+ -+ while (level) { -+ level--; -+ -+ i = (iter->offset >> genradix_depth_shift(level)) & -+ (GENRADIX_ARY - 1); -+ -+ while (!n->children[i]) { -+ size_t objs_per_ptr = genradix_depth_size(level); -+ -+ iter->offset = round_down(iter->offset, objs_per_ptr); -+ iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; -+ -+ if (!iter->offset) -+ return NULL; -+ -+ iter->offset -= obj_size_plus_page_remainder; -+ iter->pos--; -+ -+ if (!i) -+ goto restart; -+ --i; -+ } -+ -+ n = n->children[i]; -+ } -+ -+ return &n->data[iter->offset & (PAGE_SIZE - 1)]; -+} -+EXPORT_SYMBOL(__genradix_iter_peek_prev); -+ - static void genradix_free_recurse(struct genradix_node *n, unsigned level) - { - if (level) { -diff --git 
a/lib/string_helpers.c b/lib/string_helpers.c -index 9982344cca34..7713f73e66b0 100644 ---- a/lib/string_helpers.c -+++ b/lib/string_helpers.c -@@ -31,9 +31,11 @@ - * giving the size in the required units. @buf should have room for - * at least 9 bytes and will always be zero terminated. - * -+ * Return value: number of characters of output that would have been written -+ * (which may be greater than len, if output was truncated). - */ --void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, -- char *buf, int len) -+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, -+ char *buf, int len) - { - static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" -@@ -126,8 +128,8 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, - else - unit = units_str[units][i]; - -- snprintf(buf, len, "%u%s %s", (u32)size, -- tmp, unit); -+ return snprintf(buf, len, "%u%s %s", (u32)size, -+ tmp, unit); - } - EXPORT_SYMBOL(string_get_size); - -diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h -index e45c7cb1d5bc..e92f67383dde 100644 ---- a/tools/objtool/noreturns.h -+++ b/tools/objtool/noreturns.h -@@ -14,6 +14,8 @@ NORETURN(__stack_chk_fail) - NORETURN(__ubsan_handle_builtin_unreachable) - NORETURN(arch_call_rest_init) - NORETURN(arch_cpu_idle_dead) -+NORETURN(bch2_trans_in_restart_error) -+NORETURN(bch2_trans_restart_error) - NORETURN(cpu_bringup_and_idle) - NORETURN(cpu_startup_entry) - NORETURN(do_exit) --- -2.42.0 - diff --git a/SOURCES/tkg-misc-additions.patch b/SOURCES/tkg-misc-additions.patch index 4969dc3..618b53c 100644 --- a/SOURCES/tkg-misc-additions.patch +++ b/SOURCES/tkg-misc-additions.patch @@ -64,760 +64,194 @@ index 2c7171e0b0010..85de313ddec29 100644 select CPU_FREQ_GOV_PERFORMANCE help -From 7695eb71d0872ed9633daf0ca779da3344b87dec Mon Sep 17 00:00:00 2001 -From: Evan Quan -Date: Mon, 21 Aug 2023 14:15:13 +0800 -Subject: [PATCH] drm/amd/pm: correct SMU13 gfx voltage related OD settings +From 3a88b77d3cb9f9cd8a8aee052ab479b73aeb2e80 Mon Sep 17 00:00:00 2001 +From: "Jan Alexander Steffens (heftig)" +Date: Sat, 13 Jan 2024 15:29:25 +0100 +Subject: [PATCH] arch/Kconfig: Default to maximum amount of ASLR bits -The voltage offset setting will be applied to the whole v/f curve line -instead of per anchor point base. - -Signed-off-by: Evan Quan -Acked-by: Alex Deucher +To mitigate https://zolutal.github.io/aslrnt/; do this with a patch to +avoid having to enable `CONFIG_EXPERT`. --- - drivers/gpu/drm/amd/pm/amdgpu_pm.c | 45 +++++++------------ - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 31 ++++++------- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 31 ++++++------- - 3 files changed, 43 insertions(+), 64 deletions(-) - -diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -index 1da7ece4c627..06aa5c18b40f 100644 ---- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c -+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c -@@ -643,18 +643,14 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, - * They can be used to calibrate the sclk voltage curve. This is - * available for Vega20 and NV1X. - * -- * - voltage offset for the six anchor points of the v/f curve labeled -- * OD_VDDC_CURVE. They can be used to calibrate the v/f curve. This -- * is only availabe for some SMU13 ASICs. -- * - * - voltage offset(in mV) applied on target voltage calculation. -- * This is available for Sienna Cichlid, Navy Flounder and Dimgrey -- * Cavefish. 
For these ASICs, the target voltage calculation can be -- * illustrated by "voltage = voltage calculated from v/f curve + -- * overdrive vddgfx offset" -+ * This is available for Sienna Cichlid, Navy Flounder, Dimgrey -+ * Cavefish and some later SMU13 ASICs. For these ASICs, the target -+ * voltage calculation can be illustrated by "voltage = voltage -+ * calculated from v/f curve + overdrive vddgfx offset" - * -- * - a list of valid ranges for sclk, mclk, and voltage curve points -- * labeled OD_RANGE -+ * - a list of valid ranges for sclk, mclk, voltage curve points -+ * or voltage offset labeled OD_RANGE - * - * < For APUs > - * -@@ -686,24 +682,17 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, - * E.g., "p 2 0 800" would set the minimum core clock on core - * 2 to 800Mhz. - * -- * For sclk voltage curve, -- * - For NV1X, enter the new values by writing a string that -- * contains "vc point clock voltage" to the file. The points -- * are indexed by 0, 1 and 2. E.g., "vc 0 300 600" will update -- * point1 with clock set as 300Mhz and voltage as 600mV. "vc 2 -- * 1000 1000" will update point3 with clock set as 1000Mhz and -- * voltage 1000mV. -- * - For SMU13 ASICs, enter the new values by writing a string that -- * contains "vc anchor_point_index voltage_offset" to the file. -- * There are total six anchor points defined on the v/f curve with -- * index as 0 - 5. -- * - "vc 0 10" will update the voltage offset for point1 as 10mv. -- * - "vc 5 -10" will update the voltage offset for point6 as -10mv. -- * -- * To update the voltage offset applied for gfxclk/voltage calculation, -- * enter the new value by writing a string that contains "vo offset". -- * This is supported by Sienna Cichlid, Navy Flounder and Dimgrey Cavefish. -- * And the offset can be a positive or negative value. -+ * For sclk voltage curve supported by Vega20 and NV1X, enter the new -+ * values by writing a string that contains "vc point clock voltage" -+ * to the file. The points are indexed by 0, 1 and 2. E.g., "vc 0 300 -+ * 600" will update point1 with clock set as 300Mhz and voltage as 600mV. -+ * "vc 2 1000 1000" will update point3 with clock set as 1000Mhz and -+ * voltage 1000mV. -+ * -+ * For voltage offset supported by Sienna Cichlid, Navy Flounder, Dimgrey -+ * Cavefish and some later SMU13 ASICs, enter the new value by writing a -+ * string that contains "vo offset". E.g., "vo -10" will update the extra -+ * voltage offset applied to the whole v/f curve line as -10mv. 
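
[Editor's sketch, not part of the patch: putting the "vo offset" syntax above together with the "c" commit step described in the next bullet. The pp_od_clk_voltage sysfs node is the file this documentation covers; the card0 path and the -10 mV value are assumptions that vary by system, and writes normally require root.]

    #include <stdio.h>

    /* Stage a -10 mV offset for the whole v/f curve, then commit it. */
    int main(void)
    {
        const char *node = "/sys/class/drm/card0/device/pp_od_clk_voltage";
        FILE *f;

        f = fopen(node, "w");
        if (!f)
            return 1;
        fputs("vo -10\n", f);   /* stage the voltage offset */
        fclose(f);              /* flush: each string is one write(2) */

        f = fopen(node, "w");
        if (!f)
            return 1;
        fputs("c\n", f);        /* commit the staged settings */
        fclose(f);

        return 0;
    }
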
- * - * - When you have edited all of the states as needed, write "c" (commit) - * to the file to commit your changes -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -index 3903a47669e4..bd0d5f027cac 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -@@ -1304,16 +1304,14 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, - od_table->OverDriveTable.UclkFmax); - break; - -- case SMU_OD_VDDC_CURVE: -+ case SMU_OD_VDDGFX_OFFSET: - if (!smu_v13_0_0_is_od_feature_supported(smu, - PP_OD_FEATURE_GFX_VF_CURVE_BIT)) - break; - -- size += sysfs_emit_at(buf, size, "OD_VDDC_CURVE:\n"); -- for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) -- size += sysfs_emit_at(buf, size, "%d: %dmv\n", -- i, -- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i]); -+ size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); -+ size += sysfs_emit_at(buf, size, "%dmV\n", -+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]); - break; - - case SMU_OD_RANGE: -@@ -1355,7 +1353,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, - PP_OD_FEATURE_GFX_VF_CURVE, - &min_value, - &max_value); -- size += sysfs_emit_at(buf, size, "VDDC_CURVE: %7dmv %10dmv\n", -+ size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n", - min_value, max_value); - } - break; -@@ -1504,29 +1502,26 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, - } - break; - -- case PP_OD_EDIT_VDDC_CURVE: -+ case PP_OD_EDIT_VDDGFX_OFFSET: - if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { -- dev_warn(adev->dev, "VF curve setting not supported!\n"); -+ dev_warn(adev->dev, "Gfx offset setting not supported!\n"); - return -ENOTSUPP; - } - -- if (input[0] >= PP_NUM_OD_VF_CURVE_POINTS || -- input[0] < 0) -- return -EINVAL; + arch/Kconfig | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/Kconfig b/arch/Kconfig +index f4b210ab061291..837d0dbb28ea08 100644 +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -1032,7 +1032,7 @@ config ARCH_MMAP_RND_BITS + int "Number of bits to use for ASLR of mmap base address" if EXPERT + range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX + default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT +- default ARCH_MMAP_RND_BITS_MIN ++ default ARCH_MMAP_RND_BITS_MAX + depends on HAVE_ARCH_MMAP_RND_BITS + help + This value can be used to select the number of bits to use to +@@ -1066,7 +1066,7 @@ config ARCH_MMAP_RND_COMPAT_BITS + int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT + range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX + default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT +- default ARCH_MMAP_RND_COMPAT_BITS_MIN ++ default ARCH_MMAP_RND_COMPAT_BITS_MAX + depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS + help + This value can be used to select the number of bits to use to + +From 3cfb591e23181791195a74efe2e9065e0d4bd201 Mon Sep 17 00:00:00 2001 +From: Etienne JUVIGNY +Date: Mon, 15 Jan 2024 19:09:39 +0100 +Subject: Revert: drm/amd/pm: fix the high voltage and temperature issue + +This was supposed to fix the high voltage and temperature issue after the driver is unloaded on smu 13.0.0, +smu 13.0.7 and smu 13.0.10, but introduced an arguably more annoying issue. Let's revert it until a proper fix is offered. + +Fixes rdna3 shutdown/reboot hang. 
+ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 93cf73d6f..960966f4b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4050,23 +4050,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, + } + } + } else { +- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { +- case IP_VERSION(13, 0, 0): +- case IP_VERSION(13, 0, 7): +- case IP_VERSION(13, 0, 10): +- r = psp_gpu_reset(adev); +- break; +- default: +- tmp = amdgpu_reset_method; +- /* It should do a default reset when loading or reloading the driver, +- * regardless of the module parameter reset_method. +- */ +- amdgpu_reset_method = AMD_RESET_METHOD_NONE; +- r = amdgpu_asic_reset(adev); +- amdgpu_reset_method = tmp; +- break; +- } - - smu_v13_0_0_get_od_setting_limits(smu, - PP_OD_FEATURE_GFX_VF_CURVE, - &minimum, - &maximum); -- if (input[1] < minimum || -- input[1] > maximum) { -+ if (input[0] < minimum || -+ input[0] > maximum) { - dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n", -- input[1], minimum, maximum); -+ input[0], minimum, maximum); - return -EINVAL; - } - -- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[input[0]] = input[1]; -- od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFX_VF_CURVE_BIT; -+ for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) -+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0]; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT); - break; - - case PP_OD_RESTORE_DEFAULT_TABLE: -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -index 94ef5b4d116d..b9b3bf41eed3 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -@@ -1284,16 +1284,14 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, - od_table->OverDriveTable.UclkFmax); - break; - -- case SMU_OD_VDDC_CURVE: -+ case SMU_OD_VDDGFX_OFFSET: - if (!smu_v13_0_7_is_od_feature_supported(smu, - PP_OD_FEATURE_GFX_VF_CURVE_BIT)) - break; - -- size += sysfs_emit_at(buf, size, "OD_VDDC_CURVE:\n"); -- for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) -- size += sysfs_emit_at(buf, size, "%d: %dmv\n", -- i, -- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i]); -+ size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); -+ size += sysfs_emit_at(buf, size, "%dmV\n", -+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]); - break; - - case SMU_OD_RANGE: -@@ -1335,7 +1333,7 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, - PP_OD_FEATURE_GFX_VF_CURVE, - &min_value, - &max_value); -- size += sysfs_emit_at(buf, size, "VDDC_CURVE: %7dmv %10dmv\n", -+ size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n", - min_value, max_value); - } - break; -@@ -1484,29 +1482,26 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, - } - break; - -- case PP_OD_EDIT_VDDC_CURVE: -+ case PP_OD_EDIT_VDDGFX_OFFSET: - if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { -- dev_warn(adev->dev, "VF curve setting not supported!\n"); -+ dev_warn(adev->dev, "Gfx offset setting not supported!\n"); - return -ENOTSUPP; - } ++ tmp = amdgpu_reset_method; ++ /* It should do a default reset when loading or reloading the driver, ++ * regardless of the module parameter reset_method. 
++ */ ++ amdgpu_reset_method = AMD_RESET_METHOD_NONE; ++ r = amdgpu_asic_reset(adev); ++ amdgpu_reset_method = tmp; + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; +diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +index e1a5ee911..308ebeb43 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +@@ -733,7 +733,7 @@ static int smu_early_init(void *handle) + smu->adev = adev; + smu->pm_enabled = !!amdgpu_dpm; + smu->is_apu = false; +- smu->smu_baco.state = SMU_BACO_STATE_NONE; ++ smu->smu_baco.state = SMU_BACO_STATE_EXIT; + smu->smu_baco.platform_support = false; + smu->user_dpm_profile.fan_mode = -1; + +@@ -1753,31 +1753,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) + return 0; + } -- if (input[0] >= PP_NUM_OD_VF_CURVE_POINTS || -- input[0] < 0) -- return -EINVAL; +-static int smu_reset_mp1_state(struct smu_context *smu) +-{ +- struct amdgpu_device *adev = smu->adev; +- int ret = 0; - - smu_v13_0_7_get_od_setting_limits(smu, - PP_OD_FEATURE_GFX_VF_CURVE, - &minimum, - &maximum); -- if (input[1] < minimum || -- input[1] > maximum) { -+ if (input[0] < minimum || -+ input[0] > maximum) { - dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n", -- input[1], minimum, maximum); -+ input[0], minimum, maximum); - return -EINVAL; - } - -- od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[input[0]] = input[1]; -- od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFX_VF_CURVE_BIT; -+ for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) -+ od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0]; -+ od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT); - break; - - case PP_OD_RESTORE_DEFAULT_TABLE: --- -GitLab - - -From 8bad128720ebc69e37f1c66767fb276088ef4fa7 Mon Sep 17 00:00:00 2001 -From: Evan Quan -Date: Wed, 16 Aug 2023 14:51:19 +0800 -Subject: [PATCH] drm/amd/pm: fulfill the support for SMU13 `pp_dpm_dcefclk` - interface - -Fulfill the incomplete SMU13 `pp_dpm_dcefclk` implementation. 
- -Reported-by: Guan Yu -Signed-off-by: Evan Quan -Acked-by: Alex Deucher ---- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 27 +++++++++++++++++++ - .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 27 +++++++++++++++++++ - 2 files changed, 54 insertions(+) +- if ((!adev->in_runpm) && (!adev->in_suspend) && +- (!amdgpu_in_reset(adev))) +- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { +- case IP_VERSION(13, 0, 0): +- case IP_VERSION(13, 0, 7): +- case IP_VERSION(13, 0, 10): +- ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); +- break; +- default: +- break; +- } +- +- return ret; +-} +- + static int smu_hw_fini(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct smu_context *smu = adev->powerplay.pp_handle; +- int ret; -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -index bd0d5f027cac..5fdb2b3c042a 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -@@ -176,6 +176,7 @@ static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { - CLK_MAP(VCLK1, PPCLK_VCLK_1), - CLK_MAP(DCLK, PPCLK_DCLK_0), - CLK_MAP(DCLK1, PPCLK_DCLK_1), -+ CLK_MAP(DCEFCLK, PPCLK_DCFCLK), - }; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; +@@ -1795,15 +1774,7 @@ static int smu_hw_fini(void *handle) - static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = { -@@ -707,6 +708,22 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu) - pcie_table->num_of_link_levels++; - } + adev->pm.dpm_enabled = false; -+ /* dcefclk dpm table setup */ -+ dpm_table = &dpm_context->dpm_tables.dcef_table; -+ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) { -+ ret = smu_v13_0_set_single_dpm_table(smu, -+ SMU_DCEFCLK, -+ dpm_table); -+ if (ret) -+ return ret; -+ } else { -+ dpm_table->count = 1; -+ dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; -+ dpm_table->dpm_levels[0].enabled = true; -+ dpm_table->min = dpm_table->dpm_levels[0].value; -+ dpm_table->max = dpm_table->dpm_levels[0].value; -+ } -+ - return 0; +- ret = smu_smc_hw_cleanup(smu); +- if (ret) +- return ret; +- +- ret = smu_reset_mp1_state(smu); +- if (ret) +- return ret; +- +- return 0; ++ return smu_smc_hw_cleanup(smu); } -@@ -794,6 +811,9 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu, - case METRICS_CURR_FCLK: - *value = metrics->CurrClock[PPCLK_FCLK]; - break; -+ case METRICS_CURR_DCEFCLK: -+ *value = metrics->CurrClock[PPCLK_DCFCLK]; -+ break; - case METRICS_AVERAGE_GFXCLK: - if (metrics->AverageGfxActivity <= SMU_13_0_0_BUSY_THRESHOLD) - *value = metrics->AverageGfxclkFrequencyPostDs; -@@ -1047,6 +1067,9 @@ static int smu_v13_0_0_get_current_clk_freq_by_table(struct smu_context *smu, - case PPCLK_DCLK_1: - member_type = METRICS_AVERAGE_DCLK1; - break; -+ case PPCLK_DCFCLK: -+ member_type = METRICS_CURR_DCEFCLK; -+ break; - default: - return -EINVAL; - } -@@ -1196,6 +1219,9 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, - case SMU_DCLK1: - single_dpm_table = &(dpm_context->dpm_tables.dclk_table); - break; -+ case SMU_DCEFCLK: -+ single_dpm_table = &(dpm_context->dpm_tables.dcef_table); -+ break; - default: - break; - } -@@ -1209,6 +1235,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, - case SMU_VCLK1: - case SMU_DCLK: - case SMU_DCLK1: -+ case SMU_DCEFCLK: - ret = 
smu_v13_0_0_get_current_clk_freq_by_table(smu, clk_type, &curr_freq); - if (ret) { - dev_err(smu->adev->dev, "Failed to get current clock freq!"); -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -index b9b3bf41eed3..12949928e285 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -@@ -147,6 +147,7 @@ static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { - CLK_MAP(VCLK1, PPCLK_VCLK_1), - CLK_MAP(DCLK, PPCLK_DCLK_0), - CLK_MAP(DCLK1, PPCLK_DCLK_1), -+ CLK_MAP(DCEFCLK, PPCLK_DCFCLK), + static void smu_late_fini(void *handle) +diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +index f8b2e6cc2..e8329d157 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h ++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +@@ -419,7 +419,6 @@ enum smu_reset_mode { + enum smu_baco_state { + SMU_BACO_STATE_ENTER = 0, + SMU_BACO_STATE_EXIT, +- SMU_BACO_STATE_NONE, }; - static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = { -@@ -696,6 +697,22 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu) - pcie_table->num_of_link_levels++; - } - -+ /* dcefclk dpm table setup */ -+ dpm_table = &dpm_context->dpm_tables.dcef_table; -+ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) { -+ ret = smu_v13_0_set_single_dpm_table(smu, -+ SMU_DCEFCLK, -+ dpm_table); -+ if (ret) -+ return ret; -+ } else { -+ dpm_table->count = 1; -+ dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; -+ dpm_table->dpm_levels[0].enabled = true; -+ dpm_table->min = dpm_table->dpm_levels[0].value; -+ dpm_table->max = dpm_table->dpm_levels[0].value; -+ } -+ - return 0; - } - -@@ -777,6 +794,9 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, - case METRICS_CURR_FCLK: - *value = metrics->CurrClock[PPCLK_FCLK]; - break; -+ case METRICS_CURR_DCEFCLK: -+ *value = metrics->CurrClock[PPCLK_DCFCLK]; -+ break; - case METRICS_AVERAGE_GFXCLK: - *value = metrics->AverageGfxclkFrequencyPreDs; - break; -@@ -1027,6 +1047,9 @@ static int smu_v13_0_7_get_current_clk_freq_by_table(struct smu_context *smu, - case PPCLK_DCLK_1: - member_type = METRICS_CURR_DCLK1; - break; -+ case PPCLK_DCFCLK: -+ member_type = METRICS_CURR_DCEFCLK; -+ break; - default: - return -EINVAL; - } -@@ -1176,6 +1199,9 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, - case SMU_DCLK1: - single_dpm_table = &(dpm_context->dpm_tables.dclk_table); - break; -+ case SMU_DCEFCLK: -+ single_dpm_table = &(dpm_context->dpm_tables.dcef_table); -+ break; - default: - break; - } -@@ -1189,6 +1215,7 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, - case SMU_VCLK1: - case SMU_DCLK: - case SMU_DCLK1: -+ case SMU_DCEFCLK: - ret = smu_v13_0_7_get_current_clk_freq_by_table(smu, clk_type, &curr_freq); - if (ret) { - dev_err(smu->adev->dev, "Failed to get current clock freq!"); --- -GitLab - -From 3a2fb905145e76e4bbb32e90e0c6cd532dafb1b0 Mon Sep 17 00:00:00 2001 -From: Evan Quan -Date: Mon, 14 Aug 2023 10:16:27 +0800 -Subject: [PATCH] Revert "drm/amd/pm: disable the SMU13 OD feature support - temporarily" - -This reverts commit 3592cc20beeece83db4c50a0f400e2dd15139de9. - -The enablement for the new OD mechanism completed. Also, the support for -fan control related OD feature has been added via this new mechanism. 
-Thus, it is time to bring back the SMU13 OD support. - -Signed-off-by: Evan Quan -Acked-by: Alex Deucher ---- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 18 +++--------------- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 12 +++--------- - 2 files changed, 6 insertions(+), 24 deletions(-) - + struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -index c48f81450d24..093962a37688 100644 +index 82c4e1f1c..2ba77b1d1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c -@@ -348,13 +348,10 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) - table_context->power_play_table; - struct smu_baco_context *smu_baco = &smu->smu_baco; - PPTable_t *pptable = smu->smu_table.driver_pptable; --#if 0 -- PPTable_t *pptable = smu->smu_table.driver_pptable; - const OverDriveLimits_t * const overdrive_upperlimits = - &pptable->SkuTable.OverDriveLimitsBasicMax; - const OverDriveLimits_t * const overdrive_lowerlimits = - &pptable->SkuTable.OverDriveLimitsMin; --#endif +@@ -2772,13 +2766,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC) - smu->dc_controlled_by_gpio = true; -@@ -357,27 +357,18 @@ - smu_baco->maco_support = true; - } - -- /* -- * We are in the transition to a new OD mechanism. -- * Disable the OD feature support for SMU13 temporarily. -- * TODO: get this reverted when new OD mechanism online -- */ --#if 0 - if (!overdrive_lowerlimits->FeatureCtrlMask || - !overdrive_upperlimits->FeatureCtrlMask) - smu->od_enabled = false; - -+ table_context->thermal_controller_type = -+ powerplay_table->thermal_controller_type; -+ - /* - * Instead of having its own buffer space and get overdrive_table copied, - * smu->od_settings just points to the actual overdrive_table - */ - smu->od_settings = &powerplay_table->overdrive_table; --#else -- smu->od_enabled = false; --#endif + switch (mp1_state) { + case PP_MP1_STATE_UNLOAD: +- ret = smu_cmn_send_smc_msg_with_param(smu, +- SMU_MSG_PrepareMp1ForUnload, +- 0x55, NULL); - -- table_context->thermal_controller_type = -- powerplay_table->thermal_controller_type; - - smu->adev->pm.no_fan = - !(pptable->SkuTable.FeaturesToRun[0] & (1 << FEATURE_FAN_CONTROL_BIT)); +- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) +- ret = smu_v13_0_disable_pmfw_state(smu); +- ++ ret = smu_cmn_set_mp1_state(smu, mp1_state); + break; + default: + /* Ignore others */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -index 99bc449799a6..430ad1b05ba3 100644 +index 81eafed76..19c1289d0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c -@@ -338,12 +338,10 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) - struct smu_baco_context *smu_baco = &smu->smu_baco; - PPTable_t *smc_pptable = table_context->driver_pptable; - BoardTable_t *BoardTable = &smc_pptable->BoardTable; --#if 0 - const OverDriveLimits_t * const overdrive_upperlimits = - &smc_pptable->SkuTable.OverDriveLimitsBasicMax; - const OverDriveLimits_t * const overdrive_lowerlimits = - &smc_pptable->SkuTable.OverDriveLimitsMin; --#endif +@@ -2499,13 +2499,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, - if (powerplay_table->platform_caps & 
SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC) - smu->dc_controlled_by_gpio = true; -@@ -348,22 +348,18 @@ - smu_baco->maco_support = true; - } - --#if 0 - if (!overdrive_lowerlimits->FeatureCtrlMask || - !overdrive_upperlimits->FeatureCtrlMask) - smu->od_enabled = false; - -+ table_context->thermal_controller_type = -+ powerplay_table->thermal_controller_type; -+ - /* - * Instead of having its own buffer space and get overdrive_table copied, - * smu->od_settings just points to the actual overdrive_table - */ - smu->od_settings = &powerplay_table->overdrive_table; --#else -- smu->od_enabled = false; --#endif + switch (mp1_state) { + case PP_MP1_STATE_UNLOAD: +- ret = smu_cmn_send_smc_msg_with_param(smu, +- SMU_MSG_PrepareMp1ForUnload, +- 0x55, NULL); - -- table_context->thermal_controller_type = -- powerplay_table->thermal_controller_type; - - return 0; - } --- -GitLab - -From 072a8dc3b5260ba08ba2e66036c2c63abd77df52 Mon Sep 17 00:00:00 2001 -From: Lijo Lazar -Date: Thu, 24 Aug 2023 17:25:51 +0530 -Subject: [PATCH] drm/amd/pm: Fix clock reporting for SMUv13.0.6 - -On SMU v13.0.6, effective clocks are reported by FW which won't exactly -match with DPM level. Report the current clock based on the values -matching closest to the effective clock. Also, when deep sleep is -applied to a clock, report it with a special level "S:" as in sample -clock levels below - -S: 19Mhz * -0: 615Mhz -1: 800Mhz -2: 888Mhz -3: 1000Mhz - -Signed-off-by: Lijo Lazar -Reviewed-by: Hawking Zhang -Reviewed-by: Evan Quan ---- - .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 159 +++++++----------- - 1 file changed, 62 insertions(+), 97 deletions(-) - -diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c -index c2308783053c..29e1cada7667 100644 ---- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c -+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c -@@ -91,6 +91,8 @@ - #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 - #define LINK_SPEED_MAX 4 - -+#define SMU_13_0_6_DSCLK_THRESHOLD 100 -+ - static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { - MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), - MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), -@@ -783,13 +785,61 @@ static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu, - return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value); - } - -+static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, -+ struct smu_13_0_dpm_table *single_dpm_table, -+ uint32_t curr_clk, const char *clk_name) -+{ -+ struct pp_clock_levels_with_latency clocks; -+ int i, ret, size = 0, level = -1; -+ uint32_t clk1, clk2; -+ -+ ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); -+ if (ret) { -+ dev_err(smu->adev->dev, "Attempt to get %s clk levels failed!", -+ clk_name); -+ return ret; -+ } -+ -+ if (!clocks.num_levels) -+ return -EINVAL; -+ -+ if (curr_clk < SMU_13_0_6_DSCLK_THRESHOLD) { -+ size = sysfs_emit_at(buf, size, "S: %uMhz *\n", curr_clk); -+ for (i = 0; i < clocks.num_levels; i++) -+ size += sysfs_emit_at(buf, size, "%d: %uMhz\n", i, -+ clocks.data[i].clocks_in_khz / -+ 1000); -+ -+ } else { -+ if ((clocks.num_levels == 1) || -+ (curr_clk < (clocks.data[0].clocks_in_khz / 1000))) -+ level = 0; -+ for (i = 0; i < clocks.num_levels; i++) { -+ clk1 = clocks.data[i].clocks_in_khz / 1000; -+ -+ if (i < (clocks.num_levels - 1)) -+ clk2 = clocks.data[i + 1].clocks_in_khz / 1000; -+ -+ if (curr_clk >= clk1 && curr_clk < 
clk2) { -+ level = (curr_clk - clk1) <= (clk2 - curr_clk) ? -+ i : -+ i + 1; -+ } -+ -+ size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, -+ clk1, (level == i) ? "*" : ""); -+ } -+ } -+ -+ return size; -+} -+ - static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, - enum smu_clk_type type, char *buf) - { -- int i, now, size = 0; -+ int now, size = 0; - int ret = 0; - struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; -- struct pp_clock_levels_with_latency clocks; - struct smu_13_0_dpm_table *single_dpm_table; - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct smu_13_0_dpm_context *dpm_context = NULL; -@@ -852,26 +902,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, - } - - single_dpm_table = &(dpm_context->dpm_tables.uclk_table); -- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); -- if (ret) { -- dev_err(smu->adev->dev, -- "Attempt to get memory clk levels Failed!"); -- return ret; -- } - -- for (i = 0; i < clocks.num_levels; i++) -- size += sysfs_emit_at( -- buf, size, "%d: %uMhz %s\n", i, -- clocks.data[i].clocks_in_khz / 1000, -- (clocks.num_levels == 1) ? -- "*" : -- (smu_v13_0_6_freqs_in_same_level( -- clocks.data[i].clocks_in_khz / -- 1000, -- now) ? -- "*" : -- "")); -- break; -+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, -+ "mclk"); - - case SMU_SOCCLK: - ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_SOCCLK, -@@ -883,26 +916,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, - } - - single_dpm_table = &(dpm_context->dpm_tables.soc_table); -- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); -- if (ret) { -- dev_err(smu->adev->dev, -- "Attempt to get socclk levels Failed!"); -- return ret; -- } - -- for (i = 0; i < clocks.num_levels; i++) -- size += sysfs_emit_at( -- buf, size, "%d: %uMhz %s\n", i, -- clocks.data[i].clocks_in_khz / 1000, -- (clocks.num_levels == 1) ? -- "*" : -- (smu_v13_0_6_freqs_in_same_level( -- clocks.data[i].clocks_in_khz / -- 1000, -- now) ? -- "*" : -- "")); -- break; -+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, -+ "socclk"); - - case SMU_FCLK: - ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_FCLK, -@@ -914,26 +930,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, - } - - single_dpm_table = &(dpm_context->dpm_tables.fclk_table); -- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); -- if (ret) { -- dev_err(smu->adev->dev, -- "Attempt to get fclk levels Failed!"); -- return ret; -- } - -- for (i = 0; i < single_dpm_table->count; i++) -- size += sysfs_emit_at( -- buf, size, "%d: %uMhz %s\n", i, -- single_dpm_table->dpm_levels[i].value, -- (clocks.num_levels == 1) ? -- "*" : -- (smu_v13_0_6_freqs_in_same_level( -- clocks.data[i].clocks_in_khz / -- 1000, -- now) ? -- "*" : -- "")); -- break; -+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, -+ "fclk"); - - case SMU_VCLK: - ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_VCLK, -@@ -945,26 +944,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, - } - - single_dpm_table = &(dpm_context->dpm_tables.vclk_table); -- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); -- if (ret) { -- dev_err(smu->adev->dev, -- "Attempt to get vclk levels Failed!"); -- return ret; -- } - -- for (i = 0; i < single_dpm_table->count; i++) -- size += sysfs_emit_at( -- buf, size, "%d: %uMhz %s\n", i, -- single_dpm_table->dpm_levels[i].value, -- (clocks.num_levels == 1) ? 
-- "*" : -- (smu_v13_0_6_freqs_in_same_level( -- clocks.data[i].clocks_in_khz / -- 1000, -- now) ? -- "*" : -- "")); -- break; -+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, -+ "vclk"); - - case SMU_DCLK: - ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_DCLK, -@@ -976,26 +958,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, - } - - single_dpm_table = &(dpm_context->dpm_tables.dclk_table); -- ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table); -- if (ret) { -- dev_err(smu->adev->dev, -- "Attempt to get dclk levels Failed!"); -- return ret; -- } - -- for (i = 0; i < single_dpm_table->count; i++) -- size += sysfs_emit_at( -- buf, size, "%d: %uMhz %s\n", i, -- single_dpm_table->dpm_levels[i].value, -- (clocks.num_levels == 1) ? -- "*" : -- (smu_v13_0_6_freqs_in_same_level( -- clocks.data[i].clocks_in_khz / -- 1000, -- now) ? -- "*" : -- "")); -- break; -+ return smu_v13_0_6_print_clks(smu, buf, single_dpm_table, now, -+ "dclk"); - - default: +- if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) +- ret = smu_v13_0_disable_pmfw_state(smu); +- ++ ret = smu_cmn_set_mp1_state(smu, mp1_state); break; --- -GitLab - + default: + /* Ignore others */ diff --git a/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch b/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch index 6fe4c39..c12229d 100644 --- a/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch +++ b/SOURCES/tkg-unprivileged-CLONE_NEWUSER.patch @@ -69,7 +69,7 @@ index 08969f5aa38d59..ff601cb7a1fae0 100644 @@ -100,6 +100,10 @@ #include #include - + +#ifdef CONFIG_USER_NS +#include +#endif @@ -146,6 +146,6 @@ index 54211dbd516c57..16ca0c1516298d 100644 +int unprivileged_userns_clone; +#endif + - static struct kmem_cache *user_ns_cachep __read_mostly; + static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); diff --git a/SOURCES/valve-gamescope-framerate-control-fixups.patch b/SOURCES/valve-gamescope-framerate-control-fixups.patch new file mode 100644 index 0000000..425ee09 --- /dev/null +++ b/SOURCES/valve-gamescope-framerate-control-fixups.patch @@ -0,0 +1,647 @@ +From 79f7b70729663c5986c84e1a0888f50a55a81093 Mon Sep 17 00:00:00 2001 +From: Thomas Crider +Date: Mon, 18 Dec 2023 03:36:09 -0500 +Subject: [PATCH 1/6] revert 1101185bc50f5e45b8b89300914d9aa35a0c8cbe + +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 7dab01803..81672738a 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -6106,6 +6106,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, + + if (recalculate_timing) + drm_mode_set_crtcinfo(&saved_mode, 0); ++ else if (!old_stream) ++ drm_mode_set_crtcinfo(&mode, 0); + + /* + * If scaling is enabled and refresh rate didn't change +@@ -6669,8 +6671,6 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec + goto fail; + } + +- drm_mode_set_crtcinfo(mode, 0); +- + stream = create_validate_stream_for_sink(aconnector, mode, + to_dm_connector_state(connector->state), + NULL); +-- +2.43.0 + +From 38f2149c7e97f379210c658c21124d547e7b503a Mon Sep 17 00:00:00 2001 +From: Simon Ser +Date: Tue, 30 Aug 2022 17:29:43 +0000 +Subject: [PATCH] drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +This new kernel capability indicates whether async page-flips are +supported via the atomic uAPI. DRM clients can use it to check +for support before feeding DRM_MODE_PAGE_FLIP_ASYNC to the kernel. + +Make it clear that DRM_CAP_ASYNC_PAGE_FLIP is for legacy uAPI only. + +Signed-off-by: Simon Ser +Cc: Daniel Vetter +Cc: Joshua Ashton +Cc: Melissa Wen +Cc: Alex Deucher +Cc: Harry Wentland +Cc: Nicholas Kazlauskas +Cc: André Almeida +Cc: Ville Syrjälä +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20220830172851.269402-6-contact@emersion.fr +--- + drivers/gpu/drm/drm_ioctl.c | 5 +++++ + include/uapi/drm/drm.h | 10 +++++++++- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c +index ca2a6e6101dc8..5b1591e2b46c9 100644 +--- a/drivers/gpu/drm/drm_ioctl.c ++++ b/drivers/gpu/drm/drm_ioctl.c +@@ -302,6 +302,11 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ + case DRM_CAP_CRTC_IN_VBLANK_EVENT: + req->value = 1; + break; ++ case DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP: ++ req->value = drm_core_check_feature(dev, DRIVER_ATOMIC) && ++ dev->mode_config.async_page_flip && ++ !dev->mode_config.atomic_async_page_flip_not_supported; ++ break; + default: + return -EINVAL; + } +diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h +index 642808520d922..b1962628ecda9 100644 +--- a/include/uapi/drm/drm.h ++++ b/include/uapi/drm/drm.h +@@ -706,7 +706,8 @@ struct drm_gem_open { + /** + * DRM_CAP_ASYNC_PAGE_FLIP + * +- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. ++ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy ++ * page-flips. + */ + #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 + /** +@@ -773,6 +773,13 @@ + * :ref:`drm_sync_objects`. + */ + #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 ++/** ++ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP ++ * ++ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic ++ * commits. ++ */ ++#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 + + /* DRM_IOCTL_GET_CAP ioctl argument type */ + struct drm_get_cap { +-- +GitLab + +From f6de551227de6244119f9f2bea3ae81543ee7c4f Mon Sep 17 00:00:00 2001 +From: Simon Ser +Date: Tue, 30 Aug 2022 17:29:35 +0000 +Subject: [PATCH] drm: allow DRM_MODE_PAGE_FLIP_ASYNC for atomic commits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If the driver supports it, allow user-space to supply the +DRM_MODE_PAGE_FLIP_ASYNC flag to request an async page-flip. +Set drm_crtc_state.async_flip accordingly. + +Document that drivers will reject atomic commits if an async +flip isn't possible. This allows user-space to fall back to +something else. For instance, Xorg falls back to a blit. +Another option is to wait as close to the next vblank as +possible before performing the page-flip to reduce latency. 
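+
+A minimal user-space sketch of this fallback pattern (assuming a libdrm
+client with an open DRM fd and an already-built atomic request `req`;
+names are illustrative and error handling is trimmed):
+
+    uint64_t cap = 0;
+    uint32_t flags = DRM_MODE_ATOMIC_NONBLOCK;
+
+    /* only request an async flip if the driver advertises support */
+    if (drmGetCap(fd, DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP, &cap) == 0 && cap == 1)
+        flags |= DRM_MODE_PAGE_FLIP_ASYNC;
+
+    ret = drmModeAtomicCommit(fd, req, flags, NULL);
+    if (ret < 0 && (flags & DRM_MODE_PAGE_FLIP_ASYNC)) {
+        /* driver refused the async flip for this update: retry vsynced */
+        flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
+        ret = drmModeAtomicCommit(fd, req, flags, NULL);
+    }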
+ +v2: document new uAPI + +Signed-off-by: Simon Ser +Co-developed-by: André Almeida +Signed-off-by: André Almeida +Cc: Daniel Vetter +Cc: Joshua Ashton +Cc: Melissa Wen +Cc: Alex Deucher +Cc: Harry Wentland +Cc: Nicholas Kazlauskas +Cc: Ville Syrjälä +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20220830172851.269402-5-contact@emersion.fr +--- + drivers/gpu/drm/drm_atomic_uapi.c | 28 +++++++++++++++++++++++++--- + include/uapi/drm/drm_mode.h | 4 ++++ + 2 files changed, 29 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c +index c06d0639d552d..945761968428e 100644 +--- a/drivers/gpu/drm/drm_atomic_uapi.c ++++ b/drivers/gpu/drm/drm_atomic_uapi.c +@@ -1282,6 +1282,18 @@ static void complete_signaling(struct drm_device *dev, + kfree(fence_state); + } + ++static void ++set_async_flip(struct drm_atomic_state *state) ++{ ++ struct drm_crtc *crtc; ++ struct drm_crtc_state *crtc_state; ++ int i; ++ ++ for_each_new_crtc_in_state(state, crtc, crtc_state, i) { ++ crtc_state->async_flip = true; ++ } ++} ++ + int drm_mode_atomic_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) + { +@@ -1322,9 +1334,16 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, + } + + if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) { +- drm_dbg_atomic(dev, +- "commit failed: invalid flag DRM_MODE_PAGE_FLIP_ASYNC\n"); +- return -EINVAL; ++ if (!dev->mode_config.async_page_flip) { ++ drm_dbg_atomic(dev, ++ "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported\n"); ++ return -EINVAL; ++ } ++ if (dev->mode_config.atomic_async_page_flip_not_supported) { ++ drm_dbg_atomic(dev, ++ "commit failed: DRM_MODE_PAGE_FLIP_ASYNC not supported with atomic\n"); ++ return -EINVAL; ++ } + } + + /* can't test and expect an event at the same time. */ +@@ -1422,6 +1441,9 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, + if (ret) + goto out; + ++ if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) ++ set_async_flip(state); ++ + if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) { + ret = drm_atomic_check_only(state); + } else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) { +diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h +index 46becedf5b2fc..f1422c8387224 100644 +--- a/include/uapi/drm/drm_mode.h ++++ b/include/uapi/drm/drm_mode.h +@@ -949,6 +949,10 @@ struct hdr_output_metadata { + * Request that the page-flip is performed as soon as possible, ie. with no + * delay due to waiting for vblank. This may cause tearing to be visible on + * the screen. ++ * ++ * When used with atomic uAPI, the driver will return an error if the hardware ++ * doesn't support performing an asynchronous page-flip for this update. ++ * User-space should handle this, e.g. by falling back to a regular page-flip. + */ + #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 + #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4 +-- +GitLab + +From 9d923e79d060d8c7218c8229c65c964b7f04e864 Mon Sep 17 00:00:00 2001 +From: Simon Ser +Date: Tue, 30 Aug 2022 17:29:26 +0000 +Subject: [PATCH] drm: introduce + drm_mode_config.atomic_async_page_flip_not_supported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This new field indicates whether the driver has the necessary logic +to support async page-flips via the atomic uAPI. This is leveraged by +the next commit to allow user-space to use this functionality. + +All atomic drivers setting drm_mode_config.async_page_flip are updated +to also set drm_mode_config.atomic_async_page_flip_not_supported. 
We +will gradually check and update these drivers to properly handle +drm_crtc_state.async_flip in their atomic logic. + +The goal of this negative flag is the same as +fb_modifiers_not_supported: we want to eventually get rid of all +drivers missing atomic support for async flips. New drivers should not +set this flag, instead they should support atomic async flips (if +they support async flips at all). IOW, we don't want more drivers +with async flip support for legacy but not atomic. + +v2: only set the flag on atomic drivers (remove it on amdgpu DCE and +on radeon) + +Signed-off-by: Simon Ser +Cc: Daniel Vetter +Cc: Joshua Ashton +Cc: Melissa Wen +Cc: Alex Deucher +Cc: Harry Wentland +Cc: Nicholas Kazlauskas +Cc: André Almeida +Cc: Ville Syrjälä +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20220830172851.269402-4-contact@emersion.fr +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + + drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 + + drivers/gpu/drm/i915/display/intel_display_driver.c | 1 + + drivers/gpu/drm/nouveau/nouveau_display.c | 1 + + drivers/gpu/drm/vc4/vc4_kms.c | 1 + + include/drm/drm_mode_config.h | 11 +++++++++++ + 6 files changed, 16 insertions(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 81672738a..05c404fcc 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -3998,6 +3998,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) + adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* indicates support for immediate flip */ + adev_to_drm(adev)->mode_config.async_page_flip = true; ++ adev_to_drm(adev)->mode_config.atomic_async_page_flip_not_supported = true; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) +diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +index fa0f9a93d..301b222c4 100644 +--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c ++++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +@@ -639,6 +639,7 @@ static int atmel_hlcdc_dc_modeset_init(struct drm_device *dev) + dev->mode_config.max_height = dc->desc->max_height; + dev->mode_config.funcs = &mode_config_funcs; + dev->mode_config.async_page_flip = true; ++ dev->mode_config.atomic_async_page_flip_not_supported = true; + + return 0; + } +diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c +index 8f144d4d3..f290c5c2e 100644 +--- a/drivers/gpu/drm/i915/display/intel_display_driver.c ++++ b/drivers/gpu/drm/i915/display/intel_display_driver.c +@@ -126,6 +126,7 @@ + mode_config->helper_private = &intel_mode_config_funcs; + + mode_config->async_page_flip = HAS_ASYNC_FLIPS(i915) && !i915->params.disable_async_page_flip; ++ mode_config->atomic_async_page_flip_not_supported = true; + + /* + * Maximum framebuffer dimensions, chosen to match +diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c +index 99977e5fe..540895dab 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_display.c ++++ b/drivers/gpu/drm/nouveau/nouveau_display.c +@@ -720,6 +720,7 @@ nouveau_display_create(struct drm_device *dev) + dev->mode_config.async_page_flip = false; + else + dev->mode_config.async_page_flip = true; ++ dev->mode_config.atomic_async_page_flip_not_supported = true; + + drm_kms_helper_poll_init(dev); + drm_kms_helper_poll_disable(dev); +diff --git 
a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
+index 5495f2a94..5b6b311e7 100644
+--- a/drivers/gpu/drm/vc4/vc4_kms.c
++++ b/drivers/gpu/drm/vc4/vc4_kms.c
+@@ -1068,6 +1068,7 @@ int vc4_kms_load(struct drm_device *dev)
+ 	dev->mode_config.helper_private = &vc4_mode_config_helpers;
+ 	dev->mode_config.preferred_depth = 24;
+ 	dev->mode_config.async_page_flip = true;
++	dev->mode_config.atomic_async_page_flip_not_supported = true;
+ 	dev->mode_config.normalize_zpos = true;
+ 
+ 	ret = vc4_ctm_obj_init(vc4);
+diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
+index 973119a91..47b005671 100644
+--- a/include/drm/drm_mode_config.h
++++ b/include/drm/drm_mode_config.h
+@@ -918,6 +918,17 @@ struct drm_mode_config {
+ 	 */
+ 	bool async_page_flip;
+ 
++	/**
++	 * @atomic_async_page_flip_not_supported:
++	 *
++	 * If true, the driver does not support async page-flips with the
++	 * atomic uAPI. This is only used by old drivers which haven't yet
++	 * accommodated for &drm_crtc_state.async_flip in their atomic logic,
++	 * even if they have &drm_mode_config.async_page_flip set to true.
++	 * New drivers shall not set this flag.
++	 */
++	bool atomic_async_page_flip_not_supported;
++
+ 	/**
+ 	 * @fb_modifiers_not_supported:
+ 	 *
+-- 
+2.43.0
+
+From 24ac301d6208f1135644fe32514994799e79a6a0 Mon Sep 17 00:00:00 2001
+From: Simon Ser 
+Date: Tue, 30 Aug 2022 17:29:52 +0000
+Subject: [PATCH] amd/display: indicate support for atomic async page-flips on
+ DC
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+amdgpu_dm_commit_planes() already sets the flip_immediate flag for
+async page-flips. This flag is used to set the UNP_FLIP_CONTROL
+register. Thus, no additional change is required to handle async
+page-flips with the atomic uAPI.
+
+v2: make it clear this commit is about DC and not only DCN
+
+Signed-off-by: Simon Ser 
+Cc: Joshua Ashton 
+Cc: Melissa Wen 
+Cc: Alex Deucher 
+Cc: Harry Wentland 
+Cc: Nicholas Kazlauskas 
+Cc: André Almeida 
+Signed-off-by: Cristian Ciocaltea 
+Link: https://lore.kernel.org/r/20220830172851.269402-7-contact@emersion.fr
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 27a1e3a0046c9..a003e796aa183 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -3980,7 +3980,6 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
+ 	adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ 	/* indicates support for immediate flip */
+ 	adev_to_drm(adev)->mode_config.async_page_flip = true;
+-	adev_to_drm(adev)->mode_config.atomic_async_page_flip_not_supported = true;
+ 
+ 	state = kzalloc(sizeof(*state), GFP_KERNEL);
+ 	if (!state)
+-- 
+GitLab
+
+From 32993fef83542e3bea66ed3ceec4944b3ae9d4f1 Mon Sep 17 00:00:00 2001
+From: Joshua Ashton 
+Date: Mon, 14 Nov 2022 19:52:30 +0000
+Subject: [PATCH] drm/amd/display: Always set crtcinfo from
+ create_stream_for_sink
+
+Given that we always pass dm_state into here now, this won't ever
+trigger anymore.
+
+This is needed as otherwise we will always fail mode validation with
+invalid clocks or link bandwidth errors.
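+
+For context, drm_mode_set_crtcinfo() is what derives the hardware
+timings that stream creation consumes (illustrative sketch, not part
+of this change):
+
+    drm_mode_set_crtcinfo(&mode, 0);
+    /* mode.crtc_clock, mode.crtc_hdisplay, mode.crtc_htotal,
+     * mode.crtc_vdisplay, mode.crtc_vtotal, ... are now derived
+     * from the logical timings, honouring interlace/doublescan */
+
+A mode that never passed through it may still carry zeroed crtc_*
+fields, which is what produces those validation failures.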
+ +Signed-off-by: Joshua Ashton +Signed-off-by: Harry Wentland +Reviewed-by: Harry Wentland + +Cc: Pekka Paalanen +Cc: Sebastian Wick +Cc: Vitaly.Prosyak@amd.com +Cc: Joshua Ashton +Cc: Simon Ser +Cc: Melissa Wen +Cc: dri-devel@lists.freedesktop.org +Cc: amd-gfx@lists.freedesktop.org +Signed-off-by: Alex Deucher +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 81672738a..8eb14c74a 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -6106,7 +6106,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, + + if (recalculate_timing) + drm_mode_set_crtcinfo(&saved_mode, 0); +- else if (!old_stream) ++ else + drm_mode_set_crtcinfo(&mode, 0); + + /* +-- +2.43.0 + +From 0af59135c2a9e05af87bc82f492fab13fff52fbd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Andr=C3=A9=20Almeida?= +Date: Wed, 22 Nov 2023 13:19:38 -0300 +Subject: [PATCH] drm: Refuse to async flip with atomic prop changes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Given that prop changes may lead to modesetting, which would defeat the +fast path of the async flip, refuse any atomic prop change for async +flips in atomic API. The only exception is the framebuffer ID to flip +to. Currently the only plane type supported is the primary one. + +Signed-off-by: André Almeida +Reviewed-by: Simon Ser +--- + drivers/gpu/drm/drm_atomic_uapi.c | 52 +++++++++++++++++++++++++++-- + drivers/gpu/drm/drm_crtc_internal.h | 2 +- + drivers/gpu/drm/drm_mode_object.c | 2 +- + 3 files changed, 51 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c +index 37caa6c33e22b3..86083184ac6bb2 100644 +--- a/drivers/gpu/drm/drm_atomic_uapi.c ++++ b/drivers/gpu/drm/drm_atomic_uapi.c +@@ -964,13 +964,28 @@ int drm_atomic_connector_commit_dpms(struct drm_atomic_state *state, + return ret; + } + ++static int drm_atomic_check_prop_changes(int ret, uint64_t old_val, uint64_t prop_value, ++ struct drm_property *prop) ++{ ++ if (ret != 0 || old_val != prop_value) { ++ drm_dbg_atomic(prop->dev, ++ "[PROP:%d:%s] No prop can be changed during async flip\n", ++ prop->base.id, prop->name); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + int drm_atomic_set_property(struct drm_atomic_state *state, + struct drm_file *file_priv, + struct drm_mode_object *obj, + struct drm_property *prop, +- uint64_t prop_value) ++ uint64_t prop_value, ++ bool async_flip) + { + struct drm_mode_object *ref; ++ uint64_t old_val; + int ret; + + if (!drm_property_change_valid_get(prop, prop_value, &ref)) +@@ -987,6 +1002,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state, + break; + } + ++ if (async_flip) { ++ ret = drm_atomic_connector_get_property(connector, connector_state, ++ prop, &old_val); ++ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); ++ break; ++ } ++ + ret = drm_atomic_connector_set_property(connector, + connector_state, file_priv, + prop, prop_value); +@@ -1002,6 +1024,13 @@ int drm_atomic_set_property(struct drm_atomic_state *state, + break; + } + ++ if (async_flip) { ++ ret = drm_atomic_crtc_get_property(crtc, crtc_state, ++ prop, &old_val); ++ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); ++ break; ++ } ++ + ret = drm_atomic_crtc_set_property(crtc, + 
crtc_state, prop, prop_value); + break; +@@ -1009,6 +1038,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state, + case DRM_MODE_OBJECT_PLANE: { + struct drm_plane *plane = obj_to_plane(obj); + struct drm_plane_state *plane_state; ++ struct drm_mode_config *config = &plane->dev->mode_config; + + plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) { +@@ -1016,6 +1046,21 @@ int drm_atomic_set_property(struct drm_atomic_state *state, + break; + } + ++ if (async_flip && prop != config->prop_fb_id) { ++ ret = drm_atomic_plane_get_property(plane, plane_state, ++ prop, &old_val); ++ ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); ++ break; ++ } ++ ++ if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { ++ drm_dbg_atomic(prop->dev, ++ "[OBJECT:%d] Only primary planes can be changed during async flip\n", ++ obj->id); ++ ret = -EINVAL; ++ break; ++ } ++ + ret = drm_atomic_plane_set_property(plane, + plane_state, file_priv, + prop, prop_value); +@@ -1295,6 +1340,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, + struct drm_out_fence_state *fence_state; + int ret = 0; + unsigned int i, j, num_fences; ++ bool async_flip = false; + + /* disallow for drivers not supporting atomic: */ + if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) +@@ -1408,8 +1454,8 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, + goto out; + } + +- ret = drm_atomic_set_property(state, file_priv, +- obj, prop, prop_value); ++ ret = drm_atomic_set_property(state, file_priv, obj, ++ prop, prop_value, async_flip); + if (ret) { + drm_mode_object_put(obj); + goto out; +diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h +index 501a10edd0e1dc..381130cebe811c 100644 +--- a/drivers/gpu/drm/drm_crtc_internal.h ++++ b/drivers/gpu/drm/drm_crtc_internal.h +@@ -251,7 +251,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state, + struct drm_file *file_priv, + struct drm_mode_object *obj, + struct drm_property *prop, +- uint64_t prop_value); ++ uint64_t prop_value, bool async_flip); + int drm_atomic_get_property(struct drm_mode_object *obj, + struct drm_property *property, uint64_t *val); + +diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c +index ac0d2ce3f87041..0e8355063eee36 100644 +--- a/drivers/gpu/drm/drm_mode_object.c ++++ b/drivers/gpu/drm/drm_mode_object.c +@@ -538,7 +538,7 @@ static int set_property_atomic(struct drm_mode_object *obj, + obj_to_connector(obj), + prop_value); + } else { +- ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value); ++ ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, false); + if (ret) + goto out; + ret = drm_atomic_commit(state); +From 1edf3fbbeb36440e1222c2fe0e8127fb804c5278 Mon Sep 17 00:00:00 2001 +From: Hamza Mahfooz +Date: Fri, 4 Aug 2023 11:13:04 -0400 +Subject: [PATCH] drm/amd/display: ensure async flips are only accepted for + fast updates + +We should be checking to see if async flips are supported in +amdgpu_dm_atomic_check() (i.e. not dm_crtc_helper_atomic_check()). Also, +async flipping isn't supported if a plane's framebuffer changes memory +domains during an atomic commit. So, move the check from +dm_crtc_helper_atomic_check() to amdgpu_dm_atomic_check() and check if +the memory domain has changed in amdgpu_dm_atomic_check(). 
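+
+For reference, the counterpart added to amdgpu_dm_atomic_check() in the
+upstream version of this commit looks roughly like this (abridged
+sketch; only the removal side is carried in this patch):
+
+    /*
+     * Only allow async flips for fast updates that don't change the FB
+     * pitch, the DCC state, rotation, etc.
+     */
+    if (new_crtc_state->async_flip && lock_and_validation_needed) {
+        drm_dbg_atomic(crtc->dev,
+                       "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+                       crtc->base.id, crtc->name);
+        ret = -EINVAL;
+        goto fail;
+    }
+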
+ +Cc: stable@vger.kernel.org +Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2733 +Fixes: c1e18c44dc7f ("drm/amd/display: only accept async flips for fast updates") +Reviewed-by: Harry Wentland +Signed-off-by: Hamza Mahfooz +Signed-off-by: Alex Deucher +(cherry picked from commit a7c0cad0dc060bb77e9c9d235d68441b0fc69507) +Signed-off-by: Cristian Ciocaltea +--- + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +index be1ebe826442a4..4b223db0cf2fe8 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +@@ -473,18 +473,6 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, + return -EINVAL; + } + +- /* +- * Only allow async flips for fast updates that don't change the FB +- * pitch, the DCC state, rotation, etc. +- */ +- if (crtc_state->async_flip && +- dm_crtc_state->update_type != UPDATE_TYPE_FAST) { +- drm_dbg_atomic(crtc->dev, +- "[CRTC:%d:%s] async flips are only supported for fast updates\n", +- crtc->base.id, crtc->name); +- return -EINVAL; +- } +- + /* In some use cases, like reset, no stream is attached */ + if (!dm_crtc_state->stream) + return 0; diff --git a/SOURCES/winesync.patch b/SOURCES/winesync.patch index 5b2eaf3..459bf54 100644 --- a/SOURCES/winesync.patch +++ b/SOURCES/winesync.patch @@ -15,9 +15,9 @@ diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 94e9fb4cdd76..4f9e3d80a6e8 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig -@@ -561,6 +561,17 @@ - This driver can also be built as a module. If so, the module - will be called tps6594-pfsm. +@@ -519,6 +519,17 @@ + + If you do not intend to run this kernel as a guest, say N. +config WINESYNC + tristate "Synchronization primitives for Wine" @@ -30,9 +30,9 @@ index 94e9fb4cdd76..4f9e3d80a6e8 100644 + + If unsure, say N. 
+ - source "drivers/misc/c2port/Kconfig" - source "drivers/misc/eeprom/Kconfig" - source "drivers/misc/cb710/Kconfig" + config TMR_MANAGER + tristate "Select TMR Manager" + depends on MICROBLAZE && MB_MANAGER diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 2be8542616dd..d061fe45407b 100644 --- a/drivers/misc/Makefile @@ -2175,17 +2175,17 @@ Subject: [PATCH 13/34] selftests: winesync: Add some tests for semaphore create mode 100644 tools/testing/selftests/drivers/winesync/winesync.c diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile -index c852eb40c4f7..a366016d6254 100644 +index 8247a7c69..553c949dc 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile -@@ -14,6 +14,7 @@ TARGETS += drivers/dma-buf +@@ -18,6 +18,7 @@ TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net/bonding TARGETS += drivers/net/team +TARGETS += drivers/winesync + TARGETS += dt TARGETS += efivarfs TARGETS += exec - TARGETS += fchmodat2 diff --git a/tools/testing/selftests/drivers/winesync/Makefile b/tools/testing/selftests/drivers/winesync/Makefile new file mode 100644 index 000000000000..43b39fdeea10 @@ -3286,7 +3286,7 @@ diff --git a/MAINTAINERS b/MAINTAINERS index 72b9654f764c..ff31beb17835 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -22976,6 +22976,15 @@ +@@ -23391,6 +23391,15 @@ S: Maintained F: drivers/media/rc/winbond-cir.c diff --git a/SPECS/kernel.spec b/SPECS/kernel.spec index cd80e42..0256d09 100644 --- a/SPECS/kernel.spec +++ b/SPECS/kernel.spec @@ -160,18 +160,18 @@ Summary: The Linux kernel # the --with-release option overrides this setting.) %define debugbuildsenabled 1 %define buildid .fsync -%define specrpmversion 6.6.14 -%define specversion 6.6.14 -%define patchversion 6.6 +%define specrpmversion 6.7.3 +%define specversion 6.7.3 +%define patchversion 6.7 %define pkgrelease 200 %define kversion 6 -%define tarfile_release 6.6.14 +%define tarfile_release 6.7.3 # This is needed to do merge window version magic -%define patchlevel 6 +%define patchlevel 7 # This allows pkg_release to have configurable %%{?dist} tag -%define specrelease 202%{?buildid}%{?dist} +%define specrelease 201%{?buildid}%{?dist} # This defines the kabi tarball version -%define kabiversion 6.6.14 +%define kabiversion 6.7.3 # If this variable is set to 1, a bpf selftests build failure will cause a # fatal kernel package build error @@ -224,6 +224,8 @@ Summary: The Linux kernel %define with_cross_headers %{?_without_cross_headers: 0} %{?!_without_cross_headers: 1} # perf %define with_perf %{?_without_perf: 0} %{?!_without_perf: 1} +# libperf +%define with_libperf %{?_without_libperf: 0} %{?!_without_libperf: 1} # tools %define with_tools %{?_without_tools: 0} %{?!_without_tools: 1} # bpf tool @@ -285,7 +287,7 @@ Summary: The Linux kernel # Want to build a vanilla kernel build without any non-upstream patches? 
%define with_vanilla %{?_with_vanilla: 1} %{?!_with_vanilla: 0} -%ifarch x86_64 +%ifarch x86_64 aarch64 %define with_efiuki %{?_without_efiuki: 0} %{?!_without_efiuki: 1} %else %define with_efiuki 0 @@ -300,9 +302,10 @@ Summary: The Linux kernel # no stablelist %define with_kernel_abi_stablelists 0 # Fedora builds these separately -%define with_perf 0 -%define with_tools 0 -%define with_bpftool 0 +%define with_perf 1 +%define with_libperf 1 +%define with_tools 1 +%define with_bpftool 1 # No realtime fedora variants %define with_realtime 0 %define with_arm64_64k 0 @@ -388,6 +391,7 @@ Summary: The Linux kernel %define with_realtime 0 %define with_vdso_install 0 %define with_perf 0 +%define with_libperf 0 %define with_tools 0 %define with_bpftool 0 %define with_kernel_abi_stablelists 0 @@ -402,6 +406,7 @@ Summary: The Linux kernel %define with_base 0 %define with_vdso_install 0 %define with_perf 0 +%define with_libperf 0 %define with_tools 0 %define with_bpftool 0 %define with_kernel_abi_stablelists 0 @@ -417,6 +422,7 @@ Summary: The Linux kernel %define with_debuginfo 0 %define with_vdso_install 0 %define with_perf 0 +%define with_libperf 0 %define with_tools 0 %define with_bpftool 0 %define with_kernel_abi_stablelists 0 @@ -480,6 +486,7 @@ Summary: The Linux kernel %define with_cross_headers 0 %define with_tools 0 %define with_perf 0 +%define with_libperf 0 %define with_bpftool 0 %define with_selftests 0 %define with_debug 0 @@ -572,6 +579,7 @@ Summary: The Linux kernel %define with_debuginfo 0 %define with_perf 0 +%define with_libperf 0 %define with_tools 0 %define with_bpftool 0 %define with_selftests 0 @@ -696,7 +704,11 @@ BuildRequires: opencsd-devel >= 1.0.0 BuildRequires: python3-docutils BuildRequires: gettext ncurses-devel BuildRequires: libcap-devel libcap-ng-devel +# The following are rtla requirements +BuildRequires: python3-docutils +BuildRequires: libtraceevent-devel BuildRequires: libtracefs-devel + %ifnarch s390x BuildRequires: pciutils-devel %endif @@ -713,6 +725,9 @@ BuildRequires: zlib-devel binutils-devel %endif %if %{with_selftests} BuildRequires: clang llvm-devel fuse-devel +%ifarch x86_64 +BuildRequires: lld +%endif BuildRequires: libcap-devel libcap-ng-devel rsync libmnl-devel BuildRequires: numactl-devel %endif @@ -785,7 +800,7 @@ BuildRequires: binutils BuildRequires: lvm2 BuildRequires: systemd-boot-unsigned # For systemd-stub and systemd-pcrphase -BuildRequires: systemd-udev +BuildRequires: systemd-udev >= 252-1 # For TPM operations in UKI initramfs BuildRequires: tpm2-tools %endif @@ -799,7 +814,7 @@ BuildRequires: tpm2-tools Source0: linux-%{tarfile_release}.tar.xz Source1: Makefile.rhelver - +Source2: kernel.changelog # Name of the packaged file containing signing key %ifarch ppc64le @@ -919,8 +934,6 @@ Source77: partial-clang_lto-aarch64-debug-snip.config Source80: generate_all_configs.sh Source81: process_configs.sh -Source82: update_scripts.sh - Source84: mod-internal.list Source85: mod-partner.list @@ -984,9 +997,8 @@ Patch200: tkg.patch Patch202: fsync.patch Patch203: OpenRGB.patch Patch206: amdgpu-si-cik-default.patch +Patch207: nouveau-gsp-default.patch Patch208: winesync.patch -Patch209: tkg-BBRv2.patch -Patch210: tkg-bcachefs.patch Patch211: tkg-misc-additions.patch Patch212: tkg-unprivileged-CLONE_NEWUSER.patch @@ -1007,17 +1019,27 @@ Patch319: v10-0002-HID-asus-make-asus_kbd_init-generic-remove-rog_n.patch Patch320: v10-0003-HID-asus-add-ROG-Ally-N-Key-ID-and-keycodes.patch Patch321: v10-0004-HID-asus-add-ROG-Ally-xpad-settings.patch Patch323: 
rog-ally-bmc150.patch +Patch404: rog-ally-gyro-fix.patch # hdr: https://github.com/CachyOS/kernel-patches Patch326: 0001-amd-hdr.patch Patch327: 0001-add-acpi_call.patch Patch328: uinput.patch +# fixes framerate control in gamescope +# also fixes https://gitlab.freedesktop.org/drm/amd/-/issues/2733 +Patch330: valve-gamescope-framerate-control-fixups.patch + +# fixes HAINAN amdgpu card not being bootable +# https://gitlab.freedesktop.org/drm/amd/-/issues/1839 +Patch331: amdgpu-HAINAN-variant-fixup.patch + # steamdeck oled patches Patch310: steamdeck-oled-wifi.patch -Patch311: steamdeck-oled-bt.patch Patch312: steamdeck-oled-audio.patch -Patch314: steamdeck-oled-hw-quirks.patch + +# t2 macbook patches +Patch332: t2linux.patch # temporary patches Patch401: 0001-Remove-REBAR-size-quirk-for-Sapphire-RX-5600-XT-Puls.patch @@ -1179,6 +1201,23 @@ This package provides debug information for the perf python bindings. # with_perf %endif +%if %{with_libperf} +%package -n libperf +Summary: The perf library from kernel source +License: GPL-2.0-only AND (LGPL-2.1-only OR BSD-2-Clause) +%description -n libperf +This package contains the kernel source perf library. + +%package -n libperf-devel +Summary: Developement files for the perf library from kernel source +License: GPL-2.0-only AND (LGPL-2.1-only OR BSD-2-Clause) +%description -n libperf-devel +This package includes libraries and header files needed for development +of applications which use perf library from kernel source. + +# with_libperf +%endif + %if %{with_tools} %package -n %{package_name}-tools Summary: Assortment of tools for the Linux kernel @@ -1233,13 +1272,14 @@ This package provides debug information for package %{package_name}-tools. %if 0%{gemini} Epoch: %{gemini} %endif -Summary: RTLA: Real-Time Linux Analysis tools +Summary: Real-Time Linux Analysis tools +Requires: libtraceevent +Requires: libtracefs %description -n rtla -The rtla tool is a meta-tool that includes a set of commands that -aims to analyze the real-time properties of Linux. But, instead of -testing Linux as a black box, rtla leverages kernel tracing -capabilities to provide precise information about the properties -and root causes of unexpected results. +The rtla meta-tool includes a set of commands that aims to analyze +the real-time properties of Linux. Instead of testing Linux as a black box, +rtla leverages kernel tracing capabilities to provide precise information +about the properties and root causes of unexpected results. %package -n rv Summary: RV: Runtime Verification @@ -1256,18 +1296,14 @@ analysing the logical and timing behavior of Linux. %if %{with_bpftool} -%define bpftoolversion 7.3.0 - %package -n bpftool Summary: Inspection and simple manipulation of eBPF programs and maps -Version: %{bpftoolversion} %description -n bpftool This package contains the bpftool, which allows inspection and simple manipulation of eBPF programs and maps. %package -n bpftool-debuginfo Summary: Debug information for package bpftool -Version: %{bpftoolversion} Group: Development/Debug Requires: %{name}-debuginfo-common-%{_target_cpu} = %{specrpmversion}-%{release} AutoReqProv: no @@ -1287,7 +1323,7 @@ This package provides debug information for the bpftool package. %package selftests-internal Summary: Kernel samples and selftests -Requires: binutils, bpftool, iproute-tc, nmap-ncat, python3, fuse-libs +Requires: binutils, bpftool, iproute-tc, nmap-ncat, python3, fuse-libs, keyutils %description selftests-internal Kernel sample programs and selftests. 
@@ -1297,6 +1333,8 @@ Kernel sample programs and selftests. # of matching the pattern against the symlinks file. %{expand:%%global _find_debuginfo_opts %{?_find_debuginfo_opts} -p '.*%%{_libexecdir}/(ksamples|kselftests)/.*|XXX' -o selftests-debuginfo.list} +%define __requires_exclude ^liburandom_read.so.*$ + # with_selftests %endif @@ -1716,6 +1754,26 @@ Prebuilt debug unified kernel image for virtual machines. Prebuilt default unified kernel image for virtual machines. %endif +%if %{with_arm64_16k} && %{with_debug} && %{with_efiuki} +%description 16k-debug-uki-virt +Prebuilt 16k debug unified kernel image for virtual machines. +%endif + +%if %{with_arm64_16k_base} && %{with_efiuki} +%description 16k-uki-virt +Prebuilt 16k unified kernel image for virtual machines. +%endif + +%if %{with_arm64_64k} && %{with_debug} && %{with_efiuki} +%description 64k-debug-uki-virt +Prebuilt 64k debug unified kernel image for virtual machines. +%endif + +%if %{with_arm64_64k_base} && %{with_efiuki} +%description 64k-uki-virt +Prebuilt 64k unified kernel image for virtual machines. +%endif + %if %{with_ipaclones} %kernel_ipaclones_package %endif @@ -1790,9 +1848,8 @@ ApplyOptionalPatch tkg.patch ApplyOptionalPatch fsync.patch ApplyOptionalPatch OpenRGB.patch ApplyOptionalPatch amdgpu-si-cik-default.patch +ApplyOptionalPatch nouveau-gsp-default.patch ApplyOptionalPatch winesync.patch -ApplyOptionalPatch tkg-BBRv2.patch -ApplyOptionalPatch tkg-bcachefs.patch ApplyOptionalPatch tkg-misc-additions.patch ApplyOptionalPatch tkg-unprivileged-CLONE_NEWUSER.patch @@ -1813,17 +1870,27 @@ ApplyOptionalPatch v10-0002-HID-asus-make-asus_kbd_init-generic-remove-rog_n.pat ApplyOptionalPatch v10-0003-HID-asus-add-ROG-Ally-N-Key-ID-and-keycodes.patch ApplyOptionalPatch v10-0004-HID-asus-add-ROG-Ally-xpad-settings.patch ApplyOptionalPatch rog-ally-bmc150.patch +ApplyOptionalPatch rog-ally-gyro-fix.patch # hdr: https://github.com/CachyOS/kernel-patches ApplyOptionalPatch 0001-amd-hdr.patch ApplyOptionalPatch 0001-add-acpi_call.patch ApplyOptionalPatch uinput.patch +# fixes framerate control in gamescope +# also fixes https://gitlab.freedesktop.org/drm/amd/-/issues/2733 +ApplyOptionalPatch valve-gamescope-framerate-control-fixups.patch + +# fixes HAINAN amdgpu card not being bootable +# https://gitlab.freedesktop.org/drm/amd/-/issues/1839 +ApplyOptionalPatch amdgpu-HAINAN-variant-fixup.patch + # steamdeck oled patches ApplyOptionalPatch steamdeck-oled-wifi.patch -ApplyOptionalPatch steamdeck-oled-bt.patch ApplyOptionalPatch steamdeck-oled-audio.patch -ApplyOptionalPatch steamdeck-oled-hw-quirks.patch + +# t2 macbook patches +ApplyOptionalPatch t2linux.patch # temporary patches ApplyOptionalPatch 0001-Remove-REBAR-size-quirk-for-Sapphire-RX-5600-XT-Puls.patch @@ -1980,20 +2047,28 @@ done %endif RHJOBS=$RPM_BUILD_NCPUS SPECPACKAGE_NAME=%{name} ./process_configs.sh $OPTS %{specrpmversion} -cp %{SOURCE82} . -RPM_SOURCE_DIR=$RPM_SOURCE_DIR ./update_scripts.sh %{primary_target} - # We may want to override files from the primary target in case of building # against a flavour of it (eg. 
centos not rhel), thus override it here if
# necessary
+update_scripts() {
+    TARGET="$1"
+
+    for i in "$RPM_SOURCE_DIR"/*."$TARGET"; do
+        NEW=${i%."$TARGET"}
+        cp "$i" "$(basename "$NEW")"
+    done
+}
+
+update_target=%{primary_target}
 if [ "%{primary_target}" == "rhel" ]; then
+: # no-op to avoid empty if-fi error
 %if 0%{?centos}
+    update_scripts $update_target
     echo "Updating scripts/sources to centos version"
-    RPM_SOURCE_DIR=$RPM_SOURCE_DIR ./update_scripts.sh centos
-%else
-    echo "Not updating scripts/sources to centos version"
+    update_target=centos
 %endif
 fi
+update_scripts $update_target

 # end of kernel config
 %endif
@@ -2804,8 +2879,9 @@ InitBuildVars
 %ifarch aarch64
 %global perf_build_extra_opts CORESIGHT=1
 %endif
+# LIBBPF_DYNAMIC=1 temporarily removed from the next command, as it breaks the build on f39 and f38
 %global perf_make \
-  %{__make} %{?make_opts} EXTRA_CFLAGS="${RPM_OPT_FLAGS}" LDFLAGS="%{__global_ldflags} -Wl,-E" %{?cross_opts} -C tools/perf V=1 NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 WERROR=0 NO_LIBUNWIND=1 HAVE_CPLUS_DEMANGLE=1 NO_GTK2=1 NO_STRLCPY=1 NO_BIONIC=1 LIBBPF_DYNAMIC=1 LIBTRACEEVENT_DYNAMIC=1 %{?perf_build_extra_opts} prefix=%{_prefix} PYTHON=%{__python3}
+  %{__make} %{?make_opts} EXTRA_CFLAGS="${RPM_OPT_FLAGS}" LDFLAGS="%{__global_ldflags} -Wl,-E" %{?cross_opts} -C tools/perf V=1 NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 WERROR=0 NO_LIBUNWIND=1 HAVE_CPLUS_DEMANGLE=1 NO_GTK2=1 NO_STRLCPY=1 NO_BIONIC=1 LIBTRACEEVENT_DYNAMIC=1 %{?perf_build_extra_opts} prefix=%{_prefix} PYTHON=%{__python3}
 %if %{with_perf}
 # perf
 # make sure check-headers.sh is executable
@@ -3014,6 +3090,8 @@ docdir=$RPM_BUILD_ROOT%{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}
 # copy the source over
 mkdir -p $docdir
 tar -h -f - --exclude=man --exclude='.*' -c Documentation | tar xf - -C $docdir
+cat %{SOURCE2} | xz > $docdir/kernel.changelog.xz
+chmod 0644 $docdir/kernel.changelog.xz

 # with_doc
 %endif
@@ -3092,6 +3170,13 @@ mkdir -p %{buildroot}/%{_mandir}/man1
 rm -rf %{buildroot}%{_libdir}/traceevent
 %endif

+%if %{with_libperf}
+pushd tools/lib/perf
+%{tools_make} DESTDIR=%{buildroot} prefix=%{_prefix} libdir=%{_libdir} install install_headers
+rm -rf %{buildroot}%{_libdir}/libperf.a
+popd
+%endif
+
 %if %{with_tools}
 %ifarch %{cpupowerarchs}
 %{make} -C tools/power/cpupower DESTDIR=$RPM_BUILD_ROOT libdir=%{_libdir} mandir=%{_mandir} CPUFREQ_BENCH=false install
@@ -3446,7 +3531,7 @@ fi\
 %if %{with_up_base}
 %kernel_variant_preun
-%kernel_variant_post -r kernel-smp
+%kernel_variant_post
 %endif

 %if %{with_zfcpdump}
@@ -3474,6 +3559,16 @@ fi\
 %kernel_variant_post -v 16k-debug
 %endif

+%if %{with_arm64_16k} && %{with_debug} && %{with_efiuki}
+%kernel_variant_posttrans -v 16k-debug -u virt
+%kernel_variant_preun -v 16k-debug -u virt
+%endif
+
+%if %{with_arm64_16k_base} && %{with_efiuki}
+%kernel_variant_posttrans -v 16k -u virt
+%kernel_variant_preun -v 16k -u virt
+%endif
+
 %if %{with_arm64_64k_base}
 %kernel_variant_preun -v 64k
 %kernel_variant_post -v 64k
@@ -3484,9 +3579,19 @@ fi\
 %kernel_variant_post -v 64k-debug
 %endif

+%if %{with_arm64_64k} && %{with_debug} && %{with_efiuki}
+%kernel_variant_posttrans -v 64k-debug -u virt
+%kernel_variant_preun -v 64k-debug -u virt
+%endif
+
+%if %{with_arm64_64k_base} && %{with_efiuki}
+%kernel_variant_posttrans -v 64k -u virt
+%kernel_variant_preun -v 64k -u virt
+%endif
+
 %if %{with_realtime_base}
 %kernel_variant_preun -v rt
-%kernel_variant_post -v rt -r (kernel|kernel-smp)
+%kernel_variant_post -v rt -r kernel
 %kernel_kvm_post rt
 %endif

@@ -3531,6 +3636,7 @@ fi\
 %{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}/Documentation/*
 %dir %{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}/Documentation
 %dir %{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}
+%{_datadir}/doc/kernel-doc-%{specversion}-%{pkgrelease}/kernel.changelog.xz
 %endif

 %if %{with_perf}
@@ -3556,6 +3662,37 @@ fi\
 # with_perf
 %endif

+%if %{with_libperf}
+%files -n libperf
+%{_libdir}/libperf.so.0
+%{_libdir}/libperf.so.0.0.1
+
+%files -n libperf-devel
+%{_libdir}/libperf.so
+%{_libdir}/pkgconfig/libperf.pc
+%{_includedir}/internal/*.h
+%{_includedir}/perf/bpf_perf.h
+%{_includedir}/perf/core.h
+%{_includedir}/perf/cpumap.h
+%{_includedir}/perf/perf_dlfilter.h
+%{_includedir}/perf/event.h
+%{_includedir}/perf/evlist.h
+%{_includedir}/perf/evsel.h
+%{_includedir}/perf/mmap.h
+%{_includedir}/perf/threadmap.h
+%{_mandir}/man3/libperf.3.gz
+%{_mandir}/man7/libperf-counting.7.gz
+%{_mandir}/man7/libperf-sampling.7.gz
+%{_docdir}/libperf/examples/sampling.c
+%{_docdir}/libperf/examples/counting.c
+%{_docdir}/libperf/html/libperf.html
+%{_docdir}/libperf/html/libperf-counting.html
+%{_docdir}/libperf/html/libperf-sampling.html
+
+# with_libperf
+%endif
+
+
 %if %{with_tools}
 %ifnarch %{cpupowerarchs}
 %files -n %{package_name}-tools
@@ -3599,12 +3736,14 @@ fi\

 %ifarch %{cpupowerarchs}
 %files -n %{package_name}-tools-libs
-%{_libdir}/libcpupower.so.1
+%{_libdir}/libcpupower.so.0
 %{_libdir}/libcpupower.so.0.0.1

 %files -n %{package_name}-tools-libs-devel
 %{_libdir}/libcpupower.so
 %{_includedir}/cpufreq.h
+%{_includedir}/cpuidle.h
+%{_includedir}/powercap.h
 %endif

 %files -n rtla
@@ -3690,7 +3829,7 @@ fi\
 /lib/modules/%{KVERREL}%{?3:+%{3}}/dtb \
 %ghost /%{image_install_path}/dtb-%{KVERREL}%{?3:+%{3}} \
 %endif\
-%attr(0600, root, root) /lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
+/lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
 %ghost %attr(0600, root, root) /boot/System.map-%{KVERREL}%{?3:+%{3}}\
 %dir /lib/modules\
 %dir /lib/modules/%{KVERREL}%{?3:+%{3}}\
@@ -3750,11 +3889,11 @@ fi\
 %else\
 %if %{with_efiuki}\
 %{expand:%%files %{?3:%{3}-}uki-virt}\
-%attr(0600, root, root) /lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
+/lib/modules/%{KVERREL}%{?3:+%{3}}/System.map\
 /lib/modules/%{KVERREL}%{?3:+%{3}}/symvers.%compext\
 /lib/modules/%{KVERREL}%{?3:+%{3}}/config\
 /lib/modules/%{KVERREL}%{?3:+%{3}}/modules.builtin*\
-/lib/modules/%{KVERREL}%{?3:+%{3}}/%{?-k:%{-k*}}%{!?-k:vmlinuz}-virt.efi\
+%attr(0644, root, root) /lib/modules/%{KVERREL}%{?3:+%{3}}/%{?-k:%{-k*}}%{!?-k:vmlinuz}-virt.efi\
 %ghost /%{image_install_path}/efi/EFI/Linux/%{?-k:%{-k*}}%{!?-k:*}-%{KVERREL}%{?3:+%{3}}.efi\
 %endif\
 %endif\
@@ -3830,85 +3969,318 @@ fi\
 #
 #
 %changelog
-* Thu Feb 01 2024 Jan Drögehoff - 6.6.14-202.fsync.1
-- disable valve15 patch: introduced amdgpu regressions
+* Wed Feb 07 2024 Jan Drögehoff - 6.7.3-201.fsync
+- kernel-fsync v6.7.3
+
+* Wed Jan 31 2024 Justin M. Forbes [6.7.3-0]
+- Config update for stable backport (Justin M. Forbes)
+- Add some more bugs to BugsFixed (Justin M. Forbes)
+- Linux v6.7.3
+
+* Fri Jan 26 2024 Justin M. Forbes [6.7.2-0]
+- redhat: spec: Fix update_scripts run for CentOS builds (Neal Gompa)
+- BPF Tool versioning seems incompatible with stable Fedora (Justin M. Forbes)
+- Linux v6.7.2
+
+* Sat Jan 20 2024 Justin M. Forbes [6.7.1-0]
+- Fix up requires for UKI (Justin M. Forbes)
+- Fix up libperf install (Justin M. Forbes)
+- Drop soname for libcpupower.so since we reverted the bump (Justin M. Forbes)
+- Turn on CONFIG_TCP_AO for Fedora (Justin M. Forbes)
+- temporarily remove LIBBPF_DYNAMIC=1 from perf build (Thorsten Leemhuis)
+- add libperf packages and enable perf, libperf, tools and bpftool packages (Thorsten Leemhuis)
+- Revert "cpupower: Bump soname version" (Justin M. Forbes)
+- Turn on Renesas RZ for Fedora IOT rhbz2257913 (Justin M. Forbes)
+- Add bugs to BugsFixed (Justin M. Forbes)
+- wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() (Xingyuan Mo)
+- drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set (Javier Martinez Canillas)
+- Basic scaffolding to create a kernel-headers package (Justin M. Forbes)
+- Initial config for fedora-6.7 branch (Justin M. Forbes)
+- Reset RHEL_RELEASE for 6.8 series (Justin M. Forbes)
+- common: cleanup MX3_IPU (Peter Robinson)
+- all: The Octeon MDIO driver is aarch64/mips (Peter Robinson)
+- common: rtc: remove bq4802 config (Peter Robinson)
+- common: de-dupe MARVELL_GTI_WDT (Peter Robinson)
+- all: Remove CAN_BXCAN (Peter Robinson)
+- common: cleanup SND_SOC_ROCKCHIP (Peter Robinson)
+- common: move RHEL DP83867_PHY to common (Peter Robinson)
+- common: Make ASYMMETRIC_KEY_TYPE enable explicit (Peter Robinson)
+- common: Disable aarch64 ARCH_MA35 universally (Peter Robinson)
+- common: arm64: enable Tegra234 pinctrl driver (Peter Robinson)
+- rhel: arm64: Enable qoriq thermal driver (Peter Robinson)
+- common: aarch64: Cleanup some i.MX8 config options (Peter Robinson)
+- all: EEPROM_LEGACY has been removed (Peter Robinson)
+- all: remove AppleTalk hardware configs (Peter Robinson)
+- all: cleanup: remove references to SLOB (Peter Robinson)
+- all: cleanup: Drop unnecessary BRCMSTB configs (Peter Robinson)
+- all: net: remove retired network schedulers (Peter Robinson)
+- all: cleanup removed CONFIG_IMA_TRUSTED_KEYRING (Peter Robinson)
+- BuildRequires: lld for build with selftests for x86 (Jan Stancek)
+- spec: add keyutils to selftest-internal subpackage requirements (Artem Savkov) [2166911]
+- redhat/spec: exclude liburandom_read.so from requires (Artem Savkov) [2120968]
+- rtla: sync summary text with upstream and update Requires (Jan Stancek)
+- uki-virt: add systemd-sysext dracut module (Gerd Hoffmann)
+- uki-virt: add virtiofs dracut module (Gerd Hoffmann)
+- common: disable the FB device creation (Peter Robinson)
+- s390x: There's no FB on Z-series (Peter Robinson)
+- Linux v6.7.1
+
+* Mon Jan 08 2024 Fedora Kernel Team [6.7.0-68]
+- fedora: aarch64: enable SM_VIDEOCC_8350 (Peter Robinson)
+- Linux v6.7.0
+
+* Sun Jan 07 2024 Fedora Kernel Team [6.7.0-0.rc8.52b1853b080a.67]
+- Linux v6.7.0-0.rc8.52b1853b080a
+
+* Sat Jan 06 2024 Fedora Kernel Team [6.7.0-0.rc8.95c8a35f1c01.66]
+- fedora: arm64: enable ethernet on newer TI industrial (Peter Robinson)
+- fedora: arm64: Disable VIDEO_IMX_MEDIA (Peter Robinson)
+- fedora: use common config for Siemens Simatic IPC (Peter Robinson)
+- fedora: arm: enable Rockchip SPI flash (Peter Robinson)
+- fedora: arm64: enable DRM_TI_SN65DSI83 (Peter Robinson)
+- Linux v6.7.0-0.rc8.95c8a35f1c01
+
+* Fri Jan 05 2024 Fedora Kernel Team [6.7.0-0.rc8.1f874787ed9a.65]
+- Linux v6.7.0-0.rc8.1f874787ed9a
+
+* Thu Jan 04 2024 Fedora Kernel Team [6.7.0-0.rc8.ac865f00af29.64]
+- Linux v6.7.0-0.rc8.ac865f00af29
+
+* Wed Jan 03 2024 Fedora Kernel Team [6.7.0-0.rc8.63]
+- kernel.spec: remove kernel-smp reference from scripts (Jan Stancek)
+
+* Tue Jan 02 2024 Fedora Kernel Team [6.7.0-0.rc8.62]
+- redhat: do not compress the full kernel changelog in the src.rpm (Herton R.
Krzesinski) + +* Mon Jan 01 2024 Fedora Kernel Team [6.7.0-0.rc8.61] +- Linux v6.7.0-0.rc8 + +* Sun Dec 31 2023 Fedora Kernel Team [6.7.0-0.rc7.453f5db0619e.60] +- Linux v6.7.0-0.rc7.453f5db0619e + +* Sat Dec 30 2023 Fedora Kernel Team [6.7.0-0.rc7.f016f7547aee.59] +- Auto consolidate configs for the 6.7 cycle (Justin M. Forbes) +- Linux v6.7.0-0.rc7.f016f7547aee + +* Fri Dec 29 2023 Fedora Kernel Team [6.7.0-0.rc7.8735c7c84d1b.58] +- Linux v6.7.0-0.rc7.8735c7c84d1b + +* Thu Dec 28 2023 Fedora Kernel Team [6.7.0-0.rc7.f5837722ffec.57] +- Linux v6.7.0-0.rc7.f5837722ffec + +* Tue Dec 26 2023 Fedora Kernel Team [6.7.0-0.rc7.fbafc3e621c3.56] +- Linux v6.7.0-0.rc7.fbafc3e621c3 + +* Mon Dec 25 2023 Fedora Kernel Team [6.7.0-0.rc7.55] +- Enable sound for a line of Huawei laptops (TomZanna) + +* Sun Dec 24 2023 Fedora Kernel Team [6.7.0-0.rc7.54] +- Linux v6.7.0-0.rc7 + +* Sat Dec 23 2023 Fedora Kernel Team [6.7.0-0.rc6.5254c0cbc92d.53] +- Linux v6.7.0-0.rc6.5254c0cbc92d + +* Fri Dec 22 2023 Fedora Kernel Team [6.7.0-0.rc6.24e0d2e527a3.52] +- fedora: a few cleanups and driver enablements (Peter Robinson) +- fedora: arm64: cleanup Allwinner Pinctrl drivers (Peter Robinson) +- fedora: aarch64: Enable some DW drivers (Peter Robinson) +- Linux v6.7.0-0.rc6.24e0d2e527a3 -* Mon Jan 29 2024 Jan Drögehoff - 6.6.14-201.fsync -- kernel-fsync v6.6.14 +* Thu Dec 21 2023 Fedora Kernel Team [6.7.0-0.rc6.a4aebe936554.51] +- redhat: ship all the changelog from source git into kernel-doc (Herton R. Krzesinski) +- redhat: create an empty changelog file when changing its name (Herton R. Krzesinski) +- Linux v6.7.0-0.rc6.a4aebe936554 -* Fri Jan 26 2024 Augusto Caringi [6.6.14-0] -- Add some CVE fixes staged for 6.6.14 (Justin M. Forbes) -- Linux v6.6.14 +* Wed Dec 20 2023 Fedora Kernel Team [6.7.0-0.rc6.55cb5f43689d.50] +- redhat/self-test: Remove --all from git query (Prarit Bhargava) +- Linux v6.7.0-0.rc6.55cb5f43689d -* Sat Jan 20 2024 Justin M. Forbes [6.6.13-0] -- Linux v6.6.13 +* Tue Dec 19 2023 Fedora Kernel Team [6.7.0-0.rc6.2cf4f94d8e86.49] +- Linux v6.7.0-0.rc6.2cf4f94d8e86 -* Mon Jan 15 2024 Augusto Caringi [6.6.12-0] -- Add CVE Fixes to BugsFixed for 6.6.12 (Justin M. Forbes) -- ida: Fix crash in ida_free when the bitmap is empty (Matthew Wilcox (Oracle)) -- wifi: ath10k: fix NULL pointer dereference in ath10k_wmi_tlv_op_pull_mgmt_tx_compl_ev() (Xingyuan Mo) -- Linux v6.6.12 +* Mon Dec 18 2023 Fedora Kernel Team [6.7.0-0.rc6.48] +- Disable accel drivers for Fedora x86 (Kate Hsuan) +- redhat: scripts: An automation script for disabling unused driver for x86 (Kate Hsuan) +- Fix up Fedora LJCA configs and filters (Justin M. Forbes) +- Linux v6.7.0-0.rc6 -* Wed Jan 10 2024 Augusto Caringi [6.6.11-0] -- Linux v6.6.11 +* Sun Dec 17 2023 Fedora Kernel Team [6.7.0-0.rc5.3b8a9b2e6809.47] +- Linux v6.7.0-0.rc5.3b8a9b2e6809 -* Fri Jan 05 2024 Augusto Caringi [6.6.10-0] -- Linux v6.6.10 +* Sat Dec 16 2023 Fedora Kernel Team [6.7.0-0.rc5.c8e97fc6b4c0.46] +- Fedora configs for 6.7 (Justin M. Forbes) +- Linux v6.7.0-0.rc5.c8e97fc6b4c0 -* Mon Jan 01 2024 Justin M. Forbes [6.6.9-0] -- ALSA: hda: cs35l41: Add notification support into component binding (Stefan Binding) -- ALSA: hda: cs35l41: Support mute notifications for CS35L41 HDA (Stefan Binding) -- Linux v6.6.9 +* Fri Dec 15 2023 Fedora Kernel Team [6.7.0-0.rc5.3f7168591ebf.45] +- Linux v6.7.0-0.rc5.3f7168591ebf -* Wed Dec 20 2023 Augusto Caringi [6.6.8-0] -- Add BugsFixed entries for rhbz#2254797 and #rhbz2254704 (Justin M. 
Forbes) -- Add support for various laptops using CS35L41 HDA without _DSD (Stefan Binding) -- fedora: arm64: Enable required AllWinner pinctrl drivers (Peter Robinson) -- fedora: arm64: cleanup Allwinner Pinctrl drivers (Peter Robinson) -- fedora: aarch64: Enable some DW drivers (Peter Robinson) -- Basic scaffolding to create a kernel-headers package (Justin M. Forbes) -- Linux v6.6.8 +* Thu Dec 14 2023 Fedora Kernel Team [6.7.0-0.rc5.5bd7ef53ffe5.44] +- Linux v6.7.0-0.rc5.5bd7ef53ffe5 -* Wed Dec 13 2023 Augusto Caringi [6.6.7-0] -- Add rhbz#2253632 rhbz#2253633 to BugsFixed (Justin M. Forbes) +* Wed Dec 13 2023 Fedora Kernel Team [6.7.0-0.rc5.88035e5694a8.43] +- Some Fedora config updates for MLX5 (Justin M. Forbes) - Turn on DRM_ACCEL drivers for Fedora (Justin M. Forbes) -- Linux v6.6.7 +- Linux v6.7.0-0.rc5.88035e5694a8 -* Mon Dec 11 2023 Augusto Caringi [6.6.6-0] +* Tue Dec 12 2023 Fedora Kernel Team [6.7.0-0.rc5.26aff849438c.42] +- redhat: enable the kfence test (Nico Pache) +- Linux v6.7.0-0.rc5.26aff849438c + +* Mon Dec 11 2023 Fedora Kernel Team [6.7.0-0.rc5.41] - redhat/configs: Enable UCLAMP_TASK for PipeWire and WirePlumber (Neal Gompa) -- Linux v6.6.6 +- Linux v6.7.0-0.rc5 + +* Sun Dec 10 2023 Fedora Kernel Team [6.7.0-0.rc4.c527f5606aa5.40] +- Linux v6.7.0-0.rc4.c527f5606aa5 + +* Sat Dec 09 2023 Fedora Kernel Team [6.7.0-0.rc4.f2e8a57ee903.39] +- Linux v6.7.0-0.rc4.f2e8a57ee903 + +* Fri Dec 08 2023 Fedora Kernel Team [6.7.0-0.rc4.5e3f5b81de80.38] +- Turn on CONFIG_SECURITY_DMESG_RESTRICT for Fedora (Justin M. Forbes) +- Linux v6.7.0-0.rc4.5e3f5b81de80 + +* Wed Dec 06 2023 Fedora Kernel Team [6.7.0-0.rc4.bee0e7762ad2.37] +- Turn off shellcheck for the fedora-stable-release script (Justin M. Forbes) + +* Tue Dec 05 2023 Fedora Kernel Team [6.7.0-0.rc4.bee0e7762ad2.36] +- Add some initial Fedora stable branch script to redhat/scripts/fedora/ (Justin M. Forbes) +- Linux v6.7.0-0.rc4.bee0e7762ad2 + +* Mon Dec 04 2023 Fedora Kernel Team [6.7.0-0.rc4.35] +- Linux v6.7.0-0.rc4 + +* Sun Dec 03 2023 Fedora Kernel Team [6.7.0-0.rc3.968f35f4ab1c.34] +- Linux v6.7.0-0.rc3.968f35f4ab1c + +* Sat Dec 02 2023 Fedora Kernel Team [6.7.0-0.rc3.815fb87b7530.33] +- redhat: disable iptables-legacy compatibility layer (Florian Westphal) +- redhat: disable dccp conntrack support (Florian Westphal) +- configs: enable netfilter_netlink_hook in fedora too (Florian Westphal) +- Linux v6.7.0-0.rc3.815fb87b7530 + +* Fri Dec 01 2023 Fedora Kernel Team [6.7.0-0.rc3.994d5c58e50e.32] +- ext4: Mark mounting fs-verity filesystems as tech-preview (Alexander Larsson) +- erofs: Add tech preview markers at mount (Alexander Larsson) +- Enable fs-verity (Alexander Larsson) +- Enable erofs (Alexander Larsson) +- aarch64: enable uki (Gerd Hoffmann) +- redhat: enable CONFIG_SND_SOC_INTEL_SOF_DA7219_MACH as a module for x86 (Patrick Talbert) +- Turn CONFIG_MFD_CS42L43_SDW on for RHEL (Justin M. Forbes) +- Linux v6.7.0-0.rc3.994d5c58e50e -* Fri Dec 08 2023 Augusto Caringi [6.6.5-0] -- Add io_uring CVE for 6.6.5 (Justin M. Forbes) -- Linux v6.6.5 +* Thu Nov 30 2023 Fedora Kernel Team [6.7.0-0.rc3.3b47bc037bd4.31] +- Linux v6.7.0-0.rc3.3b47bc037bd4 -* Sun Dec 03 2023 Justin M. Forbes [6.6.4-0] -- redhat: Fix macro for kernel-uki-virt flavor (Neal Gompa) -- Change the uki reqs for Fedora (Justin M. 
Forbes) -- Linux v6.6.4 +* Wed Nov 29 2023 Fedora Kernel Team [6.7.0-0.rc3.18d46e76d7c2.30] +- Enable cryptographic acceleration config flags for PowerPC (Mamatha Inamdar) +- Also make vmlinuz-virt.efi world readable (Zbigniew Jędrzejewski-Szmek) +- Drop custom mode for System.map file (Zbigniew Jędrzejewski-Szmek) +- Linux v6.7.0-0.rc3.18d46e76d7c2 -* Tue Nov 28 2023 Justin M. Forbes [6.6.3-0] -- Add BugsFixed for 6.6.3 (Justin M. Forbes) -- Update BugsFixed (Justin M. Forbes) +* Tue Nov 28 2023 Fedora Kernel Team [6.7.0-0.rc3.df60cee26a2e.29] +- Add drm_exec_test to mod-internal.list for depmod to succeed (Mika Penttilä) +- RHEL 9.4 DRM backport (upto v6.6 kernel), sync Kconfigs (Mika Penttilä) +- Linux v6.7.0-0.rc3.df60cee26a2e + +* Mon Nov 27 2023 Fedora Kernel Team [6.7.0-0.rc3.28] +- Linux v6.7.0-0.rc3 + +* Sun Nov 26 2023 Fedora Kernel Team [6.7.0-0.rc2.090472ed9c92.27] +- Linux v6.7.0-0.rc2.090472ed9c92 + +* Sat Nov 25 2023 Fedora Kernel Team [6.7.0-0.rc2.0f5cc96c367f.26] +- Linux v6.7.0-0.rc2.0f5cc96c367f + +* Fri Nov 24 2023 Fedora Kernel Team [6.7.0-0.rc2.f1a09972a45a.25] +- Linux v6.7.0-0.rc2.f1a09972a45a + +* Thu Nov 23 2023 Fedora Kernel Team [6.7.0-0.rc2.9b6de136b5f0.24] - Turn on USB_DWC3 for Fedora (rhbz 2250955) (Justin M. Forbes) -- Revert "netfilter: nf_tables: remove catchall element in GC sync path" (Justin M. Forbes) -- More BugsFixed (Justin M. Forbes) -- netfilter: nf_tables: remove catchall element in GC sync path (Pablo Neira Ayuso) -- frop the build number back to 200 for fedora-srpm.sh (Justin M. Forbes) -- ACPI: video: Use acpi_device_fix_up_power_children() (Hans de Goede) -- ACPI: PM: Add acpi_device_fix_up_power_children() function (Hans de Goede) -- Linux v6.6.3 - -* Mon Nov 20 2023 Justin M. Forbes [6.6.2-0] -- Add bug for AMD ACPI alarm (Justin M. Forbes) -- rtc: cmos: Use ACPI alarm for non-Intel x86 systems too (Mario Limonciello) -- Add bluetooth fixes to BugsFixed (Justin M. Forbes) -- Drop F37 from release targets as it will not rebase to 6.6 (Justin M. Forbes) -- Linux v6.6.2 - -* Wed Nov 08 2023 Justin M. Forbes [6.6.1-0] -- drivers/firmware: skip simpledrm if nvidia-drm.modeset=1 is set (Javier Martinez Canillas) -- Added required files for rebase (Augusto Caringi) +- Linux v6.7.0-0.rc2.9b6de136b5f0 + +* Wed Nov 22 2023 Fedora Kernel Team [6.7.0-0.rc2.c2d5304e6c64.23] +- redhat/configs: Move IOMMUFD to common (Alex Williamson) +- redhat: Really remove cpupower files (Prarit Bhargava) +- redhat: remove update_scripts.sh (Prarit Bhargava) +- Linux v6.7.0-0.rc2.c2d5304e6c64 + +* Mon Nov 20 2023 Fedora Kernel Team [6.7.0-0.rc2.22] +- Fix s390 zfcpfdump bpf build failures for cgroups (Don Zickus) +- Linux v6.7.0-0.rc2 + +* Sun Nov 19 2023 Fedora Kernel Team [6.7.0-0.rc1.037266a5f723.21] +- Linux v6.7.0-0.rc1.037266a5f723 + +* Sat Nov 18 2023 Fedora Kernel Team [6.7.0-0.rc1.791c8ab095f7.20] +- Linux v6.7.0-0.rc1.791c8ab095f7 + +* Fri Nov 17 2023 Fedora Kernel Team [6.7.0-0.rc1.7475e51b8796.19] +- Linux v6.7.0-0.rc1.7475e51b8796 + +* Wed Nov 15 2023 Fedora Kernel Team [6.7.0-0.rc1.c42d9eeef8e5.18] +- Linux v6.7.0-0.rc1.c42d9eeef8e5 + +* Tue Nov 14 2023 Fedora Kernel Team [6.7.0-0.rc1.9bacdd8996c7.17] +- Linux v6.7.0-0.rc1.9bacdd8996c7 + +* Mon Nov 13 2023 Fedora Kernel Team [6.7.0-0.rc1.16] +- Linux v6.7.0-0.rc1 + +* Sun Nov 12 2023 Fedora Kernel Team [6.7.0-0.rc0.1b907d050735.15] +- Linux v6.7.0-0.rc0.1b907d050735 + +* Sat Nov 11 2023 Fedora Kernel Team [6.7.0-0.rc0.3ca112b71f35.14] +- Flip CONFIG_NVME_AUTH to m in pending (Justin M. 
Forbes) +- Linux v6.7.0-0.rc0.3ca112b71f35 + +* Fri Nov 10 2023 Fedora Kernel Team [6.7.0-0.rc0.89cdf9d55601.13] +- Linux v6.7.0-0.rc0.89cdf9d55601 + +* Thu Nov 09 2023 Fedora Kernel Team [6.7.0-0.rc0.6bc986ab839c.12] +- Linux v6.7.0-0.rc0.6bc986ab839c + +* Wed Nov 08 2023 Fedora Kernel Team [6.7.0-0.rc0.305230142ae0.11] +- Turn CONFIG_SND_SOC_INTEL_AVS_MACH_RT5514 on for Fedora x86 (Jason Montleon) +- kernel/rh_messages.c: Mark functions as possibly unused (Prarit Bhargava) +- Add snd-hda-cirrus-scodec-test to mod-internal.list (Scott Weaver) +- Linux v6.7.0-0.rc0.305230142ae0 + +* Tue Nov 07 2023 Fedora Kernel Team [6.7.0-0.rc0.be3ca57cfb77.10] +- Turn off BPF_SYSCALL in pending for zfcpdump (Justin M. Forbes) +- Linux v6.7.0-0.rc0.be3ca57cfb77 + +* Mon Nov 06 2023 Fedora Kernel Team [6.7.0-0.rc0.d2f51b3516da.9] +- Linux v6.7.0-0.rc0.d2f51b3516da + +* Sun Nov 05 2023 Fedora Kernel Team [6.7.0-0.rc0.1c41041124bd.8] +- Linux v6.7.0-0.rc0.1c41041124bd + +* Sat Nov 04 2023 Fedora Kernel Team [6.7.0-0.rc0.90b0c2b2edd1.7] +- Add mean_and_variance_test to mod-internal.list (Justin M. Forbes) +- Add cfg80211-tests and mac80211-tests to mod-internal.list (Justin M. Forbes) +- Linux v6.7.0-0.rc0.90b0c2b2edd1 + +* Fri Nov 03 2023 Fedora Kernel Team [6.7.0-0.rc0.8f6f76a6a29f.6] +- Turn on CONFIG_MFD_CS42L43_SDW for RHEL in pending (Justin M. Forbes) +- Linux v6.7.0-0.rc0.8f6f76a6a29f + +* Fri Nov 03 2023 Fedora Kernel Team [6.7.0-0.rc0.21e80f3841c0.5] +- Turn on bcachefs for Fedora (Justin M. Forbes) +- redhat: configs: fedora: Enable QSEECOM and friends (Andrew Halaney) + +* Thu Nov 02 2023 Fedora Kernel Team [6.7.0-0.rc0.21e80f3841c0.4] +- Add clk-fractional-divider_test to mod-internal.list (Thorsten Leemhuis) +- Add gso_test to mod-internal.list (Thorsten Leemhuis) +- Add property-entry-test to mod-internal.list (Thorsten Leemhuis) +- Linux v6.7.0-0.rc0.21e80f3841c0 + +* Wed Nov 01 2023 Fedora Kernel Team [6.7.0-0.rc0.8bc9e6515183.3] +- Fedora 6.7 configs part 1 (Justin M. Forbes) +- Trim changelog after version bump (Justin M. Forbes) +- Linux v6.7.0-0.rc0.8bc9e6515183 + +* Tue Oct 31 2023 Fedora Kernel Team [6.7.0-0.rc0.5a6a09e97199.2] - Reset RHEL_RELEASE for rebase (Justin M. Forbes) - [Scheduled job] Catch config mismatches early during upstream merge (Don Zickus) - redhat/self-test: Update data for KABI xz change (Prarit Bhargava) @@ -3916,61 +4288,19 @@ fi\ - redhat/kernel.spec.template: Switch KABI compression to xz (Prarit Bhargava) - redhat: self-test: Use a more complete SRPM file suffix (Andrew Halaney) - redhat: makefile: remove stray rpmbuild --without (Eric Chanudet) -- Linux v6.6.1 - -* Mon Oct 30 2023 Fedora Kernel Team [6.6.0-61] -- Linux v6.6.0 - -* Sun Oct 29 2023 Fedora Kernel Team [6.6.0-0.rc7.2af9b20dbb39.60] -- Linux v6.6.0-0.rc7.2af9b20dbb39 - -* Sat Oct 28 2023 Fedora Kernel Team [6.6.0-0.rc7.56567a20b22b.59] - Consolidate configs into common for 6.6 (Justin M. Forbes) -- Linux v6.6.0-0.rc7.56567a20b22b - -* Fri Oct 27 2023 Fedora Kernel Team [6.6.0-0.rc7.750b95887e56.58] -- Linux v6.6.0-0.rc7.750b95887e56 - -* Thu Oct 26 2023 Fedora Kernel Team [6.6.0-0.rc7.611da07b89fd.57] - Updated Fedora configs (Justin M. Forbes) - Turn on UFSHCD for Fedora x86 (Justin M. 
Forbes) -- Linux v6.6.0-0.rc7.611da07b89fd - -* Wed Oct 25 2023 Fedora Kernel Team [6.6.0-0.rc7.4f82870119a4.56] - redhat: configs: generic: x86: Disable CONFIG_VIDEO_OV01A10 for x86 platform (Hans de Goede) -- Linux v6.6.0-0.rc7.4f82870119a4 - -* Tue Oct 24 2023 Fedora Kernel Team [6.6.0-0.rc7.d88520ad73b7.55] - redhat: remove pending-rhel CONFIG_XFS_ASSERT_FATAL file (Patrick Talbert) - New configs in fs/xfs (Fedora Kernel Team) - crypto: rng - Override drivers/char/random in FIPS mode (Herbert Xu) - random: Add hook to override device reads and getrandom(2) (Herbert Xu) -- Linux v6.6.0-0.rc7.d88520ad73b7 - -* Mon Oct 23 2023 Fedora Kernel Team [6.6.0-0.rc7.54] -- Linux v6.6.0-0.rc7 - -* Sun Oct 22 2023 Fedora Kernel Team [6.6.0-0.rc6.1acfd2bd3f0d.53] -- Linux v6.6.0-0.rc6.1acfd2bd3f0d - -* Sat Oct 21 2023 Fedora Kernel Team [6.6.0-0.rc6.9c5d00cb7b6b.52] -- Linux v6.6.0-0.rc6.9c5d00cb7b6b - -* Fri Oct 20 2023 Fedora Kernel Team [6.6.0-0.rc6.ce55c22ec8b2.51] - redhat/configs: share CONFIG_ARM64_ERRATUM_2966298 between rhel and fedora (Mark Salter) - configs: Remove S390 IOMMU config options that no longer exist (Jerry Snitselaar) - redhat: docs: clarify where bugs and issues are created (Scott Weaver) - redhat/scripts/rh-dist-git.sh does not take any arguments: fix error message (Denys Vlasenko) - Add target_branch for gen_config_patches.sh (Don Zickus) -- Linux v6.6.0-0.rc6.ce55c22ec8b2 - -* Thu Oct 19 2023 Fedora Kernel Team [6.6.0-0.rc6.dd72f9c7e512.50] -- Linux v6.6.0-0.rc6.dd72f9c7e512 - -* Wed Oct 18 2023 Fedora Kernel Team [6.6.0-0.rc6.06dc10eae55b.49] -- Linux v6.6.0-0.rc6.06dc10eae55b - -* Tue Oct 17 2023 Fedora Kernel Team [6.6.0-0.rc6.213f891525c2.48] - redhat: disable kunit by default (Nico Pache) - redhat/configs: enable the AMD_PMF driver for RHEL (David Arcari) - Make CONFIG_ADDRESS_MASKING consistent between fedora and rhel (Chris von Recklinghausen) @@ -3980,77 +4310,18 @@ fi\ - CI: provide descriptive pipeline name for scheduled pipelines (Michael Hofmann) - CI: use job templates for variant variables (Michael Hofmann) - redhat/kernel.spec.template: simplify __modsign_install_post (Jan Stancek) -- Linux v6.6.0-0.rc6.213f891525c2 - -* Mon Oct 16 2023 Fedora Kernel Team [6.6.0-0.rc6.47] -- Linux v6.6.0-0.rc6 - -* Sun Oct 15 2023 Fedora Kernel Team [6.6.0-0.rc5.9a3dad63edbe.46] - Fedora filter updates after configs (Justin M. Forbes) - Fedora configs for 6.6 (Justin M. 
Forbes) -- Linux v6.6.0-0.rc5.9a3dad63edbe - -* Sat Oct 14 2023 Fedora Kernel Team [6.6.0-0.rc5.727fb8376504.45] -- Linux v6.6.0-0.rc5.727fb8376504 - -* Fri Oct 13 2023 Fedora Kernel Team [6.6.0-0.rc5.10a6e5feccb8.44] -- Linux v6.6.0-0.rc5.10a6e5feccb8 - -* Thu Oct 12 2023 Fedora Kernel Team [6.6.0-0.rc5.401644852d0b.43] -- Linux v6.6.0-0.rc5.401644852d0b - -* Wed Oct 11 2023 Fedora Kernel Team [6.6.0-0.rc5.1c8b86a3799f.42] -- Linux v6.6.0-0.rc5.1c8b86a3799f - -* Tue Oct 10 2023 Fedora Kernel Team [6.6.0-0.rc5.41] - redhat/configs: Freescale Layerscape SoC family (Steve Best) - Add clang MR/baseline pipelines (Michael Hofmann) - -* Mon Oct 09 2023 Fedora Kernel Team [6.6.0-0.rc5.40] - CI: Remove unused kpet_tree_family (Nikolai Kondrashov) -- Linux v6.6.0-0.rc5 - -* Sun Oct 08 2023 Fedora Kernel Team [6.6.0-0.rc4.b9ddbb0cde2a.39] -- Linux v6.6.0-0.rc4.b9ddbb0cde2a - -* Sat Oct 07 2023 Fedora Kernel Team [6.6.0-0.rc4.82714078aee4.38] -- Linux v6.6.0-0.rc4.82714078aee4 - -* Fri Oct 06 2023 Fedora Kernel Team [6.6.0-0.rc4.b78b18fb8ee1.37] - Add clang config framework (Don Zickus) - Apply partial snippet configs to all configs (Don Zickus) - Remove unpackaged kgcov config files (Don Zickus) - redhat/configs: enable missing Kconfig options for Qualcomm RideSX4 (Brian Masney) - enable CONFIG_ADDRESS_MASKING for x86_64 (Chris von Recklinghausen) -- Linux v6.6.0-0.rc4.b78b18fb8ee1 - -* Thu Oct 05 2023 Fedora Kernel Team [6.6.0-0.rc4.3006adf3be79.36] -- Linux v6.6.0-0.rc4.3006adf3be79 - -* Wed Oct 04 2023 Fedora Kernel Team [6.6.0-0.rc4.cbf3a2cb156a.35] -- Linux v6.6.0-0.rc4.cbf3a2cb156a - -* Tue Oct 03 2023 Fedora Kernel Team [6.6.0-0.rc4.ce36c8b14987.34] - common: aarch64: enable NXP Flex SPI (Peter Robinson) -- Linux v6.6.0-0.rc4.ce36c8b14987 - -* Mon Oct 02 2023 Fedora Kernel Team [6.6.0-0.rc4.33] -- Linux v6.6.0-0.rc4 - -* Sun Oct 01 2023 Fedora Kernel Team [6.6.0-0.rc3.e402b08634b3.32] -- Linux v6.6.0-0.rc3.e402b08634b3 - -* Sat Sep 30 2023 Fedora Kernel Team [6.6.0-0.rc3.9f3ebbef746f.31] - fedora: Switch TI_SCI_CLK and TI_SCI_PM_DOMAINS symbols to built-in (Javier Martinez Canillas) -- Linux v6.6.0-0.rc3.9f3ebbef746f - -* Fri Sep 29 2023 Fedora Kernel Team [6.6.0-0.rc3.9ed22ae6be81.30] -- Linux v6.6.0-0.rc3.9ed22ae6be81 - -* Thu Sep 28 2023 Fedora Kernel Team [6.6.0-0.rc3.633b47cb009d.29] -- Linux v6.6.0-0.rc3.633b47cb009d - -* Wed Sep 27 2023 Fedora Kernel Team [6.6.0-0.rc3.0e945134b680.28] - kernel.spec: adjust build option comment (Michael Hofmann) - kernel.spec: allow to enable arm64_16k variant (Michael Hofmann) - gitlab-ci: enable build-only pipelines for Rawhide/16k/aarch64 (Michael Hofmann) @@ -4059,122 +4330,40 @@ fi\ - redhat/self-test: Update data for cross compile fields (Prarit Bhargava) - redhat/Makefile.cross: Add message for disabled subpackages (Prarit Bhargava) - redhat/Makefile.cross: Update cross targets with disabled subpackages (Prarit Bhargava) -- Linux v6.6.0-0.rc3.0e945134b680 - -* Tue Sep 26 2023 Fedora Kernel Team [6.6.0-0.rc3.27] - Remove XFS_ASSERT_FATAL from pending-fedora (Justin M. Forbes) - -* Mon Sep 25 2023 Fedora Kernel Team [6.6.0-0.rc3.26] - Change default pending for XFS_ONLINE_SCRUB_STATSas it now selects XFS_DEBUG (Justin M. 
Forbes) -- Linux v6.6.0-0.rc3 - -* Sun Sep 24 2023 Fedora Kernel Team [6.6.0-0.rc2.3aba70aed91f.25] -- Linux v6.6.0-0.rc2.3aba70aed91f - -* Sat Sep 23 2023 Fedora Kernel Team [6.6.0-0.rc2.d90b0276af8f.24] -- Linux v6.6.0-0.rc2.d90b0276af8f - -* Fri Sep 22 2023 Fedora Kernel Team [6.6.0-0.rc2.27bbf45eae9c.23] - gitlab-ci: use --with debug/base to select kernel variants (Michael Hofmann) - kernel.spec: add rpmbuild --without base option (Michael Hofmann) -- Linux v6.6.0-0.rc2.27bbf45eae9c - -* Thu Sep 21 2023 Fedora Kernel Team [6.6.0-0.rc2.42dc814987c1.22] - redhat: spec: Fix typo for kernel_variant_preun for 16k-debug flavor (Neal Gompa) -- Linux v6.6.0-0.rc2.42dc814987c1 - -* Tue Sep 19 2023 Fedora Kernel Team [6.6.0-0.rc2.2cf0f7156238.21] -- Linux v6.6.0-0.rc2.2cf0f7156238 - -* Mon Sep 18 2023 Fedora Kernel Team [6.6.0-0.rc2.20] -- Linux v6.6.0-0.rc2 - -* Sun Sep 17 2023 Fedora Kernel Team [6.6.0-0.rc1.f0b0d403eabb.19] -- Linux v6.6.0-0.rc1.f0b0d403eabb - -* Sat Sep 16 2023 Fedora Kernel Team [6.6.0-0.rc1.57d88e8a5974.18] -- Linux v6.6.0-0.rc1.57d88e8a5974 - -* Fri Sep 15 2023 Fedora Kernel Team [6.6.0-0.rc1.9fdfb15a3dbf.17] - Turn off appletalk for fedora (Justin M. Forbes) -- Linux v6.6.0-0.rc1.9fdfb15a3dbf - -* Thu Sep 14 2023 Fedora Kernel Team [6.6.0-0.rc1.aed8aee11130.16] -- Linux v6.6.0-0.rc1.aed8aee11130 - -* Wed Sep 13 2023 Fedora Kernel Team [6.6.0-0.rc1.3669558bdf35.15] -- Linux v6.6.0-0.rc1.3669558bdf35 - -* Tue Sep 12 2023 Fedora Kernel Team [6.6.0-0.rc1.14] - New configs in drivers/media (Fedora Kernel Team) - redhat/docs: Add a mention of bugzilla for bugs (Prarit Bhargava) - Fix the fixup of Fedora release (Don Zickus) - -* Mon Sep 11 2023 Fedora Kernel Team [6.6.0-0.rc1.13] -- Linux v6.6.0-0.rc1 - -* Sun Sep 10 2023 Fedora Kernel Team [6.6.0-0.rc0.535a265d7f0d.12] -- Linux v6.6.0-0.rc0.535a265d7f0d - -* Sat Sep 09 2023 Fedora Kernel Team [6.6.0-0.rc0.6099776f9f26.11] -- Linux v6.6.0-0.rc0.6099776f9f26 - -* Fri Sep 08 2023 Fedora Kernel Team [6.6.0-0.rc0.a48fa7efaf11.10] -- Linux v6.6.0-0.rc0.a48fa7efaf11 - -* Thu Sep 07 2023 Fedora Kernel Team [6.6.0-0.rc0.7ba2090ca64e.9] - Fix Fedora release scheduled job (Don Zickus) - Move squashfs to kernel-modules-core (Justin M. Forbes) - redhat: Explicitly disable CONFIG_COPS (Vitaly Kuznetsov) - redhat: Add dist-check-licenses target (Vitaly Kuznetsov) - redhat: Introduce "Verify SPDX-License-Identifier tags" selftest (Vitaly Kuznetsov) - redhat: Use kspdx-tool output for the License: field (Vitaly Kuznetsov) -- Linux v6.6.0-0.rc0.7ba2090ca64e - -* Wed Sep 06 2023 Fedora Kernel Team [6.6.0-0.rc0.65d6e954e378.8] - Rename pipeline repo branch and DW tree names (Michael Hofmann) - Adjust comments that refer to ARK in a Rawhide context (Michael Hofmann) - Rename variable names starting with ark- to rawhide- (Michael Hofmann) - Rename trigger-ark to trigger-rawhide (Michael Hofmann) - Fix up config mismatches for Fedora (Justin M. Forbes) -- Linux v6.6.0-0.rc0.65d6e954e378 - -* Tue Sep 05 2023 Fedora Kernel Team [6.6.0-0.rc0.3f86ed6ec0b3.7] - redhat/configs: Texas Instruments Inc. K3 multicore SoC architecture (Steve Best) -- Linux v6.6.0-0.rc0.3f86ed6ec0b3 - -* Mon Sep 04 2023 Fedora Kernel Team [6.6.0-0.rc0.708283abf896.6] -- Linux v6.6.0-0.rc0.708283abf896 - -* Sun Sep 03 2023 Fedora Kernel Team [6.6.0-0.rc0.92901222f83d.5] - Flip CONFIG_VIDEO_V4L2_SUBDEV_API in pending RHEL due to mismatch (Justin M. 
Forbes) -- Linux v6.6.0-0.rc0.92901222f83d - -* Sat Sep 02 2023 Fedora Kernel Team [6.6.0-0.rc0.0468be89b3fa.4] - CONFIG_HW_RANDOM_HISI: move to common and set to m (Scott Weaver) - Turn off CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE for Fedora s390x (Justin M. Forbes) -- Linux v6.6.0-0.rc0.0468be89b3fa - -* Fri Sep 01 2023 Fedora Kernel Team [6.6.0-0.rc0.99d99825fc07.3.eln130] - Disable tests for ELN realtime pipelines (Michael Hofmann) - New configs in mm/Kconfig (Fedora Kernel Team) - Flip CONFIG_SND_SOC_CS35L56_SDW to m and clean up (Justin M. Forbes) - Add drm_exec_test to mod-internal.list (Thorsten Leemhuis) - Add new pending entry for CONFIG_SND_SOC_CS35L56_SDW to fix mismatch (Justin M. Forbes) -- Linux v6.6.0-0.rc0.99d99825fc07 - -* Thu Aug 31 2023 Fedora Kernel Team [6.6.0-0.rc0.b97d64c72259.2.eln130] - Fix tarball creation logic (Don Zickus) - redhat: bump libcpupower soname to match upstream (Patrick Talbert) - Turn on MEMFD_CREATE in pending as it is selected by CONFIG_TMPFS (Justin M. Forbes) -- Linux v6.6.0-0.rc0.b97d64c72259 - -* Wed Aug 30 2023 Fedora Kernel Team [6.6.0-0.rc0.6c1b980a7e79.1.eln130] - redhat: drop unneeded build-time dependency gcc-plugin-devel (Coiby Xu) - Reset RHEL release and trim changelog after rebase (Justin M. Forbes) -- Linux v6.6.0-0.rc0.6c1b980a7e79 - -* Tue Aug 29 2023 Fedora Kernel Team [6.6.0-0.rc0.1c59d383390f.59.eln130] - all: x86: move wayward x86 specific config home (Peter Robinson) - all: de-dupe non standard config options (Peter Robinson) - all: x86: clean up microcode loading options (Peter Robinson) @@ -4942,8 +5131,6 @@ fi\ - redhat: generate distgit changelog in genspec.sh as well (Herton R. Krzesinski) - redhat: make genspec prefer metadata from git notes (Herton R. Krzesinski) - redhat: use tags from git notes for zstream to generate changelog (Herton R. Krzesinski) -- ARK: Remove code marking drivers as tech preview (Peter Georg) -- ARK: Remove code marking devices deprecated (Peter Georg) - ARK: Remove code marking devices unmaintained (Peter Georg) - rh_message: Fix function name (Peter Georg) [2019377] - Turn on CONFIG_RANDOM_TRUST_BOOTLOADER (Justin M. 
Forbes) @@ -5287,7 +5474,6 @@ fi\ - Attempt to fix Intel PMT code (David Arcari) - CI: Enable realtime branch testing (Veronika Kabatova) - CI: Enable realtime checks for c9s and RHEL9 (Veronika Kabatova) -- [fs] dax: mark tech preview (Bill O'Donnell) [1995338] - ark: wireless: enable all rtw88 pcie wirless variants (Peter Robinson) - wireless: rtw88: move debug options to common/debug (Peter Robinson) - fedora: minor PTP clock driver cleanups (Peter Robinson) @@ -5462,7 +5648,6 @@ fi\ - Enable CONFIG_BPF_UNPRIV_DEFAULT_OFF (Jiri Olsa) - configs/common/s390: disable CONFIG_QETH_{OSN,OSX} (Philipp Rudo) [1903201] - nvme: nvme_mpath_init remove multipath check (Mike Snitzer) -- team: mark team driver as deprecated (Hangbin Liu) [1945477] - Make CRYPTO_EC also builtin (Simo Sorce) [1947240] - Do not hard-code a default value for DIST (David Ward) - Override %%{debugbuildsenabled} if the --with-release option is used (David Ward) @@ -5471,7 +5656,6 @@ fi\ - Revert s390x/zfcpdump part of a9d179c40281 and ecbfddd98621 (Vladis Dronov) - Embed crypto algos, modes and templates needed in the FIPS mode (Vladis Dronov) [1947240] - configs: Add and enable CONFIG_HYPERV_TESTING for debug kernels (Mohammed Gamal) -- mm/cma: mark CMA on x86_64 tech preview and print RHEL-specific infos (David Hildenbrand) [1945002] - configs: enable CONFIG_CMA on x86_64 in ARK (David Hildenbrand) [1945002] - rpmspec: build debug-* meta-packages if debug builds are disabled (Herton R. Krzesinski) - UIO: disable unused config options (Aristeu Rozanski) [1957819] @@ -5556,7 +5740,6 @@ fi\ - Limit CONFIG_USB_CDNS_SUPPORT to x86_64 and arm in Fedora (David Ward) - Fedora: Enable CHARGER_GPIO on aarch64 too (Peter Robinson) - Fedora config updates (Justin M. Forbes) -- wireguard: mark as Tech Preview (Hangbin Liu) [1613522] - configs: enable CONFIG_WIREGUARD in ARK (Hangbin Liu) [1613522] - Remove duplicate configs acroos fedora, ark and common (Don Zickus) - Combine duplicate configs across ark and fedora into common (Don Zickus) @@ -6195,7 +6378,7 @@ fi\ - [initial commit] Add scripts (Laura Abbott) - [initial commit] Add configs (Laura Abbott) - [initial commit] Add Makefiles (Laura Abbott) -- Linux v6.6.0-0.rc0.1c59d383390f +- Linux v6.7.0-0.rc0.5a6a09e97199 ### # The following Emacs magic makes C-c C-e use UTC dates. 
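For reference: the update_scripts() helper added in the kernel.spec hunk above copies every file in $RPM_SOURCE_DIR that carries the build-target suffix over its unsuffixed name, running once for the primary target and, on CentOS builds, a second time so the centos variants win. A minimal Python sketch of that suffix-strip copy follows; the "SOURCES" path, the example file name, and the rhel/centos invocation order are assumptions read off the hunk, not the spec's real interface.

import shutil
from pathlib import Path

def update_scripts(source_dir: str, target: str) -> None:
    # e.g. "filter-modules.sh.centos" is installed as plain "filter-modules.sh"
    # in the current directory; Path.stem drops the final ".<target>" suffix
    for path in Path(source_dir).glob(f"*.{target}"):
        shutil.copy(path, path.stem)

# CentOS builds copy twice: the primary target first, then the centos overrides
update_scripts("SOURCES", "rhel")    # hypothetical source directory
update_scripts("SOURCES", "centos")
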
diff --git a/TOOLS/patch_configs.py b/TOOLS/patch_configs.py
index 54b039f..b69dc57 100755
--- a/TOOLS/patch_configs.py
+++ b/TOOLS/patch_configs.py
@@ -20,17 +20,6 @@ GENERIC_PATCHES = [
     ["WINESYNC", None, ENABLE],
     ["USER_NS_UNPRIVILEGED", None, ENABLE],
     ["TCP_CONG_BBR2", None, MODULE],
-    # bcachefs
-    ["BCACHEFS_FS", None, MODULE],
-    ["BCACHEFS_QUOTA", None, ENABLE],
-    ["BCACHEFS_POSIX_ACL", None, ENABLE],
-    ["BCACHEFS_DEBUG_TRANSACTIONS", None, UNSET],
-    ["BCACHEFS_DEBUG", None, UNSET],
-    ["BCACHEFS_TESTS", None, UNSET],
-    ["BCACHEFS_LOCK_TIME_STATS", None, UNSET],
-    ["BCACHEFS_NO_LATENCY_ACCT", None, UNSET],
-    ["MEAN_AND_VARIANCE_UNIT_TEST", None, UNSET],
-    ["DEBUG_CLOSURES", None, UNSET],

     # device specific config
     # Microsoft Surface
@@ -40,9 +29,22 @@ GENERIC_PATCHES = [
     ["VIDEO_DW9719", None, MODULE],
     ["IPC_CLASSES", None, ENABLE],
     ["LEDS_TPS68470", None, MODULE],
-
-    # Steam Deck HDR Color management
-    ["DRM_AMD_COLOR_STEAMDECK", None, UNSET],
+    ["SENSORS_SURFACE_FAN", None, MODULE],
+    ["SENSORS_SURFACE_TEMP", None, MODULE],
+
+    # Steam Deck / amdgpu HDR Color management
+    ["DRM_AMD_COLOR_STEAMDECK", None, ENABLE],
+
+    # ROG Ally Gyro Fix
+    ["BMI323_I2C", None, MODULE],
+    ["BMI323_SPI", None, MODULE],
+
+    # Mac T2 support
+    ["DRM_APPLETBDRM", None, MODULE],
+    ["HID_APPLETB_BL", None, MODULE],
+    ["HID_APPLETB_KBD", None, MODULE],
+    ["HID_APPLE_MAGIC_BACKLIGHT", None, MODULE],
+    ["CONFIG_APPLE_BCE", None, MODULE],
 ]

 ARCH_PATCHES = {
@@ -139,10 +141,11 @@ def apply_patches(data: str, patches, flags = None) -> str:
         flags = []

     for name, *val in patches:
-        c = f"CONFIG_{name}"
+        if not name.startswith("CONFIG_"):
+            name = f"CONFIG_{name}"

-        s = f"{c}="
-        u = f"# {c} "
+        s = f"{name}="
+        u = f"# {name} "

         if len(val) == 3 and val[2] not in flags:
             continue
@@ -166,15 +169,15 @@ def apply_patches(data: str, patches, flags = None) -> str:

         if val[0] is not None:
             # verify we found what we expect
-            l = generate_line(c, val[0])
+            l = generate_line(name, val[0])
             if l != line:
                 #print(f"  Could not apply {name}: could not find expected config")
                 continue

-            data = data[:line_start] + generate_line(c, val[1]) + data[line_end:]
+            data = data[:line_start] + generate_line(name, val[1]) + data[line_end:]
         elif val[0] is None:
             # relevant entry does not exist yet and we don't want to replace anything specific
-            data += generate_line(c, val[1])
+            data += generate_line(name, val[1])
             data += "\n"
         else:
             print(f"  Couldn't find {name}")
-- 
cgit v1.2.3
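A note on the apply_patches() change above: entry names may now carry an explicit CONFIG_ prefix, which is what lets the new ["CONFIG_APPLE_BCE", None, MODULE] entry sit alongside the prefix-less ones. Below is a rough standalone sketch of that normalization together with the append path; generate_line() here is a simplified stand-in for the tool's real helper, and the "m"/None value encoding is an assumption, not the tool's actual constants.

MODULE = "m"  # assumed stand-in for the tool's MODULE constant

def generate_line(name: str, value) -> str:
    # simplified stand-in: kconfig syntax for set vs. unset options
    return f"# {name} is not set" if value is None else f"{name}={value}"

def apply_entry(data: str, name: str, new_value) -> str:
    # the normalization added in the patch: the CONFIG_ prefix is optional
    if not name.startswith("CONFIG_"):
        name = f"CONFIG_{name}"
    # mirror only the "val[0] is None" append path; in-place replacement omitted
    if f"{name}=" not in data and f"# {name} " not in data:
        data += generate_line(name, new_value) + "\n"
    return data

config = "CONFIG_EXPERT=y\n"
# both spellings resolve to the same CONFIG_ name
config = apply_entry(config, "BMI323_I2C", MODULE)
config = apply_entry(config, "CONFIG_APPLE_BCE", MODULE)
print(config, end="")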