Merge pull request #2026 from folkertdev/neon-vpadd

use `intrinsics::simd` for vpadd
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index c0e46c3..88afaae 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -11488,16 +11488,9 @@ pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0"
-        )]
-        fn _vld1_f64_x2(a: *const f64) -> float64x1x2_t;
-    }
-    _vld1_f64_x2(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1_f64_x2(ptr: *const f64) -> float64x1x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x3)"]
@@ -11506,16 +11499,9 @@ pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0"
-        )]
-        fn _vld1_f64_x3(a: *const f64) -> float64x1x3_t;
-    }
-    _vld1_f64_x3(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1_f64_x3(ptr: *const f64) -> float64x1x3_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x4)"]
@@ -11524,16 +11510,9 @@ pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0"
-        )]
-        fn _vld1_f64_x4(a: *const f64) -> float64x1x4_t;
-    }
-    _vld1_f64_x4(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1_f64_x4(ptr: *const f64) -> float64x1x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x2)"]
@@ -11542,16 +11521,9 @@ pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0"
-        )]
-        fn _vld1q_f64_x2(a: *const f64) -> float64x2x2_t;
-    }
-    _vld1q_f64_x2(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1q_f64_x2(ptr: *const f64) -> float64x2x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x3)"]
@@ -11560,16 +11532,9 @@ pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0"
-        )]
-        fn _vld1q_f64_x3(a: *const f64) -> float64x2x3_t;
-    }
-    _vld1q_f64_x3(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1q_f64_x3(ptr: *const f64) -> float64x2x3_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4)"]
@@ -11578,16 +11543,9 @@ pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
 #[inline(always)]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0"
-        )]
-        fn _vld1q_f64_x4(a: *const f64) -> float64x2x4_t;
-    }
-    _vld1q_f64_x4(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1q_f64_x4(ptr: *const f64) -> float64x2x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f64)"]
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 3777cc7..2df4ba7 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -1093,6 +1093,14 @@ unsafe fn $name() {
         test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
         test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);
 
+        test_vld1_f64_x2(f64, 2, float64x1x2_t, vst1_f64_x2, vld1_f64_x2);
+        test_vld1_f64_x3(f64, 3, float64x1x3_t, vst1_f64_x3, vld1_f64_x3);
+        test_vld1_f64_x4(f64, 4, float64x1x4_t, vst1_f64_x4, vld1_f64_x4);
+
+        test_vld1q_f64_x2(f64, 4, float64x2x2_t, vst1q_f64_x2, vld1q_f64_x2);
+        test_vld1q_f64_x3(f64, 6, float64x2x3_t, vst1q_f64_x3, vld1q_f64_x3);
+        test_vld1q_f64_x4(f64, 8, float64x2x4_t, vst1q_f64_x4, vld1q_f64_x4);
+
         test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
         test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
         test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 7ab68ff..9190c85 100644
--- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -3479,10 +3479,10 @@
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
-    arguments: ["a: {type[0]}"]
+    arguments: ["ptr: {type[0]}"]
     return_type: "{neon_type[1]}"
     attr: [*neon-stable]
-    assert_instr: [ld1]
+    assert_instr: [ld]
     safety:
       unsafe: [neon]
     types:
@@ -3493,11 +3493,12 @@
       - ["*const f64", float64x1x4_t]
       - ["*const f64", float64x2x4_t]
     compose:
-      - LLVMLink:
-          name: "vld1{neon_type[1].no}"
-          links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
-              arch: aarch64,arm64ec
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - ptr
+                - cast
+                - []
 
   - name: "vld2{neon_type[1].lane_nox}"
     doc: Load multiple 2-element structures to two registers