Skip to content

Commit 5cfb0e4

Browse files
committed
new package: llama-cpp
1 parent f85abe7 commit 5cfb0e4

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
--- a/CMakeLists.txt
2+
+++ b/CMakeLists.txt
3+
@@ -434,7 +434,7 @@
4+
endif()
5+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
6+
# Raspberry Pi 2
7+
- add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
8+
+ add_compile_options(-mno-unaligned-access -funsafe-math-optimizations)
9+
endif()
10+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
11+
# Raspberry Pi 3, 4, Zero 2 (32-bit)
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
--- a/k_quants.c
2+
+++ b/k_quants.c
3+
@@ -43,6 +43,89 @@
4+
// 2-6 bit quantization in super-blocks
5+
//
6+
7+
+#if defined(__ARM_NEON)
8+
+
9+
+#if !defined(__aarch64__)
10+
+
11+
+inline static uint16_t vaddvq_u8(uint8x16_t v) {
12+
+ return
13+
+ (uint16_t)vgetq_lane_u8(v, 0) + (uint16_t)vgetq_lane_u8(v, 1) +
14+
+ (uint16_t)vgetq_lane_u8(v, 2) + (uint16_t)vgetq_lane_u8(v, 3) +
15+
+ (uint16_t)vgetq_lane_u8(v, 4) + (uint16_t)vgetq_lane_u8(v, 5) +
16+
+ (uint16_t)vgetq_lane_u8(v, 6) + (uint16_t)vgetq_lane_u8(v, 7) +
17+
+ (uint16_t)vgetq_lane_u8(v, 8) + (uint16_t)vgetq_lane_u8(v, 9) +
18+
+ (uint16_t)vgetq_lane_u8(v, 10) + (uint16_t)vgetq_lane_u8(v, 11) +
19+
+ (uint16_t)vgetq_lane_u8(v, 12) + (uint16_t)vgetq_lane_u8(v, 13) +
20+
+ (uint16_t)vgetq_lane_u8(v, 14) + (uint16_t)vgetq_lane_u8(v, 15);
21+
+}
22+
+
23+
+inline static int16_t vaddvq_s8(int8x16_t v) {
24+
+ return
25+
+ (int16_t)vgetq_lane_s8(v, 0) + (int16_t)vgetq_lane_s8(v, 1) +
26+
+ (int16_t)vgetq_lane_s8(v, 2) + (int16_t)vgetq_lane_s8(v, 3) +
27+
+ (int16_t)vgetq_lane_s8(v, 4) + (int16_t)vgetq_lane_s8(v, 5) +
28+
+ (int16_t)vgetq_lane_s8(v, 6) + (int16_t)vgetq_lane_s8(v, 7) +
29+
+ (int16_t)vgetq_lane_s8(v, 8) + (int16_t)vgetq_lane_s8(v, 9) +
30+
+ (int16_t)vgetq_lane_s8(v, 10) + (int16_t)vgetq_lane_s8(v, 11) +
31+
+ (int16_t)vgetq_lane_s8(v, 12) + (int16_t)vgetq_lane_s8(v, 13) +
32+
+ (int16_t)vgetq_lane_s8(v, 14) + (int16_t)vgetq_lane_s8(v, 15);
33+
+}
34+
+
35+
+inline static int32_t vaddvq_s16(int16x8_t v) {
36+
+ return
37+
+ (int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
38+
+ (int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
39+
+ (int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
40+
+ (int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
41+
+}
42+
+
43+
+inline static uint32_t vaddvq_u16(uint16x8_t v) {
44+
+ return
45+
+ (uint32_t)vgetq_lane_u16(v, 0) + (uint32_t)vgetq_lane_u16(v, 1) +
46+
+ (uint32_t)vgetq_lane_u16(v, 2) + (uint32_t)vgetq_lane_u16(v, 3) +
47+
+ (uint32_t)vgetq_lane_u16(v, 4) + (uint32_t)vgetq_lane_u16(v, 5) +
48+
+ (uint32_t)vgetq_lane_u16(v, 6) + (uint32_t)vgetq_lane_u16(v, 7);
49+
+}
50+
+
51+
+inline static int32_t vaddvq_s32(int32x4_t v) {
52+
+ return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
53+
+}
54+
+
55+
+inline static float vaddvq_f32(float32x4_t v) {
56+
+ return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
57+
+}
58+
+
59+
+inline static float vminvq_f32(float32x4_t v) {
60+
+ return
61+
+ MIN(MIN(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
62+
+ MIN(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
63+
+}
64+
+
65+
+inline static float vmaxvq_f32(float32x4_t v) {
66+
+ return
67+
+ MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
68+
+ MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
69+
+}
70+
+
71+
+inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) { // ARMv7 fallback for the AArch64-only intrinsic: converts each float lane to int32
72+
+ int32x4_t res;
73+
+ // rintf() rounds to nearest with ties to even (default FP rounding mode), like the real vcvtnq; roundf() rounds ties away from zero.
74+
+ res[0] = rintf(vgetq_lane_f32(v, 0));
75+
+ res[1] = rintf(vgetq_lane_f32(v, 1));
76+
+ res[2] = rintf(vgetq_lane_f32(v, 2));
77+
+ res[3] = rintf(vgetq_lane_f32(v, 3));
78+
+ // NOTE(review): lanes outside the int32 range are not saturated by this plain C conversion; confirm callers stay in range.
79+
+ return res;
80+
+}
81+
+
82+
+inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
83+
+ const int16x4_t c = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
84+
+ const int16x4_t d = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
85+
+ return vcombine_s16(c, d);
86+
+}
87+
+
88+
+#endif
89+
+#endif
90+
91+
//
92+
// ===================== Helper functions

packages/llama-cpp/build.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
TERMUX_PKG_HOMEPAGE=https://github.com/ggerganov/llama.cpp
2+
TERMUX_PKG_DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
3+
TERMUX_PKG_LICENSE=MIT
4+
TERMUX_PKG_MAINTAINER=@termux
5+
TERMUX_PKG_VERSION=0.0.0-b1094
6+
TERMUX_PKG_SRCURL=https://github.com/ggerganov/llama.cpp/archive/refs/tags/${TERMUX_PKG_VERSION#*-}.tar.gz
7+
TERMUX_PKG_SHA256=315071e1034846e8ed448008cda35da481f056d6495696cb862ef8b94aaae0f6
8+
TERMUX_PKG_AUTO_UPDATE=true
9+
TERMUX_PKG_DEPENDS="libc++, libopenblas, openmpi"
10+
TERMUX_PKG_RECOMMENDS="python-numpy, python-sentencepiece"
11+
TERMUX_PKG_EXTRA_CONFIGURE_ARGS="
12+
-DLLAMA_MPI=ON
13+
-DBUILD_SHARED_LIBS=ON
14+
-DLLAMA_BLAS=ON
15+
-DLLAMA_BLAS_VENDOR=OpenBLAS
16+
"
17+
18+
# XXX: llama.cpp uses `int64_t`, but on 32-bit Android `size_t` is only 32 bits wide (`uint32_t`).
19+
# XXX: I don't think it will work if we simply cast it.
20+
TERMUX_PKG_BLACKLISTED_ARCHES="arm, i686"
21+
22+
termux_pkg_auto_update() {
23+
local latest_tag
24+
latest_tag="$(
25+
termux_github_api_get_tag "${TERMUX_PKG_SRCURL}" "${TERMUX_PKG_UPDATE_TAG_TYPE}"
26+
)"
27+
28+
if [[ -z "${latest_tag}" ]]; then
29+
termux_error_exit "ERROR: Unable to get tag from ${TERMUX_PKG_SRCURL}"
30+
fi
31+
termux_pkg_upgrade_version "0.0.0-${latest_tag}"
32+
}
33+
34+
termux_step_post_make_install() {
35+
cd "$TERMUX_PREFIX/bin" || exit 1
36+
mv main llama
37+
mv server llama-server
38+
}

0 commit comments

Comments
 (0)