Update to latest #1

Merged: 123 commits on Nov 23, 2023

Commits
34b2a5e
server : do not release slot on image input (#3798)
ggerganov Oct 26, 2023
2f9ec7e
cuda : improve text-generation and batched decoding performance (#3776)
ggerganov Oct 27, 2023
c8d6a1f
simple : fix batch handling (#3803)
tterrasson Oct 27, 2023
6d459cb
llama : correctly report GGUFv3 format (#3818)
cebtenzzre Oct 27, 2023
41aee4d
speculative : ensure draft and target model vocab matches (#3812)
KerfuffleV2 Oct 27, 2023
fdee152
starcoder : add GPU offloading (#3827)
ggerganov Oct 28, 2023
1774611
common : print that one line of the syntax help *also* to standard ou…
HenkPoley Oct 28, 2023
ee1a0ec
llama : add option for greedy sampling with probs (#3813)
ggerganov Oct 28, 2023
bd6d9e2
llama : allow quantizing k-quants to fall back when tensor size incom…
KerfuffleV2 Oct 28, 2023
8a2f2fe
convert : ignore tokens if their IDs are within [0, vocab_size) (#3831)
ggerganov Oct 28, 2023
ba231e8
issues : change label from bug to bug-unconfirmed (#3748)
ggerganov Oct 28, 2023
82a6646
metal : try cwd for ggml-metal.metal if bundle lookup fails (#3793)
akx Oct 28, 2023
ff3bad8
flake : update flake.lock for newer transformers version + provide ex…
Green-Sky Oct 28, 2023
d69d777
ggml : quantization refactoring (#3833)
ggerganov Oct 29, 2023
71a09da
llama : fix kv shift bug (#3835)
ggerganov Oct 29, 2023
2046eb4
make : remove unnecessary dependency on build-info.h (#3842)
cebtenzzre Oct 29, 2023
6e08281
Extend llama_kv_cache_seq_rm to allow matching any sequence (#3843)
KerfuffleV2 Oct 29, 2023
207b519
ggml : move FP16 <-> FP32 code to ggml-impl.h (#3861)
ggerganov Oct 30, 2023
07178c9
flake.nix: fix for rocm 5.7 (#3853)
Tungsten842 Oct 31, 2023
238657d
samplers : Min-P sampler implementation [alternative to Top P/Top K] …
kalomaze Oct 31, 2023
71e3718
llama : refactor graph build code (#3837)
ggerganov Nov 1, 2023
ca190bc
server : re-enable completion and embedded at the same time (#3876)
a-h Nov 1, 2023
f0e2093
scripts : add server-llm.sh (#3868)
ggerganov Nov 1, 2023
73bdcb3
finetune : add -ngl parameter (#3762)
AndrewGodfrey Nov 1, 2023
9a3b4f6
ggml : fix UNUSED macro (#3762)
ggerganov Nov 1, 2023
e75dfdd
sampling : null grammar field after reset (#3885)
l3utterfly Nov 1, 2023
a2758d0
log : make generating separate log files optional (#3787)
staviq Nov 1, 2023
0e40806
common : allow caller to handle help/argument exceptions (#3715)
bandoti Nov 1, 2023
5033796
llm : add llm_build_context (#3881)
ggerganov Nov 1, 2023
ff8f9a8
common : minor (#3715)
ggerganov Nov 1, 2023
e16b9fa
metal : multi-simd softmax (#3710)
ggerganov Nov 1, 2023
523e49b
llm : fix falcon norm after refactoring (#3837)
ggerganov Nov 1, 2023
c43c2da
llm : fix llm_build_kqv taking unused tensor (benign, #3837)
ggerganov Nov 1, 2023
898aeca
llama : implement YaRN RoPE scaling (#2268)
cebtenzzre Nov 1, 2023
d02e98c
ggml-cuda : compute ptrs for cublasGemmBatchedEx in a kernel (#3891)
slaren Nov 1, 2023
0eb332a
llama : fix llama_context_default_params after #2268 (#3893)
cebtenzzre Nov 1, 2023
2fffa0d
cuda : fix RoPE after #2268 (#3897)
cebtenzzre Nov 2, 2023
183b3fa
metal : fix build errors and kernel sig after #2268 (#3898)
ggerganov Nov 2, 2023
4d719a6
cuda : check if this fixes Pascal card regression (#3882)
ggerganov Nov 2, 2023
b12fa0d
build : link against build info instead of compiling against it (#3879)
cebtenzzre Nov 2, 2023
1efae9b
llm : prevent from 1-D tensors being GPU split (#3697)
ggerganov Nov 2, 2023
2756c4f
gguf : remove special-case code for GGUFv1 (#3901)
ggerganov Nov 2, 2023
21958bb
cmake : disable LLAMA_NATIVE by default (#3906)
slaren Nov 2, 2023
4ff1046
gguf : print error for GGUFv1 files (#3908)
ggerganov Nov 2, 2023
d606905
cuda : use CUDA memory pool with async memory allocation/deallocation…
young-developer Nov 2, 2023
c7743fe
cuda : fix const ptrs warning causing ROCm build issues (#3913)
ggerganov Nov 2, 2023
224e7d5
readme : add notice about #3912
ggerganov Nov 2, 2023
51b2fc1
cmake : fix relative path to git submodule index (#3915)
abetlen Nov 2, 2023
629f917
cuda : add ROCM aliases for CUDA pool stuff (#3918)
KerfuffleV2 Nov 2, 2023
3fdbe6b
llama : change yarn_ext_factor placeholder to -1 (#3922)
cebtenzzre Nov 3, 2023
0581602
common : YAYF (yet another YARN fix) (#3925)
ggerganov Nov 3, 2023
8f961ab
speculative : change default p_accept to 0.5 + CLI args (#3919)
ggerganov Nov 3, 2023
abb77e7
ggml-cuda : move row numbers to x grid dim in mmv kernels (#3921)
slaren Nov 3, 2023
5ba3746
ggml-metal: fix yarn rope (#3937)
jxy Nov 3, 2023
d9b33fe
metal : round up to 16 to fix MTLDebugComputeCommandEncoder assertion…
psugihara Nov 3, 2023
f28af0d
gguf-py: Support 01.AI Yi models (#3943)
KerfuffleV2 Nov 4, 2023
48ade94
cuda : revert CUDA pool stuff (#3944)
slaren Nov 5, 2023
a7fac01
ci : use intel sde when ci cpu doesn't support avx512 (#3949)
netrunnereve Nov 5, 2023
c41ea36
cmake : MSVC instruction detection (fixed up #809) (#3923)
netrunnereve Nov 5, 2023
3d48f42
llama : mark LLM_ARCH_STARCODER as full offload supported (#3945)
wsxiaoys Nov 5, 2023
132d25b
cuda : fix disabling device with --tensor-split 1,0 (#3951)
cebtenzzre Nov 5, 2023
bb60fd0
server : fix typo for --alias shortcut from -m to -a (#3958)
RoyalHeart Nov 5, 2023
d9ccce2
Allow common process_escapes to handle \x sequences (#3928)
KerfuffleV2 Nov 5, 2023
2833a6f
ggml-cuda : fix f16 mul mat (#3961)
slaren Nov 5, 2023
381efbf
llava : expose as a shared library for downstream projects (#3613)
damian0815 Nov 6, 2023
46876d2
cuda : supports running on CPU for GGML_USE_CUBLAS=ON build (#3946)
wsxiaoys Nov 7, 2023
54b4df8
Use params when loading models in llava-cli (#3976)
tejom Nov 7, 2023
e9c1cec
ggml : fix backward rope after YaRN (#3974)
xaedes Nov 7, 2023
413503d
make : do not add linker flags when compiling static llava lib (#3977)
ggerganov Nov 7, 2023
0a7c980
gguf : track writer state, free unneeded tensors, cleanup (#3871)
cebtenzzre Nov 7, 2023
875fb42
ggml-alloc : fix backend assignments of views (#3982)
slaren Nov 8, 2023
57ad015
server : add min_p param (#3877)
Mihaiii Nov 9, 2023
a75fa57
scripts: Generalize convert scripts (#3838)
Galunid Nov 9, 2023
df9d129
Unbreak persimmon after #3837 (#4010)
Galunid Nov 10, 2023
4a4fd3e
server : allow continue edit on completion mode (#3950)
jhen0409 Nov 10, 2023
34b0a08
gguf-py: Refactor and allow reading/modifying existing GGUF files (#3…
KerfuffleV2 Nov 11, 2023
d96ca7d
server : fix crash when prompt exceeds context size (#3996)
z80maniac Nov 11, 2023
e86fc56
Fix gguf-convert-endian script (#4037)
monatis Nov 11, 2023
532dd74
Fix some documentation typos/grammar mistakes (#4032)
richardkiss Nov 12, 2023
21fd874
gguf-py: gguf_writer: Use bytearray to build metadata (#4051)
KerfuffleV2 Nov 12, 2023
bb50a79
Add ReLU and SQR CUDA ops to (partially) fix Persimmon offloading (#4…
KerfuffleV2 Nov 13, 2023
4760e7c
sync : ggml (backend v2) (#3912)
ggerganov Nov 13, 2023
c049b37
readme : update hot topics
ggerganov Nov 13, 2023
3d68f36
ggml : sync (im2col, GPU conv, 32-bit arm compat) (#4060)
ggerganov Nov 13, 2023
bd90eca
llava : fix regression for square images in #3613 (#4056)
monatis Nov 13, 2023
b46d12f
convert.py: also look for plain model.safetensors (#4043)
afrideva Nov 14, 2023
36eed0c
stablelm : StableLM support (#3586)
Galunid Nov 14, 2023
6bb4908
Fix MacOS Sonoma model quantization (#4052)
TortoiseHam Nov 14, 2023
1cf2850
ggml-cuda : increase max graph size (#4084)
slaren Nov 15, 2023
a6fc554
llama : restore prefix space in llama tokenizer (#4081)
cebtenzzre Nov 15, 2023
8da4627
gguf : fix potential infinite loops while parsing (#4100)
texmex76 Nov 16, 2023
91f6499
Respect tokenizer.ggml.add_bos_token value when tokenizing (#4040)
KerfuffleV2 Nov 17, 2023
4f447a4
llama : fix data units (#4101)
ggerganov Nov 17, 2023
b83e149
cuda : get_row_rounding F32 (#4095)
AndrewGodfrey Nov 17, 2023
947f64f
finetune : zero the loraB initial vectors (#4082)
AndrewGodfrey Nov 17, 2023
3e916a0
finetune : speed-up ggml_compute_forward_out_prod_f32 via BLAS (#4079)
gwjr Nov 17, 2023
e85bb1a
llama : add functions to get the model's metadata (#4013)
slaren Nov 17, 2023
ba4cf5c
train : move number of gpu layers argument parsing to common/train.cp…
jpodivin Nov 17, 2023
f7d5e97
py : remove superfluous import statements (#4076)
jpodivin Nov 17, 2023
c7cce12
llava : fix compilation warning that fread return value is not used (…
huawei-lin Nov 17, 2023
9e87ef6
common : improve yaml log escaping (#4080)
joennlae Nov 17, 2023
11173c9
py : Falcon HF compatibility (#4104)
cmp-nct Nov 17, 2023
2ab0707
convert : use 'model' value if it exists. This allows karpathy/tinyll…
dmahurin Nov 17, 2023
2fa02b4
examples : add tokenize (#4039)
zakkor Nov 17, 2023
5ad387e
tokenize : fix trailing whitespace
ggerganov Nov 17, 2023
8e93610
build : support ppc64le build for make and CMake (#3963)
bufferoverflow Nov 17, 2023
bbecf3f
llama : increase max nodes (#4115)
slaren Nov 17, 2023
2923f17
Clean up ggml-cuda.cu warnings when compiling with clang (for ROCM) (…
KerfuffleV2 Nov 18, 2023
0b5c3b0
scripts : Remove missed baichuan convert script (#4127)
Galunid Nov 18, 2023
28a2e6e
tokenize example: Respect normal add BOS token behavior (#4126)
KerfuffleV2 Nov 18, 2023
e937066
gguf-py : export chat templates (#4125)
slaren Nov 19, 2023
35985ac
gitignore : tokenize
ggerganov Nov 19, 2023
262005a
common : comma should be semicolon (#4137)
kchro3 Nov 19, 2023
936c79b
server : relay error messages (#4131)
SoftwareRenderer Nov 19, 2023
05e8301
finetune : add --n-gpu-layers flag info to --help (#4128)
csaben Nov 19, 2023
dae06c0
Revert "finetune : add --n-gpu-layers flag info to --help (#4128)"
ggerganov Nov 19, 2023
40a34fe
speculative : fix prompt tokenization in speculative example (#4025)
AutonomicPerfectionist Nov 20, 2023
f23c035
ci : add flake8 to github actions (python linting) (#4129)
Galunid Nov 20, 2023
881800d
main : Add ChatML functionality to main example (#4046)
Sebby37 Nov 20, 2023
dfc7cd4
readme : update ROCm Windows instructions (#4122)
jammm Nov 20, 2023
0b871f1
finetune - update readme to mention llama support only (#4148)
Galunid Nov 20, 2023
8e672ef
stablelm : simplify + speedup generation (#4153)
Galunid Nov 21, 2023
d1252f8
Merge remote-tracking branch 'ggreganov/master' into update-to-latest
Andreybest Nov 22, 2023
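
The last commit above brings the branch up to date by merging upstream llama.cpp. For reference, a sync like this is typically produced with a sequence along the following lines; the remote and branch names are taken from that merge commit, and the upstream URL is an assumption:

git remote add ggreganov https://github.com/ggerganov/llama.cpp.git   # upstream remote, named as in the merge commit
git fetch ggreganov
git checkout update-to-latest
git merge ggreganov/master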
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug.md
@@ -1,7 +1,7 @@
---
name: Bug template
about: Used to report bugs in llama.cpp
labels: ["bug"]
labels: ["bug-unconfirmed"]
assignees: ''

---
15 changes: 14 additions & 1 deletion .github/workflows/build.yml
@@ -288,6 +288,7 @@ jobs:
OPENBLAS_VERSION: 0.3.23
OPENCL_VERSION: 2023.04.17
CLBLAST_VERSION: 1.6.0
SDE_VERSION: 9.21.1-2023-04-24

strategy:
matrix:
@@ -383,11 +384,23 @@

- name: Test
id: cmake_test
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
run: |
cd build
ctest -C Release --verbose --timeout 900

- name: Test (Intel SDE)
id: cmake_test_sde
if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
run: |
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/777395/sde-external-${env:SDE_VERSION}-win.tar.xz"
# for some weird reason windows tar doesn't like sde tar.xz
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
cd build
& $sde -future -- ctest -C Release --verbose --timeout 900

- name: Determine tag name
id: tag
shell: bash
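The new "Test (Intel SDE)" step above downloads Intel's Software Development Emulator and re-runs the AVX-512 test suite under emulation when the runner's CPU lacks AVX-512F. A minimal local equivalent, assuming the sde binary is already unpacked and on PATH:

# run the tests under SDE's emulation of a future Intel CPU (same flags as the workflow step)
cd build
sde -future -- ctest -C Release --verbose --timeout 900
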
20 changes: 20 additions & 0 deletions .github/workflows/python-lint.yml
@@ -0,0 +1,20 @@
name: flake8 Lint

on: [push, pull_request]

jobs:
flake8-lint:
runs-on: ubuntu-latest
name: Lint
steps:
- name: Check out source repository
uses: actions/checkout@v3
- name: Set up Python environment
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: flake8 Lint
uses: py-actions/flake8@v2
with:
ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704"
exclude: "examples/*,examples/*/**,*/**/__init__.py"
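
The lint job above runs flake8 via py-actions/flake8 with a project-specific ignore and exclude list. A roughly equivalent local invocation, assuming flake8 is installed (pip install flake8):

# same ignore/exclude settings as the workflow, run from the repository root
flake8 --ignore "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704" --exclude "examples/*,examples/*/**,*/**/__init__.py" .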
6 changes: 4 additions & 2 deletions .gitignore
@@ -15,6 +15,7 @@
.DS_Store
.build/
.cache/
.ccls-cache/
.direnv/
.envrc
.swiftpm
@@ -45,7 +46,7 @@ models-mnt
/infill
/libllama.so
/llama-bench
/llava
/llava-cli
/main
/metal
/perplexity
@@ -63,8 +64,9 @@ models-mnt
/speculative
/parallel
/train-text-from-scratch
/tokenize
/vdot
build-info.h
/common/build-info.cpp
arm_neon.h
compile_commands.json
CMakeSettings.json
75 changes: 31 additions & 44 deletions CMakeLists.txt
@@ -10,7 +10,7 @@ endif()

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(LLAMA_STANDALONE ON)

# configure project version
@@ -82,6 +82,7 @@ set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
@@ -93,46 +94,12 @@ option(LLAMA_CLBLAST "llama: use CLBlast"
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
option(LLAMA_MPI "llama: use MPI" OFF)
option(LLAMA_K_QUANTS "llama: use k-quants" ON)
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)

option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON)

#
# Build info header
#

# Generate initial build-info.h
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)

if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.git")

# Is git submodule
if(NOT IS_DIRECTORY "${GIT_DIR}")
file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${REAL_GIT_DIR}")
endif()

# Add a custom target for build-info.h
add_custom_target(BUILD_INFO ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h")

# Add a custom command to rebuild build-info.h when .git/index changes
add_custom_command(
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h"
COMMENT "Generating build details from Git"
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
DEPENDS "${GIT_DIR}/index"
VERBATIM
)
else()
message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
endif()

#
# Compile flags
#
@@ -277,13 +244,8 @@ if (LLAMA_BLAS)
endif()
endif()

if (LLAMA_K_QUANTS)
set(GGML_HEADERS_EXTRA k_quants.h)
set(GGML_SOURCES_EXTRA k_quants.c)
add_compile_definitions(GGML_USE_K_QUANTS)
if (LLAMA_QKK_64)
add_compile_definitions(GGML_QKK_64)
endif()
if (LLAMA_QKK_64)
add_compile_definitions(GGML_QKK_64)
endif()

if (LLAMA_CUBLAS)
@@ -305,6 +267,9 @@ if (LLAMA_CUBLAS)
if (LLAMA_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif()
if (LLAMA_CUDA_FORCE_MMQ)
add_compile_definitions(GGML_CUDA_FORCE_MMQ)
endif()
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
if (DEFINED LLAMA_CUDA_DMMV_Y)
@@ -405,6 +370,9 @@ if (LLAMA_HIPBLAS)
if (LLAMA_CUDA_FORCE_DMMV)
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV)
endif()
if (LLAMA_CUDA_FORCE_MMQ)
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_MMQ)
endif()
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
@@ -490,6 +458,15 @@ if (LLAMA_LTO)
endif()
endif()

# this version of Apple ld64 is buggy
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
ERROR_VARIABLE output
)
if (output MATCHES "dyld-1015\.7")
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
endif()

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
@@ -542,6 +519,10 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
message(STATUS "x86 detected")
if (MSVC)
# instruction set detection for MSVC only
if (LLAMA_NATIVE)
include(cmake/FindSIMD.cmake)
endif ()
if (LLAMA_AVX512)
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
@@ -593,8 +574,12 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
message(STATUS "PowerPC detected")
add_compile_options(-mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
add_compile_options(-mcpu=powerpc64le)
else()
add_compile_options(-mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
endif()
else()
message(STATUS "Unknown architecture")
endif()
Expand Down Expand Up @@ -666,6 +651,8 @@ add_library(ggml OBJECT
ggml-alloc.h
ggml-backend.c
ggml-backend.h
ggml-quants.c
ggml-quants.h
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
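
Among other things, the CMakeLists.txt changes above remove the top-level build-info.h generation (build info now lives in common/build-info.cpp), fold the k-quants sources into ggml-quants.c/ggml-quants.h, add a LLAMA_CUDA_FORCE_MMQ option, and make MSVC instruction-set detection conditional on LLAMA_NATIVE. A minimal configure sketch exercising the new CUDA option; the flag combination is only an example, not part of this PR:

cmake -B build -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_FORCE_MMQ=ON
cmake --build build --config Release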