-
Notifications
You must be signed in to change notification settings - Fork 73
/
Copy pathchat_completions.py
57 lines (52 loc) · 2.32 KB
/
chat_completions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from groq import Groq
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from openinference.instrumentation.groq import GroqInstrumentor
# Configure GroqInstrumentor with Phoenix endpoint
endpoint = "http://127.0.0.1:6006/v1/traces"
tracer_provider = trace_sdk.TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint)))
GroqInstrumentor().instrument(tracer_provider=tracer_provider)
if __name__ == "__main__":
client = Groq()
chat_completion = client.chat.completions.create(
#
# Required parameters
#
messages=[
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
},
],
# The language model which will generate the completion.
model="mixtral-8x7b-32768",
#
# Optional parameters
#
# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,
# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,
# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,
# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,
# If set, partial message deltas will be sent.
stream=False,
)
# Print the completion returned by the LLM.
print(chat_completion.choices[0].message.content)