Skip to content

Commit ddba25d

Browse files
authored
feat: Multimodal block display + Steamship agent (a16z-infra#49)
* Contribute initial multimodal agent support & Steamship agent
1 parent 5fb986e commit ddba25d

File tree

12 files changed

+239
-10
lines changed

12 files changed

+239
-10
lines changed

.env.local.example

+4-1
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,7 @@ UPSTASH_REDIS_REST_TOKEN=AZ****
3131

3232
# Twilio related environment variables
3333
TWILIO_ACCOUNT_SID=AC***
34-
TWILIO_AUTH_TOKEN=*****
34+
TWILIO_AUTH_TOKEN=*****
35+
36+
# Steamship related environment variables
37+
STEAMSHIP_API_KEY=****

.gitignore

+7-1
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,10 @@ yarn-error.log*
3535
next-env.d.ts
3636

3737
/.env.prod
38-
/fly.toml
38+
/fly.toml
39+
40+
# JetBrains
41+
.idea
42+
43+
# Yarn Lockfiles (since this project uses NPM)
44+
yarn.lock

README.md

+11-1
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,24 @@ e. **Upstash API key**
114114
<img width="866" alt="Screen Shot 2023-07-10 at 11 07 21 PM" src="https://github.com/a16z-infra/companion-app/assets/3489963/f8e6c43f-8810-423e-86b4-9e8aa70598c9">
115115

116116

117-
e. **Supabase API key** (optional)
117+
f. **Supabase API key** (optional)
118118
If you prefer to use Supabase, you will need to uncomment `VECTOR_DB=supabase` and fill out the Supabase credentials in `.env.local`.
119119

120120
- Create a Supabase instance [here](https://supabase.com/dashboard/projects); then go to Project Settings -> API
121121
- `SUPABASE_URL` is the URL value under "Project URL"
122122
- `SUPABASE_PRIVATE_KEY` is the key that starts with `ey`, found under Project API Keys
123123
- Next, enable pgvector on Supabase and create a schema. To do this, click "SQL editor" on the left-hand side of the Supabase UI, then click "+New Query". Copy and paste [this code snippet](https://github.com/a16z-infra/ai-getting-started/blob/main/pgvector.sql) into the SQL editor and click "Run".
124124

125+
g. **Steamship API key**
126+
127+
You can connect a Steamship agent instance as an LLM with personality, voice and image generation capabilities built in. It also includes its own vector storage and tools. To do so:
128+
129+
- Create an account on [Steamship](https://steamship.com/account)
130+
- Copy the API key from your account settings page
131+
- Add it to `.env.local` as the `STEAMSHIP_API_KEY` variable
132+
133+
If you'd like to create your own character personality, add a custom voice, or use a different image model, visit the [Steamship Agent Guidebook](https://www.steamship.com/learn/agent-guidebook), create your own instance, and connect it in `companions.json`, using the *Rick* example as a guide.
134+
125135
### 4. Generate embeddings
126136

127137
The `companions/` directory contains the "personalities" of the AIs in .txt files. To generate embeddings and load them into the vector database to draw from during the chat, run the following command:

companions/companions.json

+9
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@
2020
"llm": "vicuna13b",
2121
"phone": "OPTIONAL_COMPANION_PHONE_NUMBER"
2222
},
23+
{
24+
"name": "Rick",
25+
"title": "I can generate voice and pictures",
26+
"imageUrl": "/rick.jpeg",
27+
"llm": "steamship",
28+
"generateEndpoint": "https://a16z.steamship.run/rick/ai-companion-59f5d9816b627a45856239ae9f83525e/answer",
29+
"phone": "OPTIONAL_COMPANION_PHONE_NUMBER",
30+
"telegramLink": "https://t.me/rick_a16z_bot"
31+
},
2332
{
2433
"name": "Sebastian",
2534
"title": "I'm a travel blogger and a mystery novel writer",

package-lock.json

+9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"react-tooltip": "^5.16.1",
3939
"replicate": "^0.9.3",
4040
"tailwindcss": "3.3.2",
41+
"ts-md5": "^1.3.1",
4142
"twilio": "^4.12.0",
4243
"typescript": "5.1.3"
4344
},

public/rick.jpeg

6.53 KB
Loading

src/app/api/steamship/route.ts

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import dotenv from "dotenv";
2+
import clerk from "@clerk/clerk-sdk-node";
3+
import { NextResponse } from "next/server";
4+
import { currentUser } from "@clerk/nextjs";
5+
import { rateLimit } from "@/app/utils/rateLimit";
6+
import {Md5} from 'ts-md5'
7+
import ConfigManager from "@/app/utils/config";
8+
9+
dotenv.config({ path: `.env.local` });
10+
11+
/**
 * Builds a JSON error response.
 *
 * @param code HTTP status code to send.
 * @param message Human-readable text, returned under the `Message` key.
 * @returns A `NextResponse` whose body is `{ "Message": message }`.
 */
function returnError(code: number, message: string) {
  const payload = JSON.stringify({ Message: message });
  const init = {
    status: code,
    headers: {
      "Content-Type": "application/json",
    },
  };
  return new NextResponse(payload, init);
}
22+
23+
/**
 * Handles a chat request for a Steamship-backed companion.
 *
 * Reads `prompt`, `isText` and `userId` from the JSON body and the companion
 * name from the `name` request header, then forwards the prompt to the
 * companion's configured `generateEndpoint`.
 *
 * Responses:
 * - 404 when no companion config matches the `name` header.
 * - 429 when the rate limiter rejects the request.
 * - 401 when no Clerk user can be resolved for the request.
 * - 500 when STEAMSHIP_API_KEY or `generateEndpoint` is missing, when the
 *   agent call fails, or when the agent returns a body that is not JSON.
 * - Otherwise the agent's parsed JSON response is returned as-is.
 */
export async function POST(req: Request) {
  const { prompt, isText, userId } = await req.json();
  const companionName = req.headers.get("name");

  // Load the companion config
  const configManager = ConfigManager.getInstance();
  const companionConfig = configManager.getConfig("name", companionName);
  if (!companionConfig) {
    return returnError(404, `Hi, we were unable to find the configuration for a companion named ${companionName}.`);
  }

  // Make sure we're not rate limited.
  // NOTE(review): this runs before the user is resolved, so callers that do
  // not send `userId` in the body all share the "anonymous" bucket — confirm
  // that this is intended.
  const identifier = req.url + "-" + (userId || "anonymous");
  const { success } = await rateLimit(identifier);
  if (!success) {
    console.log("INFO: rate limit exceeded");
    return returnError(429, `Hi, the companions can't talk this fast.`);
  }

  if (!process.env.STEAMSHIP_API_KEY) {
    return returnError(500, `Please set the STEAMSHIP_API_KEY env variable and make sure ${companionName} is connected to an Agent instance that you own.`);
  }

  console.log(`Companion Name: ${companionName}`);
  console.log(`Prompt: ${prompt}`);

  // Text-channel callers pass the user id in the body; otherwise the id comes
  // from the current Clerk session.
  let clerkUserId: string | undefined;
  if (isText) {
    clerkUserId = userId;
  } else {
    const user = await currentUser();
    clerkUserId = user?.id;
  }

  if (!clerkUserId || !(await clerk.users.getUser(clerkUserId))) {
    console.log("user not authorized");
    return returnError(401, "User not authorized");
  }

  // Create a chat session id for the user. Hash the id that was just verified
  // rather than the raw body field: the body field is absent when the id came
  // from the Clerk session, which previously put every such user into the
  // same "anonymous" session.
  const chatSessionId = Md5.hashStr(clerkUserId);

  // Make sure we have a generate endpoint.
  // TODO: Create a new instance of the agent per user if this proves advantageous.
  const agentUrl = companionConfig.generateEndpoint;
  if (!agentUrl) {
    return returnError(500, `Please add a Steamship 'generateEndpoint' to your ${companionName} configuration in companions.json.`);
  }

  // Invoke the generation. Tool invocation, chat history management, backstory injection, etc is all done within this endpoint.
  // To build, deploy, and host your own multi-tenant agent see: https://www.steamship.com/learn/agent-guidebook
  const response = await fetch(agentUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${process.env.STEAMSHIP_API_KEY}`,
    },
    body: JSON.stringify({
      question: prompt,
      chat_session_id: chatSessionId,
    }),
  });

  const responseText = await response.text();
  if (!response.ok) {
    return returnError(500, responseText);
  }

  // A 2xx reply whose body is not JSON should surface as a JSON error rather
  // than an unhandled exception.
  let responseBlocks: unknown;
  try {
    responseBlocks = JSON.parse(responseText);
  } catch {
    return returnError(500, "The Steamship agent returned a response that was not valid JSON.");
  }
  return NextResponse.json(responseBlocks);
}

src/components/ChatBlock.tsx

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
 * Represents a unit of multimodal chat: text, video, audio, or image.
 *
 * Rendering rules, in order:
 *   - `text` present        -> the text in a <span>
 *   - `url` with audio/*    -> an <audio> player
 *   - `url` with video/*    -> a <video> player with a download fallback
 *   - `url` with image/*    -> an <img>
 *   - `url` with any other (or no) MIME type -> a plain link
 *   - nothing usable        -> an empty paragraph
 *
 * For streaming responses, just update the `text` argument.
 */
export function ChatBlock({text, mimeType, url} : {
  text?: string,
  mimeType?: string,
  url?: string
}) {
  let internalComponent = <></>
  if (text) {
    internalComponent = <span>{text}</span>
  } else if (url) {
    if (mimeType?.startsWith("audio")) {
      internalComponent = <audio controls={true} src={url} />
    } else if (mimeType?.startsWith("video")) {
      internalComponent = <video controls width="250">
        <source src={url} type={mimeType} />
        Download the <a href={url}>video</a>
      </video>
    } else if (mimeType?.startsWith("image")) {
      // Images are content here, so they need a non-empty text alternative.
      internalComponent = <img src={url} alt="Image sent by the companion" />
    } else {
      // Unknown or missing MIME type: still give the user a way to open it
      // instead of rendering an empty block.
      internalComponent = <a href={url}>Link</a>
    }
  }

  return (
    <p className="text-sm text-gray-200 pb-2">
      {internalComponent}
    </p>
  );
}
35+
36+
/*
 * Take a completion, which may be a string, JSON encoded as a string, or JSON object,
 * and produce a list of ChatBlock elements. This is intended to be a one-size-fits-all
 * method for funneling different LLM output into structure that supports different media
 * types and can easily grow to support more metadata (such as speaker).
 *
 * Handling:
 *   - A string is parsed as JSON only if the result is an object or array.
 *     Text that happens to be a JSON primitive ("42", "true", "null") and text
 *     that is not JSON at all is shown verbatim.
 *   - An array yields one ChatBlock per item; string items become text blocks.
 *   - A single object yields one ChatBlock with the object's fields as props.
 *   - null / undefined yields one empty ChatBlock.
 *
 * Every returned element has a `key`, so the array can be rendered directly.
 */
export function responseToChatBlocks(completion: any) {
  let parsed = completion
  if (typeof completion === "string") {
    try {
      const candidate = JSON.parse(completion)
      if (candidate !== null && typeof candidate === "object") {
        parsed = candidate
      }
    } catch {
      // Not JSON (or a partially streamed JSON document): treat it as plain text.
    }
  }

  // Turns one item into a ChatBlock. Blocks are only appended or replaced
  // wholesale, so the position is a stable enough key.
  const toBlock = (item: any, index: number) => {
    if (item !== null && typeof item === "object") {
      return <ChatBlock key={index} {...item} />
    }
    if (item === null || item === undefined) {
      return <ChatBlock key={index} />
    }
    return <ChatBlock key={index} text={String(item)} />
  }

  if (Array.isArray(parsed)) {
    return parsed.map(toBlock)
  }
  return [toBlock(parsed, 0)]
}
69+

src/components/Examples.tsx

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export default function Examples() {
2020
imageUrl: "",
2121
llm: "",
2222
phone: "",
23+
telegramLink: null
2324
},
2425
]);
2526

@@ -34,6 +35,7 @@ export default function Examples() {
3435
imageUrl: entry.imageUrl,
3536
llm: entry.llm,
3637
phone: entry.phone,
38+
telegramLink: entry.telegramLink
3739
}));
3840
setExamples(setme);
3941
} catch (err) {
@@ -80,7 +82,10 @@ export default function Examples() {
8082
<dl className="mt-1 flex flex-grow flex-col justify-between">
8183
<dt className="sr-only"></dt>
8284
<dd className="text-sm text-slate-400">
83-
{example.title}. Running on <b>{example.llm}</b>
85+
{example.title}. Running on <b>{example.llm}</b>.
86+
{example.telegramLink && (
87+
<span className="ml-1"><a onClick={(event) => {event?.stopPropagation(); event?.preventDefault}} href={example.telegramLink}>Chat on <b>Telegram</b></a>.</span>
88+
)}
8489
</dd>
8590
</dl>
8691
<dl className="mt-1 flex flex-grow flex-col justify-between">

src/components/QAModal.tsx

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
"use client";
22

3-
import { Fragment, useEffect } from "react";
3+
import {Fragment, useEffect, useState} from "react";
44
import { Dialog, Transition } from "@headlessui/react";
55
import { useCompletion } from "ai/react";
6+
import {ChatBlock, responseToChatBlocks} from "@/components/ChatBlock";
67

78
var last_name = "";
89

@@ -36,6 +37,17 @@ export default function QAModal({
3637
headers: { name: example.name },
3738
});
3839

40+
let [blocks, setBlocks] = useState<ChatBlock[] | null>(null)
41+
42+
useEffect(() => {
43+
// When the completion changes, parse it to multimodal blocks for display.
44+
if (completion) {
45+
setBlocks(responseToChatBlocks(completion))
46+
} else {
47+
setBlocks(null)
48+
}
49+
}, [completion])
50+
3951
if (!example) {
4052
console.log("ERROR: no companion selected");
4153
return null;
@@ -82,7 +94,7 @@ export default function QAModal({
8294
className={"w-full flex-auto rounded-md border-0 bg-white/5 px-3.5 py-2 shadow-sm focus:outline-none sm:text-sm sm:leading-6 " + (isLoading && !completion ? "text-gray-600 cursor-not-allowed" : "text-white")}
8395
value={input}
8496
onChange={handleInputChange}
85-
disabled={isLoading && !completion}
97+
disabled={isLoading && !blocks}
8698
/>
8799
</form>
88100
<div className="mt-3 sm:mt-5">
@@ -91,13 +103,13 @@ export default function QAModal({
91103
Chat with {example.name}
92104
</p>
93105
</div>
94-
{completion && (
106+
{blocks && (
95107
<div className="mt-2">
96-
<p className="text-sm text-gray-200">{completion}</p>
108+
{blocks}
97109
</div>
98110
)}
99111

100-
{isLoading && !completion && (
112+
{isLoading && !blocks && (
101113
<p className="flex items-center justify-center mt-4">
102114
<svg
103115
className="animate-spin -ml-1 mr-3 h-5 w-5 text-white"

src/components/actions.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import { parse } from "path";
99
export async function getCompanions() {
1010
const COMPFILE = "./companions/companions.json";
1111
var companions = [];
12-
console.log("Loading companion descriptions from "+COMPFILE);
12+
// console.log("Loading companion descriptions from "+COMPFILE);
1313
var fs = require('fs');
1414
const data = fs.readFileSync(COMPFILE);
1515
console.log(String(data));

0 commit comments

Comments
 (0)