diff --git a/packages/inference/README.md b/packages/inference/README.md
index 241ba7a52..cccb01e57 100644
--- a/packages/inference/README.md
+++ b/packages/inference/README.md
@@ -48,6 +48,7 @@ You can send inference requests to third-party providers with the inference client.
 
 Currently, we support the following providers:
 - [Fal.ai](https://fal.ai)
+- [Featherless AI](https://featherless.ai)
 - [Fireworks AI](https://fireworks.ai)
 - [Hyperbolic](https://hyperbolic.xyz)
 - [Nebius](https://studio.nebius.ai)
@@ -76,6 +77,7 @@ When authenticated with a third-party provider key, the request is made directly
 
 Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
 - [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
+- [Featherless AI supported models](https://huggingface.co/api/partners/featherless-ai/models)
 - [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
 - [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
 - [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts
index 8c345208a..d60f7e305 100644
--- a/packages/inference/src/lib/getProviderHelper.ts
+++ b/packages/inference/src/lib/getProviderHelper.ts
@@ -2,9 +2,9 @@ import * as BlackForestLabs from "../providers/black-forest-labs";
 import * as Cerebras from "../providers/cerebras";
 import * as Cohere from "../providers/cohere";
 import * as FalAI from "../providers/fal-ai";
+import * as FeatherlessAI from "../providers/featherless-ai";
 import * as Fireworks from "../providers/fireworks-ai";
 import * as HFInference from "../providers/hf-inference";
-
 import * as Hyperbolic from "../providers/hyperbolic";
 import * as Nebius from "../providers/nebius";
 import * as Novita from "../providers/novita";
@@ -62,6 +62,10 @@ export const PROVIDERS: Record
+	"featherless-ai": {
+		conversational: new FeatherlessAI.FeatherlessAIConversationalTask(),
+		"text-generation": new FeatherlessAI.FeatherlessAITextGenerationTask(),
+	},
diff --git a/packages/inference/src/providers/featherless-ai.ts b/packages/inference/src/providers/featherless-ai.ts
new file mode 100644
--- /dev/null
+++ b/packages/inference/src/providers/featherless-ai.ts
@@ -0,0 +1,52 @@
+import type { ChatCompletionOutput, TextGenerationOutput, TextGenerationOutputFinishReason } from "@huggingface/tasks";
+import { InferenceOutputError } from "../lib/InferenceOutputError";
+import type { BodyParams } from "../types";
+import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper";
+
+interface FeatherlessAITextCompletionOutput extends Omit<ChatCompletionOutput, "choices"> {
+	choices: Array<{
+		text: string;
+		finish_reason: TextGenerationOutputFinishReason;
+		seed: number;
+		logprobs: unknown;
+		index: number;
+	}>;
+}
+
+const FEATHERLESS_API_BASE_URL = "https://api.featherless.ai";
+
+export class FeatherlessAIConversationalTask extends BaseConversationalTask {
+	constructor() {
+		super("featherless-ai", FEATHERLESS_API_BASE_URL);
+	}
+}
+
+export class FeatherlessAITextGenerationTask extends BaseTextGenerationTask {
+	constructor() {
+		super("featherless-ai", FEATHERLESS_API_BASE_URL);
+	}
+
+	override preparePayload(params: BodyParams): Record<string, unknown> {
+		return {
+			...params.args,
+			...(params.args.parameters as Record<string, unknown>),
+			model: params.model,
+			prompt: params.args.inputs,
+		};
+	}
+
+	override async getResponse(response: FeatherlessAITextCompletionOutput): Promise<TextGenerationOutput> {
+		if (
+			typeof response === "object" &&
+			"choices" in response &&
+			Array.isArray(response?.choices) &&
+			typeof response?.model === "string"
+		) {
+			const completion = response.choices[0];
+			return {
+				generated_text: completion.text,
+			};
+		}
+		throw new InferenceOutputError("Expected Featherless AI text generation response format");
+	}
+}
diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts
index 1e465c00f..1c4c5ac1e 100644
--- a/packages/inference/src/types.ts
+++ b/packages/inference/src/types.ts
@@ -42,6 +42,7 @@ export const INFERENCE_PROVIDERS = [
 	"cerebras",
 	"cohere",
 	"fal-ai",
+	"featherless-ai",
 	"fireworks-ai",
 	"hf-inference",
 	"hyperbolic",
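With the provider registered, callers can route requests through Featherless AI by passing `provider: "featherless-ai"` to the existing task functions. A minimal sketch of consumer code, assuming the package is installed and mirroring the models used in the test cases below (the token variable and model IDs are illustrative, not prescribed by this PR):

```ts
import { chatCompletion, textGeneration } from "@huggingface/inference";

// Conversational requests are handled by FeatherlessAIConversationalTask,
// which targets the provider's OpenAI-compatible /v1/chat/completions route.
const chat = await chatCompletion({
	accessToken: process.env.HF_TOKEN, // or a Featherless AI provider key
	provider: "featherless-ai",
	model: "meta-llama/Llama-3.1-8B-Instruct",
	messages: [{ role: "user", content: "One plus one is equal to" }],
});
console.log(chat.choices[0].message?.content);

// Raw text generation is handled by FeatherlessAITextGenerationTask,
// which maps `inputs` to the provider's `prompt` field via preparePayload above.
const completion = await textGeneration({
	accessToken: process.env.HF_TOKEN,
	provider: "featherless-ai",
	model: "meta-llama/Llama-3.1-8B",
	inputs: "Paris is a city of ",
	parameters: { max_tokens: 10 },
});
console.log(completion.generated_text);
```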
diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts
index 73d526f28..28b4deaad 100644
--- a/packages/inference/test/InferenceClient.spec.ts
+++ b/packages/inference/test/InferenceClient.spec.ts
@@ -1046,6 +1046,79 @@ describe.concurrent("InferenceClient", () => {
 		TIMEOUT
 	);
 
+	describe.concurrent(
+		"Featherless",
+		() => {
+			HARDCODED_MODEL_INFERENCE_MAPPING["featherless-ai"] = {
+				"meta-llama/Llama-3.1-8B": {
+					providerId: "meta-llama/Meta-Llama-3.1-8B",
+					hfModelId: "meta-llama/Llama-3.1-8B",
+					task: "text-generation",
+					status: "live",
+				},
+				"meta-llama/Llama-3.1-8B-Instruct": {
+					providerId: "meta-llama/Meta-Llama-3.1-8B-Instruct",
+					hfModelId: "meta-llama/Llama-3.1-8B-Instruct",
+					task: "text-generation",
+					status: "live",
+				},
+			};
+
+			it("chatCompletion", async () => {
+				const res = await chatCompletion({
+					accessToken: env.HF_FEATHERLESS_KEY ?? "dummy",
+					model: "meta-llama/Llama-3.1-8B-Instruct",
+					provider: "featherless-ai",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+					temperature: 0.1,
+				});
+
+				expect(res).toBeDefined();
+				expect(res.choices).toBeDefined();
+				expect(res.choices?.length).toBeGreaterThan(0);
+
+				if (res.choices && res.choices.length > 0) {
+					const completion = res.choices[0].message?.content;
+					expect(completion).toBeDefined();
+					expect(typeof completion).toBe("string");
+					expect(completion).toContain("two");
+				}
+			});
+
+			it("chatCompletion stream", async () => {
+				const stream = chatCompletionStream({
+					accessToken: env.HF_FEATHERLESS_KEY ?? "dummy",
+					model: "meta-llama/Llama-3.1-8B-Instruct",
+					provider: "featherless-ai",
+					messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+				let out = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						out += chunk.choices[0].delta.content;
+					}
+				}
+				expect(out).toContain("2");
+			});
+
+			it("textGeneration", async () => {
+				const res = await textGeneration({
+					accessToken: env.HF_FEATHERLESS_KEY ?? "dummy",
+					model: "meta-llama/Llama-3.1-8B",
+					provider: "featherless-ai",
+					inputs: "Paris is a city of ",
+					parameters: {
+						temperature: 0,
+						top_p: 0.01,
+						max_tokens: 10,
+					},
+				});
+				expect(res).toMatchObject({ generated_text: "2.2 million people, and it is the" });
+			});
+		},
+		TIMEOUT
+	);
+
 	describe.concurrent(
 		"Replicate",
 		() => {
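The recorded tapes below were captured against the live Featherless AI endpoints and show the concrete effect of `preparePayload` on the text-generation path: because the helper spreads `params.args` and then `params.args.parameters` before adding `model` and `prompt`, the outgoing body still carries the original `inputs` and `parameters` keys alongside the flattened sampling options. A rough sketch of that transformation, with field values taken from the `/v1/completions` tape and the `params` shape simplified for illustration:

```ts
// Simplified illustration of FeatherlessAITextGenerationTask.preparePayload.
const params = {
	args: {
		inputs: "Paris is a city of ",
		parameters: { temperature: 0, top_p: 0.01, max_tokens: 10 },
	},
	model: "meta-llama/Meta-Llama-3.1-8B",
};

const payload = {
	...params.args,
	...(params.args.parameters as Record<string, unknown>),
	model: params.model,
	prompt: params.args.inputs,
};
// payload now matches the recorded request body for /v1/completions:
// { inputs, parameters, temperature, top_p, max_tokens, model, prompt }
```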
"dummy", + model: "meta-llama/Llama-3.1-8B", + provider: "featherless-ai", + inputs: "Paris is a city of ", + parameters: { + temperature: 0, + top_p: 0.01, + max_tokens: 10, + }, + }); + expect(res).toMatchObject({ generated_text: "2.2 million people, and it is the" }); + }); + }, + TIMEOUT + ); + describe.concurrent( "Replicate", () => { diff --git a/packages/inference/test/tapes.json b/packages/inference/test/tapes.json index b8096f13d..0bd502715 100644 --- a/packages/inference/test/tapes.json +++ b/packages/inference/test/tapes.json @@ -7456,5 +7456,97 @@ "server": "UploadServer" } } + }, + "114810cb7fc73df3a897d2100ff6d2aa61072ceacd2ac8419cd7d0301b5f2038": { + "url": "https://api.featherless.ai/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"temperature\":0.1,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\"}" + }, + "response": { + "body": "{\"id\":\"XA2hY8\",\"object\":\"chat.completion\",\"created\":1744738127375,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"...to two.\"},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"system_fingerprint\":\"\",\"usage\":{\"prompt_tokens\":17,\"completion_tokens\":4,\"total_tokens\":21}}", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-credentials": "true", + "cache-control": "no-cache", + "cf-cache-status": "DYNAMIC", + "cf-ray": "930d294a9afdaca5-YYZ", + "connection": "keep-alive", + "content-encoding": "br", + "content-type": "application/json", + "nel": "{\"success_fraction\":0,\"report_to\":\"cf-nel\",\"max_age\":604800}", + "report-to": "{\"endpoints\":[{\"url\":\"https:\\/\\/a.nel.cloudflare.com\\/report\\/v4?s=YgZmhbIzMEx2tnSIcGrXly33AJVRZLvogEbxvuKRW%2BywW%2FXtW2MjGmB7VQTK%2FnV6pP4tLAwxJ7L0856uiUvl8EamnWgFbIUky0%2BIsUklVJT6DVUmQm%2B3h0fJHq9qZgRT3eZWEw%3D%3D\"}],\"group\":\"cf-nel\",\"max_age\":604800}", + "server": "cloudflare", + "server-timing": "cfL4;desc=\"?proto=TCP&rtt=29041&min_rtt=23313&rtt_var=12834&sent=4&recv=5&lost=0&retrans=0&sent_bytes=2847&recv_bytes=1024&delivery_rate=124222&cwnd=218&unsent_bytes=0&cid=e073b0090fe1f02c&ts=2740&x=0\"", + "strict-transport-security": "max-age=15724800; includeSubDomains", + "transfer-encoding": "chunked", + "vary": "Origin" + } + } + }, + "36395857d7dc0298b346b9b2755ac22dede1384a02249b01c22958e9b6dcc27d": { + "url": "https://api.featherless.ai/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete the equation 1 + 1 = , just the answer\"}],\"stream\":true,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\"}" + }, + "response": { + "body": ": FEATHERLESS PROCESSING\n: FEATHERLESS PROCESSING\ndata: {\"id\":\"T1R1ho\",\"object\":\"chat.completion.chunk\",\"created\":1744738127478,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"index\":0,\"finish_reason\":null}]}\n\ndata: {\"id\":\"T1R1ho\",\"object\":\"chat.completion.chunk\",\"created\":1744738128780,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{\"content\":\"\"},\"index\":0,\"finish_reason\":null}]}\n\ndata: 
+    "response": {
+      "body": ": FEATHERLESS PROCESSING\n: FEATHERLESS PROCESSING\ndata: {\"id\":\"T1R1ho\",\"object\":\"chat.completion.chunk\",\"created\":1744738127478,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"index\":0,\"finish_reason\":null}]}\n\ndata: {\"id\":\"T1R1ho\",\"object\":\"chat.completion.chunk\",\"created\":1744738128780,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{\"content\":\"\"},\"index\":0,\"finish_reason\":null}]}\n\ndata: {\"id\":\"T1R1ho\",\"object\":\"chat.completion.chunk\",\"created\":1744738129059,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{\"content\":\"\"},\"index\":0,\"finish_reason\":null}]}\n\ndata: {\"id\":\"T1R1ho\",\"object\":\"chat.completion.chunk\",\"created\":1744738129092,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{\"content\":\"2\"},\"index\":0,\"finish_reason\":null}]}\n\ndata: {\"id\":\"T1R1ho\",\"object\":\"text_completion\",\"created\":1744738129671,\"model\":\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\"choices\":[{\"delta\":{},\"index\":0,\"logprobs\":null,\"finish_reason\":\"stop\"}]}\n\ndata: [DONE]\n\n",
+      "status": 200,
+      "statusText": "OK",
+      "headers": {
+        "access-control-allow-credentials": "true",
+        "cache-control": "no-cache",
+        "cf-cache-status": "DYNAMIC",
+        "cf-ray": "930d294aadf5eb61-ORD",
+        "connection": "keep-alive",
+        "content-type": "text/event-stream",
+        "nel": "{\"success_fraction\":0,\"report_to\":\"cf-nel\",\"max_age\":604800}",
+        "report-to": "{\"endpoints\":[{\"url\":\"https:\\/\\/a.nel.cloudflare.com\\/report\\/v4?s=7awdH7G%2FfIt%2F6A%2FCcVcyItEaJLW%2FzD5RcOVIIWM4wkWBqx6BV9BaI18U%2BTFlSiQfa5X5jEnLLmSTdgr7E7M%2B38UIxgNDezqK45dJvwzVF4pjFps1xPMG%2F2C0Vz5ylgIGs4ZxCA%3D%3D\"}],\"group\":\"cf-nel\",\"max_age\":604800}",
+        "server": "cloudflare",
+        "server-timing": "cfL4;desc=\"?proto=TCP&rtt=35139&min_rtt=32588&rtt_var=14043&sent=4&recv=5&lost=0&retrans=0&sent_bytes=2847&recv_bytes=1010&delivery_rate=88867&cwnd=222&unsent_bytes=0&cid=70e9c03ed3d0c745&ts=3251&x=0\"",
+        "strict-transport-security": "max-age=15724800; includeSubDomains",
+        "transfer-encoding": "chunked",
+        "vary": "Origin"
+      }
+    }
+  },
+  "9e6bef930cb9728402e4e3963ee107398bee0776948025369cc2036ba7ada176": {
+    "url": "https://api.featherless.ai/v1/completions",
+    "init": {
+      "headers": {
+        "Content-Type": "application/json"
+      },
+      "method": "POST",
+      "body": "{\"inputs\":\"Paris is a city of \",\"parameters\":{\"temperature\":0,\"top_p\":0.01,\"max_tokens\":10},\"temperature\":0,\"top_p\":0.01,\"max_tokens\":10,\"model\":\"meta-llama/Meta-Llama-3.1-8B\",\"prompt\":\"Paris is a city of \"}"
+    },
+    "response": {
+      "body": "{\"id\":\"djPvCp\",\"object\":\"text_completion\",\"created\":1744738132411,\"model\":\"meta-llama/Meta-Llama-3.1-8B\",\"choices\":[{\"index\":0,\"text\":\"2.2 million people, and it is the\",\"logprobs\":null,\"finish_reason\":\"stop\"}],\"system_fingerprint\":\"\",\"usage\":{\"prompt_tokens\":6,\"completion_tokens\":10,\"total_tokens\":16}}",
+      "status": 200,
+      "statusText": "OK",
+      "headers": {
+        "access-control-allow-credentials": "true",
+        "cache-control": "no-cache",
+        "cf-cache-status": "DYNAMIC",
+        "cf-ray": "930d294adddb7ca5-EWR",
+        "connection": "keep-alive",
+        "content-encoding": "br",
+        "content-type": "application/json",
+        "nel": "{\"success_fraction\":0,\"report_to\":\"cf-nel\",\"max_age\":604800}",
+        "report-to": "{\"endpoints\":[{\"url\":\"https:\\/\\/a.nel.cloudflare.com\\/report\\/v4?s=UDo4w4zRyJSxdhlgdtsAf%2BjJWPk%2FAigX2XxpJEdzRlxnbUe9RbZin%2FuOch0VxhRsjz8hCeqe1BShVb%2BzxblGyosdydMFFThBXM9uBlHs%2BwNmyOB5NYfB8jbsbpiuwClYHqT77w%3D%3D\"}],\"group\":\"cf-nel\",\"max_age\":604800}",
+        "server": "cloudflare",
+        "server-timing": "cfL4;desc=\"?proto=TCP&rtt=43135&min_rtt=40171&rtt_var=17181&sent=4&recv=5&lost=0&retrans=0&sent_bytes=2847&recv_bytes=1060&delivery_rate=72091&cwnd=251&unsent_bytes=0&cid=fc8be36f821730f7&ts=6031&x=0\"",
+        "strict-transport-security": "max-age=15724800; includeSubDomains",
"transfer-encoding": "chunked", + "vary": "Origin" + } + } } } \ No newline at end of file