Skip to content

Commit 762207a

Browse files
committed
✨ Support for Featherless.ai as inference provider.
1 parent e0de008 commit 762207a

File tree

8 files changed

+124
-3
lines changed

8 files changed

+124
-3
lines changed

packages/inference/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ You can send inference requests to third-party providers with the inference clie
4848

4949
Currently, we support the following providers:
5050
- [Fal.ai](https://fal.ai)
51+
- [Featherless](https://featherless.ai)
5152
- [Fireworks AI](https://fireworks.ai)
5253
- [Hyperbolic](https://hyperbolic.xyz)
5354
- [Nebius](https://studio.nebius.ai)
@@ -76,6 +77,7 @@ When authenticated with a third-party provider key, the request is made directly
7677

7778
Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
7879
- [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
80+
- [Featherless supported models](https://huggingface.co/api/partners/featherless-ai/models)
7981
- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
8082
- [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
8183
- [Nebius supported models](https://huggingface.co/api/partners/nebius/models)

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { BLACK_FOREST_LABS_CONFIG } from "../providers/black-forest-labs";
33
import { CEREBRAS_CONFIG } from "../providers/cerebras";
44
import { COHERE_CONFIG } from "../providers/cohere";
55
import { FAL_AI_CONFIG } from "../providers/fal-ai";
6+
import { FEATHERLESS_AI_CONFIG } from "../providers/featherless-ai";
67
import { FIREWORKS_AI_CONFIG } from "../providers/fireworks-ai";
78
import { HF_INFERENCE_CONFIG } from "../providers/hf-inference";
89
import { HYPERBOLIC_CONFIG } from "../providers/hyperbolic";
@@ -33,6 +34,7 @@ const providerConfigs: Record<InferenceProvider, ProviderConfig> = {
3334
cerebras: CEREBRAS_CONFIG,
3435
cohere: COHERE_CONFIG,
3536
"fal-ai": FAL_AI_CONFIG,
37+
"featherless-ai": FEATHERLESS_AI_CONFIG,
3638
"fireworks-ai": FIREWORKS_AI_CONFIG,
3739
"hf-inference": HF_INFERENCE_CONFIG,
3840
hyperbolic: HYPERBOLIC_CONFIG,

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
2020
cerebras: {},
2121
cohere: {},
2222
"fal-ai": {},
23+
"featherless-ai": {},
2324
"fireworks-ai": {},
2425
"hf-inference": {},
2526
hyperbolic: {},
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
2+
* See the registered mapping of HF model ID => Featherless model ID here:
3+
*
4+
 * https://huggingface.co/api/partners/featherless-ai/models
5+
*
6+
* This is a publicly available mapping.
7+
*
8+
* If you want to try to run inference for a new model locally before it's registered on huggingface.co,
9+
* you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
10+
*
11+
* - If you work at Featherless and want to update this mapping, please use the model mapping API we provide on huggingface.co
12+
* - If you're a community member and want to add a new supported HF model to Featherless, please open an issue on the present repo
13+
* and we will tag Featherless team members.
14+
*
15+
* Thanks!
16+
*/
17+
import type { ProviderConfig, UrlParams, HeaderParams, BodyParams } from "../types";
18+
19+
// Root of Featherless's OpenAI-compatible API; makeUrl appends the task-specific path.
const FEATHERLESS_API_BASE_URL = "https://api.featherless.ai";
20+
21+
const makeBody = (params: BodyParams): Record<string, unknown> => {
22+
const { inputs, parameters, ...args } = params.args;
23+
24+
if (inputs) {
25+
args.prompt = inputs;
26+
}
27+
28+
return {
29+
...args,
30+
...(parameters as object),
31+
model: params.model,
32+
};
33+
};
34+
35+
const makeHeaders = (params: HeaderParams): Record<string, string> => {
36+
return { Authorization: `Bearer ${params.accessToken}` };
37+
};
38+
39+
const makeUrl = (params: UrlParams): string => {
40+
if (params.chatCompletion) {
41+
return `${params.baseUrl}/v1/chat/completions`;
42+
}
43+
return `${params.baseUrl}/v1/completions`;
44+
};
45+
46+
/**
 * Provider configuration registered under the "featherless-ai" key in
 * makeRequestOptions: base URL plus the body/header/url builders above.
 */
export const FEATHERLESS_AI_CONFIG: ProviderConfig = {
	baseUrl: FEATHERLESS_API_BASE_URL,
	makeBody,
	makeHeaders,
	makeUrl,
};

packages/inference/src/tasks/nlp/textGeneration.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { omit } from "../../utils/omit";
1212

1313
export type { TextGenerationInput, TextGenerationOutput };
1414

15-
interface TogeteherTextCompletionOutput extends Omit<ChatCompletionOutput, "choices"> {
15+
interface OpenAICompatibleTextCompletion extends Omit<ChatCompletionOutput, "choices"> {
1616
choices: Array<{
1717
text: string;
1818
finish_reason: TextGenerationOutputFinishReason;
@@ -35,9 +35,9 @@ export async function textGeneration(
3535
args: BaseArgs & TextGenerationInput,
3636
options?: Options
3737
): Promise<TextGenerationOutput> {
38-
if (args.provider === "together") {
38+
if (args.provider === "together" || args.provider === "featherless-ai") {
3939
args.prompt = args.inputs;
40-
const raw = await request<TogeteherTextCompletionOutput>(args, {
40+
const raw = await request<OpenAICompatibleTextCompletion>(args, {
4141
...options,
4242
task: "text-generation",
4343
});

packages/inference/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ export const INFERENCE_PROVIDERS = [
3333
"cerebras",
3434
"cohere",
3535
"fal-ai",
36+
"featherless-ai",
3637
"fireworks-ai",
3738
"hf-inference",
3839
"hyperbolic",

packages/inference/test/InferenceClient.spec.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,69 @@ describe.concurrent("InferenceClient", () => {
884884
TIMEOUT
885885
);
886886

887+
describe.concurrent(
888+
"Featherless",
889+
() => {
890+
HARDCODED_MODEL_ID_MAPPING['featherless-ai'] = {
891+
"meta-llama/Llama-3.1-8B": "meta-llama/Meta-Llama-3.1-8B",
892+
"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
893+
};
894+
895+
it("chatCompletion", async () => {
896+
const res = await chatCompletion({
897+
accessToken: env.HF_FEATHERLESS_KEY ?? "dummy",
898+
model: "meta-llama/Llama-3.1-8B-Instruct",
899+
provider: "featherless-ai",
900+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
901+
temperature: 0.1,
902+
});
903+
904+
expect(res).toBeDefined();
905+
expect(res.choices).toBeDefined();
906+
expect(res.choices?.length).toBeGreaterThan(0);
907+
908+
if (res.choices && res.choices.length > 0) {
909+
const completion = res.choices[0].message?.content;
910+
expect(completion).toBeDefined();
911+
expect(typeof completion).toBe("string");
912+
expect(completion).toContain("two");
913+
}
914+
});
915+
916+
it("chatCompletion stream", async () => {
917+
const stream = chatCompletionStream({
918+
accessToken: env.HF_FEATHERLESS_KEY ?? "dummy",
919+
model: "meta-llama/Llama-3.1-8B-Instruct",
920+
provider: "featherless-ai",
921+
messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
922+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
923+
let out = "";
924+
for await (const chunk of stream) {
925+
if (chunk.choices && chunk.choices.length > 0) {
926+
out += chunk.choices[0].delta.content;
927+
}
928+
}
929+
expect(out).toContain("2");
930+
});
931+
932+
it("textGeneration", async () => {
933+
const res = await textGeneration({
934+
accessToken: env.HF_FEATHERLESS_KEY ?? "dummy",
935+
model: "meta-llama/Llama-3.1-8B",
936+
provider: "featherless-ai",
937+
inputs: "Paris is",
938+
parameters: {
939+
temperature: 0,
940+
top_p: 0.01,
941+
max_tokens: 10,
942+
},
943+
});
944+
expect(res).toMatchObject({ generated_text: " a city of romance, art, and culture." });
945+
});
946+
},
947+
TIMEOUT
948+
);
949+
887950
describe.concurrent(
888951
"Replicate",
889952
() => {

packages/tasks/src/inference-providers.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const INFERENCE_PROVIDERS = [
44
"cerebras",
55
"cohere",
66
"fal-ai",
7+
"featherless-ai",
78
"fireworks-ai",
89
"hf-inference",
910
"hyperbolic",

0 commit comments

Comments
 (0)