363 lines
No EOL
11 KiB
Perl
363 lines
No EOL
11 KiB
Perl
package OpenAIAsync::Server;
|
|
|
|
use v5.36.0;
|
|
use Object::Pad;
|
|
use IO::Async::SSL; # We're not directly using it but I want to enforce that we pull it in when detecting dependencies, since openai itself is always https
|
|
use Future::AsyncAwait;
|
|
use IO::Async;
|
|
|
|
use OpenAIAsync::Types::Results;
|
|
use OpenAIAsync::Types::Requests;
|
|
|
|
our $VERSION = '0.02';
|
|
|
|
# ABSTRACT: Async server for OpenAI style REST API for various AI systems (LLMs, Images, Video, etc.)
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
OpenAIAsync::Server - IO::Async based server for OpenAI compatible APIs
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use IO::Async::Loop;
|
|
use OpenAIAsync::Server;
|
|
use builtin qw/true false/;
|
|
|
|
my $loop = IO::Async::Loop->new();
|
|
|
|
class MyServer {
|
|
inherit OpenAIAsync::Server;
|
|
|
|
method init() {
|
|
# We return the info on where we should be listening, and any other settings for Net::Async::HTTP::Server
|
|
|
|
return [
|
|
{
|
|
port => "8085",
|
|
listen => "127.0.0.1",
|
|
...
|
|
},
|
|
...
|
|
];
|
|
}
|
|
|
|
async method auth_check($key, $http_req) {
|
|
# we can implement any auth checks that we need here
|
|
return true; # AIs need to be free!
|
|
}
|
|
|
|
async method chat_completion($chat_completion_req) {
|
|
...
|
|
|
|
return $chat_completion_response;
|
|
}
|
|
|
|
...
|
|
}
|
|
|
|
my $server = MyServer->new();
|
|
|
|
$loop->add($server);
|
|
|
|
$loop->run()
|
|
|
|
=head1 THEORY OF OPERATION
|
|
|
|
This module implements the L<IO::Async::Notifier> interface, this means that you create a new server and then call C<< $loop->add($client) >>
|
|
this casues all L<Future>s that are created to be part of the L<IO::Async::Loop> of your program. This way when you call C<await> on any method
|
|
it will properly suspend the execution of your program and do something else concurrently (probably waiting on requests).
|
|
|
|
All the methods that you provide must be async, this is so that multiple requests can be easily handled internally with ease.
|
|
|
|
You subclass the ::Server class and implement your own methods for handling the limited number of requests, and provide some
|
|
configuration information for the servers to start in the configure() method. The reason to put this into a method is so that
|
|
The server object can handle reloading itself without being recreated. This way any sockets that are already listening don't have
|
|
to be closed and reopened to be reconfigured (assuming that the new configuration keeps them open).
|
|
|
|
Streaming from ::Server is still being designed, I'll publish this system WITHOUT streaming support the first time around
|
|
since I need to write at least a new http client module that supports it in order to test things properly, and the make OpenAIAsync::Client
|
|
work with streaming events anyway.
|
|
|
|
=head1 Methods
|
|
|
|
=head2 new()
|
|
|
|
Create a new OpenAIAsync::Server. You'll need to register the server with C<< $loop->add($server) >> after creation.
|
|
|
|
=head3 PARAMETERS
|
|
|
|
=over 4
|
|
|
|
=item * ?
|
|
|
|
TODO FILL IN
|
|
|
|
=back
|
|
|
|
=head2 init($loop)
|
|
|
|
The intialization phase, this happens before the configure() method is called, so that you can setup any local data stores and validate
|
|
the environment. This will only be called once, but configure() may be called multiple times, such as from a signal handler (SIGHUP or SIGUSR1?)
|
|
to reload any configuration for the HTTP endpoints.
|
|
|
|
Not completely sure if I should have these separated but I kept not liking doing it all in one method for some reason. Maybe fold this into BUILD {} instead?
|
|
|
|
=back
|
|
|
|
=head2 configure($loop)
|
|
|
|
The configuration phase, returns a list of the arguments to be given to Net::Async::HTTP::Server
|
|
|
|
TODO bring in some docuemtation, this is derived from C<IO::Async::Loop> 's listen method, and we also need to refer to the connect method for more details
|
|
basically, just C<< listen => [addr => '::1', port => 8080, socktype => 'stream'] >> as a basic example.
|
|
|
|
We handle multiple of these so we can have this proxy listen on multiple ports, and then handle any other behavior for it
|
|
|
|
The context key will be passed to all of the methods doing the handling here, so you can use it for static data based on which
|
|
connection actually was used. This should help handle multiple networks/vlans or auth patterns a bit easier from a single daemon.
|
|
|
|
I think this should also let you pass in a listening handle from another module like IO::Socket::SSL or something to do native SSL
|
|
But I'm not going to be testing that for some time myself, and would recommend using something else like nginx, haproxy, etc. as a
|
|
terminator for TLS connections
|
|
|
|
[
|
|
{
|
|
listen => {addrs => ['::1', ...], ...},
|
|
context => {...}
|
|
on_something => code
|
|
},
|
|
...
|
|
]
|
|
|
|
=back
|
|
|
|
=head2 auth_check($key, $ctx, $http_req)
|
|
|
|
This method requres async keyword.
|
|
|
|
Return a true or false value on if a request should be authorized. This is given the API-Key value from the Authorization header
|
|
|
|
if $key is undef then you can use the $http_req object to get a look at the full headers, so you can implement whatever other authorization system you need
|
|
|
|
By default all OpenAI compatible APIs are expecting ONLY an API-Key type Authorization header so doing anytyhing else isn't strictly compatible but there's no reason that this server shouldn't be able to do more if you're customizing things
|
|
|
|
=head2 async completion($ctx, $completion)
|
|
|
|
=head3 DEPRECATED
|
|
|
|
This method requres async keyword.
|
|
|
|
Handle a completion request, takes in a request object, must return a response object.
|
|
|
|
=head2 async chat($ctx, $chat_completion)
|
|
|
|
This method requres async keyword.
|
|
|
|
Handle a chat completion request
|
|
|
|
=head2 async embedding($ctx, $embedding)
|
|
|
|
This method requres async keyword.
|
|
|
|
Handle an embedding request
|
|
|
|
=head2 async image_generate
|
|
|
|
This method requres async keyword.
|
|
|
|
Unimplemented, but once present will be used to generate images with Dall-E (or for self hosted, stable diffusion).
|
|
|
|
=head2 async text_to_speech
|
|
|
|
This method requres async keyword.
|
|
|
|
Unimplemented, but can be used to turn text to speech using whatever algorithms/models are supported.
|
|
|
|
=head2 async speech_to_text
|
|
|
|
This method requres async keyword.
|
|
|
|
Unimplemented. The opposite of the above.
|
|
|
|
=head2 async vision
|
|
|
|
This method requres async keyword.
|
|
|
|
Unimplemented, I've not investigated this one much yet but I believe it's to get a description of an image and it's contents.
|
|
|
|
=head2 Missing apis
|
|
|
|
At least some for getting the list of models and some other meta information, those will be added next after I get some more documentation written
|
|
|
|
=head1 See Also
|
|
|
|
L<IO::Async>, L<Future::AsyncAwait>, L<Net::Async::HTTP>
|
|
|
|
=head1 License
|
|
|
|
Artistic 2.0
|
|
|
|
=head1 Author
|
|
|
|
Ryan Voots, ... etc.
|
|
|
|
=cut
|
|
|
|
class OpenAIAsync::Server :repr(HASH) :isa(IO::Async::Notifier) :strict(params) {
|
|
use JSON::MaybeXS qw//;
|
|
use Net::Async::HTTP;
|
|
use Feature::Compat::Try;
|
|
use URI;
|
|
|
|
field $_json = JSON::MaybeXS->new(utf8 => 1, convert_blessed => 1);
|
|
field $http;
|
|
|
|
# TODO document these directly, other options gets mixed in BEFORE all of these
|
|
field $_http_max_in_flight :param(http_max_in_flight) = 2;
|
|
field $_http_max_redirects :param(http_max_redirects) = 3;
|
|
field $_http_max_connections_per_host :param(http_max_connections_per_host) = 2;
|
|
field $_http_timeout :param(http_timeout) = 120; # My personal server is kinda slow, use a generous default
|
|
field $_http_stall_timeout :param(http_stall_timeout) = 600; # generous for my slow personal server
|
|
field $_http_other :param(http_other_options) = {};
|
|
field $_http_user_agent :param(http_user_agent) = __PACKAGE__." Perl/$VERSION (Net::Async::HTTP/".$Net::Async::HTTP::VERSION." IO::Async/".$IO::Async::VERSION." Perl/$])";
|
|
|
|
field $api_base :param(api_base) = $ENV{OPENAI_API_BASE} // "https://api.openai.com/v1";
|
|
field $api_key :param(api_key) = $ENV{OPENAI_API_KEY};
|
|
|
|
field $api_org_name :param(api_org_name) = undef;
|
|
|
|
field $io_async_notifier_params :param = undef;
|
|
|
|
method configure(%params) {
|
|
# We require them to go this way, so that there is no conflicts
|
|
# TODO document this
|
|
my %io_async_params = ($params{io_async_notifier_params} // {})->%*;
|
|
IO::Async::Notifier::configure($self, %io_async_params);
|
|
}
|
|
|
|
method __make_http() {
|
|
die "Missing API Key for OpenAI" unless $api_key;
|
|
|
|
return Net::Async::HTTP->new(
|
|
$_http_other->%*,
|
|
user_agent => "SNN OpenAI Client 1.0",
|
|
+headers => {
|
|
"Authorization" => "Bearer $api_key",
|
|
"Content-Type" => "application/json",
|
|
$api_org_name ? (
|
|
'OpenAI-Organization' => $api_org_name,
|
|
) : ()
|
|
},
|
|
max_redirects => $_http_max_redirects,
|
|
max_connections_per_host => $_http_max_connections_per_host,
|
|
max_in_flight => $_http_max_in_flight,
|
|
timeout => $_http_timeout,
|
|
stall_timeout => $_http_stall_timeout,
|
|
)
|
|
}
|
|
|
|
ADJUST {
|
|
$http = $self->__make_http;
|
|
|
|
$api_base =~ s|/$||; # trim an accidental final / since we will be putting it on the endpoints
|
|
}
|
|
|
|
async method _make_request($endpoint, $data) {
|
|
my $json = $_json->encode($data);
|
|
|
|
my $url = URI->new($api_base . $endpoint );
|
|
|
|
my $result = await $http->do_request(
|
|
uri => $url,
|
|
method => "POST",
|
|
content => $json,
|
|
content_type => 'application/json',
|
|
);
|
|
|
|
if ($result->is_success) {
|
|
my $json = $result->decoded_content;
|
|
my $out_data = $_json->decode($json);
|
|
|
|
return $out_data;
|
|
} else {
|
|
die "Failure in talking to OpenAI service: ".$result->status_line.": ".$result->decoded_content;
|
|
}
|
|
}
|
|
|
|
method _add_to_loop($loop) {
|
|
$loop->add($http);
|
|
}
|
|
|
|
method _remove_from_loop($loop) {
|
|
$loop->remove($http);
|
|
$http = $self->__make_http; # overkill? want to make sure we have a clean one
|
|
}
|
|
|
|
# This is the legacy completion api
|
|
async method completion($input) {
|
|
|
|
if (ref($input) eq 'HASH') {
|
|
$input = OpenAIAsync::Types::Requests::Completion->new($input->%*);
|
|
} elsif (ref($input) eq 'OpenAIAsync::Types::Requests::Completion') {
|
|
# dummy, nothing to do
|
|
} else {
|
|
die "Unsupported input type [".ref($input)."]";
|
|
}
|
|
|
|
my $data = await $self->_make_request($input->_endpoint(), $input);
|
|
|
|
my $type_result = OpenAIAsync::Types::Results::Completion->new($data->%*);
|
|
|
|
return $type_result;
|
|
}
|
|
|
|
async method chat($input) {
|
|
if (ref($input) eq 'HASH') {
|
|
$input = OpenAIAsync::Types::Requests::ChatCompletion->new($input->%*);
|
|
} elsif (ref($input) eq 'OpenAIAsync::Types::Requests::ChatCompletion') {
|
|
# dummy, nothing to do
|
|
} else {
|
|
die "Unsupported input type [".ref($input)."]";
|
|
}
|
|
|
|
my $data = await $self->_make_request($input->_endpoint(), $input);
|
|
|
|
my $type_result = OpenAIAsync::Types::Results::ChatCompletion->new($data->%*);
|
|
|
|
return $type_result;
|
|
}
|
|
|
|
async method embedding($input) {
|
|
if (ref($input) eq 'HASH') {
|
|
$input = OpenAIAsync::Types::Requests::Embedding->new($input->%*);
|
|
} elsif (ref($input) eq 'OpenAIAsync::Types::Requests::Embedding') {
|
|
# dummy, nothing to do
|
|
} else {
|
|
die "Unsupported input type [".ref($input)."]";
|
|
}
|
|
|
|
my $data = await $self->_make_request($input->_endpoint(), $input);
|
|
|
|
my $type_result = OpenAIAsync::Types::Results::Embedding->new($data->%*);
|
|
|
|
return $type_result;
|
|
}
|
|
|
|
async method image_generate($input) {
|
|
...
|
|
}
|
|
|
|
async method text_to_speech($text) {
|
|
...
|
|
}
|
|
|
|
async method speech_to_text($sound_data) {
|
|
...
|
|
}
|
|
|
|
async method vision($image, $prompt) {
|
|
...
|
|
}
|
|
} |