diff --git a/.gitignore b/.gitignore
index 68bc17f..2129d58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,5 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d249b75
--- /dev/null
+++ b/README.md
@@ -0,0 +1,61 @@
+# 🌺 **Ollama Proxy Server**
+
+**A lightweight proxy and load balancer for Ollama text generation servers**
+
+## Table of Contents
+1. [Introduction](#introduction)
+2. [Requirements](#requirements)
+3. [Installation](#installation)
+4. [Usage](#usage)
+5. [License](#license)
+6. [Contact](#contact)
+7. [Endpoints](#endpoints)
+
+---
+
+## 🌺 **Introduction**
+
+Ollama Proxy Server sits in front of one or more Ollama instances and distributes incoming requests among them. Each backend keeps a queue of in-flight generation requests, and the proxy routes every new request to the least busy server, so several machines can be pooled behind a single endpoint.
+
+---
+
+## 🌺 **Requirements**
+
+To get started with Ollama Proxy Server, ensure you have the following prerequisites:
+- Git for cloning the repository
+- Python 3.11 or higher
+- Operating system: Linux, macOS, or Windows with WSL (Windows Subsystem for Linux)
+
+---
+
+## 🌺 **Installation**
+
+Follow these steps to install Ollama Proxy Server on your local machine:
+1. Clone the Git repository with `git clone https://github.com/ParisNeo/ollama_proxy_server.git`
+2. Navigate into the cloned directory (`cd ollama_proxy_server`)
+3. Install the package with pip by running `pip install -e .`
+4. Launch the server with `ollama_proxy_server`
+
+---
+
+## 🌺 **Usage**
+
+Once installed, declare your backend Ollama servers in `config.ini` and start the proxy. Clients then talk to the proxy exactly as they would talk to a single Ollama server, and their requests are dispatched to the least loaded backend.
+
+---
+
+## 🌺 **License**
+
+Ollama Proxy Server is licensed under the [Apache License v2.0](https://www.apache.org/licenses/LICENSE-2.0).
+
+---
+
+## 🌺 **Contact**
+
+For any queries or feedback, reach out to ParisNeo on Twitter (@SpaceNerduino), on Discord (https://discord.gg/BDxacQmv), or subscribe to the r/lollms subreddit for community updates and discussions.
+
+---
+
+## 🌺 **Endpoints**
+
+The proxy mirrors every endpoint exposed by the backend Ollama servers. Requests to `/api/generate` are queued and routed to the least busy backend; all other paths are forwarded directly.
diff --git a/config.ini b/config.ini
new file mode 100644
index 0000000..eeea4d6
--- /dev/null
+++ b/config.ini
@@ -0,0 +1,7 @@
+[SERVER1]
+url = http://localhost:11434
+
+[SERVER2]
+url = http://localhost:3002
+
+# Add more servers as needed.
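With the backends above declared, a client talks to the proxy as if it were a single Ollama server. Below is a minimal client-side sketch, not part of the diff: it assumes the proxy runs on its default port 8000 and that the backends speak the standard Ollama `/api/generate` JSON API; the model name and prompt are placeholders.

```python
import requests

# Hypothetical client call: the proxy forwards this to whichever backend
# in config.ini currently has the fewest queued generation requests.
response = requests.post(
    "http://localhost:8000/api/generate",
    json={
        "model": "mistral",                # placeholder model name
        "prompt": "Why is the sky blue?",  # placeholder prompt
        "stream": False,                   # request a single JSON reply
    },
)
print(response.json()["response"])
```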
diff --git a/ollama_proxy_server/main.py b/ollama_proxy_server/main.py
new file mode 100644
index 0000000..41e9f6f
--- /dev/null
+++ b/ollama_proxy_server/main.py
@@ -0,0 +1,77 @@
+import argparse
+import configparser
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from queue import Queue
+from socketserver import ThreadingMixIn
+from urllib.parse import urlparse, parse_qs
+
+import requests
+
+
+def get_config(filename):
+    """Read the backend servers from an ini file and give each one a queue."""
+    config = configparser.ConfigParser()
+    config.read(filename)
+    return [(name, {'url': config[name]['url'], 'queue': Queue()}) for name in config.sections()]
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', default="config.ini", help='Path to the config file')
+    parser.add_argument('--port', type=int, default=8000, help='Port number for the proxy server')
+    args = parser.parse_args()
+    servers = get_config(args.config)
+
+    class RequestHandler(BaseHTTPRequestHandler):
+
+        def _send_response(self, response):
+            self.send_response(response.status_code)
+            self.send_header('Content-type', response.headers.get('content-type', 'application/octet-stream'))
+            self.end_headers()
+            self.wfile.write(response.content)
+
+        def do_GET(self):
+            self.proxy()
+
+        def do_POST(self):
+            self.proxy()
+
+        def proxy(self):
+            url = urlparse(self.path)
+            path = url.path
+            get_params = parse_qs(url.query) or {}
+
+            if self.command == "POST":
+                content_length = int(self.headers.get('Content-Length', 0))
+                # Forward the raw request body unchanged.
+                post_data = self.rfile.read(content_length)
+            else:
+                post_data = None
+
+            # Pick the server with the fewest queued requests.
+            min_queued_server = min(servers, key=lambda server: server[1]['queue'].qsize())
+
+            # Apply the queuing mechanism only for the generation endpoint.
+            if path == '/api/generate':
+                que = min_queued_server[1]['queue']
+                que.put_nowait(1)
+                try:
+                    response = requests.request(self.command, min_queued_server[1]['url'] + path, params=get_params, data=post_data)
+                    self._send_response(response)
+                finally:
+                    que.get_nowait()
+            else:
+                # For other endpoints, just mirror the request.
+                response = requests.request(self.command, min_queued_server[1]['url'] + path, params=get_params, data=post_data)
+                self._send_response(response)
+
+    class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
+        pass
+
+    print(f'Starting proxy server on port {args.port}')
+    server = ThreadedHTTPServer(('', args.port), RequestHandler)
+    server.serve_forever()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3dd9c9c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests
diff --git a/requirements_dev.txt b/requirements_dev.txt
new file mode 100644
index 0000000..908455f
--- /dev/null
+++ b/requirements_dev.txt
@@ -0,0 +1 @@
+requests==2.27.1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f100db1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,42 @@
+from pathlib import Path
+from typing import Union
+
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+
+def read_requirements(path: Union[str, Path]):
+    with open(path, "r") as file:
+        return file.read().splitlines()
+
+
+requirements = read_requirements("requirements.txt")
+requirements_dev = read_requirements("requirements_dev.txt")
+
+
+setuptools.setup(
+    name="ollama_proxy_server",
+    version="7.1.0",
+    author="Saifeddine ALOUI (ParisNeo)",
+    author_email="aloui.saifeddine@gmail.com",
+    description="A multi-backend proxy and load balancer for Ollama text generation servers",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/ParisNeo/ollama_proxy_server",
+    packages=setuptools.find_packages(),
+    include_package_data=True,
+    install_requires=requirements,
+    entry_points={
+        'console_scripts': [
+            'ollama_proxy_server = ollama_proxy_server.main:main',
+        ],
+    },
+    extras_require={"dev": requirements_dev},
+    classifiers=[
+        "Programming Language :: Python :: 3.11",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+    ],
+)
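The load-balancing rule at the heart of `main.py` is self-contained enough to try on its own. A minimal sketch, not part of the diff, using the same `(name, {'url': ..., 'queue': ...})` tuples that `get_config()` returns:

```python
from queue import Queue

# Two fake backends in the same shape get_config() produces.
servers = [
    ("SERVER1", {"url": "http://localhost:11434", "queue": Queue()}),
    ("SERVER2", {"url": "http://localhost:3002", "queue": Queue()}),
]

# Pretend SERVER1 already has one generation request in flight.
servers[0][1]["queue"].put_nowait(1)

# The same selection rule the proxy applies: fewest queued requests wins.
least_busy = min(servers, key=lambda server: server[1]["queue"].qsize())
print(least_busy[0])  # -> SERVER2
```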