Multi PDF Chat in Langchain

Hello,

I am trying to add a few features to “langchain_pdf_assistant.py” from panel-chat-examples and need some help.

  1. Accept multiple PDFs - done, but suggestions for improvement are welcome.
  2. Stream the output - it streams in the VS Code terminal output, but not in the ChatInterface. Help! (A stripped-down sketch of the pattern I am after is below my full code.)
  3. Spinning icon - I checked out this post, but I am still not able to get it up and running. Does it only work in streaming mode?
  4. In the original langchain_pdf_assistant.py, the send button is deactivated after ONE send/question. I am trying to keep it available for multiple sends/questions, but I can't seem to make it work. I have tried several approaches in the respond() function.

Thanks!

"""
Demonstrates how to use the `ChatInterface` to chat about a PDF using
OpenAI, [LangChain](https://python.langchain.com/docs/get_started/introduction) and
[Chroma](https://docs.trychroma.com/).
"""

import tempfile
from pathlib import Path

import os
import sys
import asyncio
import panel as pn
import param
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain_community.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

from langchain_community.vectorstores import FAISS
#from langchain_pinecone import PineconeVectorStore
from langchain_community.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch
from ctransformers import AutoModelForCausalLM
from langchain.prompts import PromptTemplate
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler



#from panel_chat_examples import EnvironmentWidgetBase
FirstRun = 0
EXAMPLE_PDF = Path(__file__).parent / "example.pdf"
TTL = 1800  # 30 minutes

#global global_documents
#global_documents = []

pn.extension()

# Define the Retrieval Question/ Answer Chain
# We use caching to speed things up

def update_print_file(pdf):
    if file_input.filename is not None:
        filenames = "\n".join(f"Uploaded PDF: {filename}" for filename in file_input.filename)
        pdf_list_panel.object = (f"\n{filenames}")
        loading_spinner.visible = False
    else:
        pdf_list_panel.object = ("No PDF uploaded.")




#@pn.cache(ttl=TTL)
def _get_retrieval_qa(retriever):
    #callbacks =  pn.chat.langchain.PanelCallbackHandler(chat_interface)
    print("LLM...")
    callbacks = StreamingStdOutCallbackHandler()
    CONFIG = {"context_length": 4096, "max_new_tokens": 2560, "temperature": 0}
    if state.llm_model_selector == "Llama":
        llm = CTransformers(model="TheBloke/Llama-2-7B-chat-GGUF", model_file="llama-2-7b-chat.Q5_K_M.gguf", config=CONFIG, gpu_layers=50, streaming=True, callbacks=[callbacks])

    if state.llm_model_selector == "Mistral":
        llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf", config=CONFIG, gpu_layers=50, streaming=True, callbacks=[callbacks])
    
    #if state.llm_model_selector == "Dolphin":
    #    llm=CTransformers(model="TheBloke/dolphin-2.6-mistral-7B-GGUF", model_file="dolphin-2.6-mistral-7b.Q4_K_M.gguf", config=CONFIG, gpu_layers=50, streaming=True, callbacks=callbacks)

    SYSTEM_PROMPT = """
    You are a friendly chat bot who's willing to help answer the user:
    """
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    SYSTEM_PROMPT = B_SYS + SYSTEM_PROMPT + E_SYS

    instruction = """
    {context}

    Question: {question}
    """

    template_ = B_INST + SYSTEM_PROMPT + instruction + E_INST

    prompt = PromptTemplate(template=template_, input_variables=["context", "question"])

    
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type=state.chain_type,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
        verbose=True)
    
    global global_qa
    global_qa = qa

    global FirstRun
    FirstRun=1
    
    print("state.param.pdf_get_retrieval_qa:", state.param.pdf)
    print("state.pdf_get_retrieval_qa:", state.pdf)
    
    return qa 
    

#@pn.cache(ttl=TTL)
def _get_retriever(docsearch):
    retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": state.number_of_chunks})
    print("Retrieving...")
    return _get_retrieval_qa(retriever)


#@pn.cache(ttl=TTL)
def _get_vector_db(global_documents):

    # split the documents into chunks
    #text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=350,chunk_overlap=100)
    documents = text_splitter.split_documents(global_documents)

    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    # create the vector store to use as the index
    docsearch = FAISS.from_documents(documents, embeddings)
    print("Embedding...")
    return _get_retriever (docsearch)


#@pn.cache(ttl=TTL)
def _get_texts(pdf):
    print("Loading & Chunking...")
    data = file_input.value
    state.pdf_list = file_input.value
    loading_spinner.visible = True
    print("file_input.value:",file_input.value) 
    print("len file_input.value:",len(file_input.value))
    print("file_input.filename:",file_input.filename) 
    #print("self.pdfs:", self.pdf) 
    #print("finput:",finput) 
    if data is not None:
        documents = []
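        # NOTE: delete=False (Python 3.12+) keeps the directory after the with-block,
        # so PyPDFDirectoryLoader can still read the written files below.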
        with tempfile.TemporaryDirectory(delete=False) as tmpdirname:  
            print('created temporary directory', tmpdirname)  
            for pdf_data in data:
                with tempfile.NamedTemporaryFile("wb", delete=False, suffix=".pdf", dir=tmpdirname) as f:
                    f.write(pdf_data)
                    file_name = f.name
                    print("file_name:", file_name)
        loader = PyPDFDirectoryLoader(tmpdirname)
        documents = loader.load()
        
        global global_documents
        global_documents = documents

        print("global pdf:",global_documents) 
        print("tmpdirname.name:",tmpdirname)
        print("len pdf:",len(documents)) 
        print("state.param.pdf:", state.param.pdf)
        print("state.pdf:", state.pdf)
        print("state.pdf_list:", state.pdf_list)
    
    chat_interface.active_widget.placeholder = "Ask questions here!" 
    chat_interface.disabled = False
    return update_print_file(pdf), chat_interface.send({"user": state.llm_model_selector, "object": "Let's chat about the PDF!"}, respond=False)  # _get_vector_db(documents)



def _get_response(contents):
    print("global pdf__get_response:",len(global_documents))
    print("FirstRun:", FirstRun)
    if FirstRun == 1 and contents is not None:
        qa = global_qa
    elif contents is not None:
        qa =  _get_vector_db(global_documents)
        

    response = qa({"query": contents})
    chunks = []

    for chunk in response["source_documents"][::-1]:
        name = f"Chunk {chunk.metadata['page']}"
        content = chunk.page_content
        chunks.insert(0, (name, content))
    return response, chunks


class State(param.Parameterized):
    pdf: bytes = param.Bytes()
    pdf_list: list = param.List()
    number_of_chunks: int = param.Integer(default=2, bounds=(1, 10), step=1)
    chain_type: str = param.Selector(
        objects=["stuff", "map_reduce", "refine", "map_rerank"]
    )
    llm_model_selector: str = param.Selector(
        objects=["Llama", "Mistral"] 
    )


state = State()


@pn.depends(state.param.pdf_list, watch=True)
def _enable_chat_interface(pdf_list):
    if state.pdf_list:
        chat_interface.disabled = False
    else:
        chat_interface.disabled = True


def _get_validation_message():
    pdf = state.pdf_list
    if not pdf:
        chat_interface.disabled = True
        return "Please first upload a PDF!"
    return ""


def _send_not_ready_message(chat_interface) -> bool:
    message = _get_validation_message()

    if message:
        chat_interface.send({"user": "System", "object": message}, respond=False)
    return bool(message)

# Define the widgets
#file_input = pn.widgets.FileInput.from_param(state.param.pdf, accept=".pdf", height=50, name="Upload PDF", multiple=True)
file_input = pn.widgets.FileInput(accept=".pdf", multiple=True)
file_input.param.watch(_get_texts, 'value')
# file_input.from_param(state.param.pdf)  # no-op: from_param is a classmethod and would only create a new, unused widget
text_input = pn.widgets.TextAreaInput(placeholder="First, upload a PDF!") #, auto_grow=True, max_rows=2
chain_type_input = pn.widgets.RadioButtonGroup.from_param(
    state.param.chain_type,
    orientation="horizontal",
    sizing_mode="stretch_width",
    button_type="primary",
    button_style="outline",
)
llm_model_input = pn.widgets.RadioButtonGroup.from_param(
    state.param.llm_model_selector,
    orientation="horizontal",
    sizing_mode="stretch_width",
    button_type="primary",
    button_style="outline",
)
pdf_list_panel = pn.panel("No PDFs uploaded yet.")
loading_spinner = pn.indicators.LoadingSpinner(value=True, visible=False, size=20, name='Loading...')






async def respond(contents, user, instance: pn.chat.ChatInterface):
    if _send_not_ready_message(chat_interface):
        return
    if chat_interface.active == 0:
        chat_interface.active = 1
        chat_interface.active_widget.placeholder = "Ask questions here!"
        print("state.param.pdf_respond:", state.param.pdf)
        print("state.pdf_respond:", state.pdf)
        yield {"user": state.llm_model_selector, "object": "Let's chat about the PDF!"}
        return
    if contents is not None:
        print("contents=", contents)
        response, documents = _get_response(contents)
        chat_interface.disabled = False 
        chat_interface.active = 1
        ##==== not streaming ========
        #pages_layout = pn.Accordion(*documents, sizing_mode="stretch_width", max_width=800)        
        #answers = pn.Column(response["result"], pages_layout)
        #yield {"user": state.llm_model_selector, "object": answers}   
        #chat_interface.send({"user": state.llm_model_selector, "object": answers})

        ##===== streaming answer =======
        message = None
        for chunk in response["result"]:
            message = instance.stream(chunk, user=state.llm_model_selector, message=message)
    
        return


chat_interface = pn.chat.ChatInterface(
    callback=respond,
    callback_exception="verbose",
    sizing_mode="stretch_width",
    widgets=[text_input],
    disabled=False,
    height=500, 
    placeholder_threshold=0.1, 
    show_button_name=True,
    show_rerun=False,
    show_undo=False,
)

_send_not_ready_message(chat_interface)
 
print("state.param.pdf:", state.param.pdf)
print("state.pdf:", state.pdf)
## Wrap the app in a nice template

tab1 = pn.Column(
    #pn.Row(inp),
    #pn.layout.Divider(),
    pn.panel(chat_interface,  loading_indicator=True, height=500),
    #pn.layout.Divider(),
)
tab2= pn.Column(
    #pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    #pn.panel(cb.get_sources ),
)

dashboard = pn.template.BootstrapTemplate(
    title='Private GPT',
    sidebar=[
        #environ,
        state.param.number_of_chunks,
        "LLM Model:",
        llm_model_input,
        "Chain Type:",
        chain_type_input,
        "PDF Upload:",
        file_input,
        loading_spinner,
        pdf_list_panel,
    ],
    #main=[chat_interface],
    main=[
        pn.Tabs(('Conversation', tab1), ('Database', tab2),)
        #chat_interface
    ],
)   
dashboard.servable()
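
For item 2, this is the stripped-down streaming pattern I am trying to reproduce inside the ChatInterface (a sketch only; the fake token list and the short sleep stand in for my RetrievalQA output):

import asyncio
import panel as pn

pn.extension()

async def stream_callback(contents, user, instance: pn.chat.ChatInterface):
    # Fake token source standing in for the LLM output (sketch assumption).
    tokens = f"You said: {contents}".split()
    message = None
    for token in tokens:
        # Awaiting hands control back to the event loop so each token is flushed to the UI.
        await asyncio.sleep(0.1)
        # Passing the previous ChatMessage back in appends to the same bubble.
        message = instance.stream(token + " ", user="Assistant", message=message)

chat = pn.chat.ChatInterface(callback=stream_callback)
chat.servable()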

Hi!

For 3, do you have the latest panel?

Can you clarify why you need to make it available for multi send?

Hi Andrew,

For 3, just to understand further.
In this post, there is a .sleep() call; how can I implement this under the respond() function?

[quote="ahuang11, post:9, topic:6582"]
await asyncio.sleep(3)
[/quote]
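
Is it something like this, i.e. awaiting near the top of respond() so the placeholder/spinner gets a chance to render before the heavy retrieval work starts? (Just a guess on my part, trimmed down from my function above.)

async def respond(contents, user, instance: pn.chat.ChatInterface):
    if _send_not_ready_message(chat_interface):
        return
    # Guess: yield control to the event loop so the ChatInterface placeholder can appear.
    await asyncio.sleep(0.1)
    response, documents = _get_response(contents)
    yield {"user": state.llm_model_selector, "object": response["result"]}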

Multi send is to keep the question-and-answer flow going without having to re-upload the PDF.

Thanks.

Sorry I don’t quite understand the above; which post?

[quote]
Multi send is to keep the question-and-answer flow going without having to re-upload the PDF.
[/quote]

I don’t think you need to re-upload the PDF; it should persist in memory.
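
As a rough sketch, you could build the chain once per upload and reuse it for every question, e.g. via pn.state.cache (the names here are placeholders rather than from your code, except where noted):

def get_or_build_qa():
    # pn.state.cache is a plain dict that survives across callback invocations.
    qa = pn.state.cache.get("qa")
    if qa is None:
        qa = _get_vector_db(global_documents)  # your existing builder
        pn.state.cache["qa"] = qa
    return qa

async def respond(contents, user, instance: pn.chat.ChatInterface):
    if _send_not_ready_message(instance):
        return
    qa = get_or_build_qa()
    response = qa({"query": contents})
    # Nothing disables the widgets here, so the send button stays usable for follow-ups.
    yield {"user": state.llm_model_selector, "object": response["result"]}

You would also want to clear the cached entry (pn.state.cache.pop("qa", None)) whenever a new PDF is uploaded.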