<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JDERM</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Dermatol</journal-id>
      <journal-title>JMIR Dermatology</journal-title>
      <issn pub-type="epub">2562-0959</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i1e49889</article-id>
      <article-id pub-id-type="pmid">38096013</article-id>
      <article-id pub-id-type="doi">10.2196/49889</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>The Accuracy and Appropriateness of ChatGPT Responses on Nonmelanoma Skin Cancer Information Using Zero-Shot Chain of Thought Prompting</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Solomon</surname>
            <given-names>James</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Brooks</surname>
            <given-names>Ian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hidki</surname>
            <given-names>Asmaa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kanike</surname>
            <given-names>Uday</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chrimes</surname>
            <given-names>Dillon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>O'Hagan</surname>
            <given-names>Ross</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2310-756X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Poplausky</surname>
            <given-names>Dina</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5037-1630</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Young</surname>
            <given-names>Jade N</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0887-3319</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Gulati</surname>
            <given-names>Nicholas</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4347-0710</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Levoska</surname>
            <given-names>Melissa</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2848-759X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Ungar</surname>
            <given-names>Benjamin</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0882-8163</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ungar</surname>
            <given-names>Jonathan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Dermatology</institution>
            <institution>Icahn School of Medicine at Mount Sinai</institution>
            <addr-line>5th Floor</addr-line>
            <addr-line>5 East 98th Street</addr-line>
            <addr-line>New York, NY, 10029</addr-line>
            <country>United States</country>
            <phone>1 212 241 3288</phone>
            <email>jonathan.ungar@mountsinai.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6885-6890</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Dermatology</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jonathan Ungar <email>jonathan.ungar@mountsinai.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>14</day>
        <month>12</month>
        <year>2023</year>
      </pub-date>
      <volume>6</volume>
      <elocation-id>e49889</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>21</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>3</day>
          <month>12</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Ross O'Hagan, Dina Poplausky, Jade N Young, Nicholas Gulati, Melissa Levoska, Benjamin Ungar, Jonathan Ungar. Originally published in JMIR Dermatology (http://derma.jmir.org), 14.12.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Dermatology, is properly cited. The complete bibliographic information, a link to the original publication on http://derma.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://derma.jmir.org/2023/1/e49889" xlink:type="simple"/>
      <kwd-group>
        <kwd>ChatGPT</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>large language models</kwd>
        <kwd>nonmelanoma skin</kwd>
        <kwd>skin cancer</kwd>
        <kwd>cell carcinoma</kwd>
        <kwd>chatbot</kwd>
        <kwd>dermatology</kwd>
        <kwd>dermatologist</kwd>
        <kwd>epidermis</kwd>
        <kwd>dermis</kwd>
        <kwd>oncology</kwd>
        <kwd>cancer</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Nonmelanoma skin cancer (NMSC) represents the most prevalent form of cancer worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. Patients with NMSC seek information from various resources. Work has already shown that large language models (LLMs) such as ChatGPT can generate medical information in response to questions [<xref ref-type="bibr" rid="ref2">2</xref>]; however, results vary significantly based on the prompts entered. Previous work has shown that a few-shot approach, where one provides several example prompts and outputs, has good results [<xref ref-type="bibr" rid="ref3">3</xref>], as does the few-shot chain of thought approach, where answers include examples and the reasoning for correct answers, encouraging the model to reason through the question [<xref ref-type="bibr" rid="ref4">4</xref>]. Zero-shot chain of thought (ZS-COT) prompting does not provide example prompts; instead, it uses phrases to encourage the LLMs to “think” through their responses, with significant improvement in accuracy in some contexts [<xref ref-type="bibr" rid="ref5">5</xref>]. In this study, we explore ChatGPT’s performance in answering questions about NMSC using both standard and ZS-COT prompting.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>We generated 25 common clinical questions about NMSC in four categories: general, diagnosis, management, and risk factors. Prompts were entered into ChatGPT 4.0 on March 31, 2023, and responses were recorded for both standard and ZS-COT prompting (<xref rid="figure1" ref-type="fig">Figure 1</xref>A). Ending ZS-COT prompting queries with “Let’s think step by step” has been shown to improve performance in previous papers [<xref ref-type="bibr" rid="ref5">5</xref>]. Three attending dermatologists independently reviewed and graded whether the outputs would be appropriate for a patient-facing website and an electronic health record (EHR) message draft to a patient. Responses were also evaluated for accuracy on a 5-point scale, with 1 being completely inaccurate and 5 being completely accurate, and reviewers assessed which of the two prompting styles they preferred. Statistical differences between prompts were computed using the Wilcoxon test. Statistical analysis was performed in R version 4.2.2 (R Foundation for Statistical Computing).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>(A) Example of several popular large language model prompting techniques. (B) Percent of appropriate responses for each question category by medium. (C) Accuracy scores by prompt style. COT: chain of thought; EHR: electronic health record; NMSC: nonmelanoma skin cancer; RF: risk factor.</p>
          </caption>
          <graphic xlink:href="derma_v6i1e49889_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study did not require institutional review board approval.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Averaging all accuracy scores from a scale (range 1-5), we found that the combined accuracy for both the original prompt and ZS-COT prompt was 4.89. The average accuracy score from all 25 questions asked for the original prompt and ZS-COT prompt was 4.92 and 4.87, respectively, representing a nonsignificant difference of 1.03%. Both prompting styles were deemed 100% appropriate for a patient-facing information portal for general, diagnosis, management, and risk factor questions. For EHR message responses, outputs were appropriate for 97% of general questions, 92% of diagnosis questions, 85% of management questions, and 100% of risk factor questions (<xref rid="figure1" ref-type="fig">Figure 1</xref>B). The lowest accuracy grade for the standard prompting responses and ZS-COT prompting was 4 and 2, respectively (<xref rid="figure1" ref-type="fig">Figure 1</xref>C). This score was given for the prompt “What causes basal cell carcinoma?” (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>This exploratory qualitative study found that LLMs can provide accurate patient information regarding NMSC appropriate for both general websites and EHR messages. We found that ZS-COT prompting does not provide more accurate dermatology information. The limitations of this study include that we only explored a subset of clinical questions patients may have about NMSC, there is no objective standard for appropriateness, and the personal biases of the dermatologists may influence response preference. As LLMs continue to grow and be adapted, clinicians must monitor their clinical utility and how different prompting methods may change the quality of results.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Evaluated nonmelanoma skin cancer questions.</p>
        <media xlink:href="derma_v6i1e49889_app1.docx" xlink:title="DOCX File, 18 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NMSC</term>
          <def>
            <p>nonmelanoma skin cancer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ZS-COT</term>
          <def>
            <p>zero-shot chain of thought</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>BU is an employee of Mount Sinai and has received research funds (grants paid to the institution) from Incyte, Rapt Therapeutics, and Pfizer. He is also a consultant for Arcutis Biotherapeutics, Castle Biosciences, Fresenius Kabi, Pfizer, and Sanofi. JU is an employee of Mount Sinai and is a consultant for AbbVie, Castle Biosciences, Dermavant, Janssen, Menlo Therapeutics, Mitsubishi Tanabe Pharma America, and UCB. The rest of the authors declare no relevant conflicts of interest.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dubas</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Ingraffea</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Nonmelanoma skin cancer</article-title>
          <source>Facial Plast Surg Clin North Am</source>
          <year>2013</year>
          <month>02</month>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1016/j.fsc.2012.10.003</pub-id>
          <pub-id pub-id-type="medline">23369588</pub-id>
          <pub-id pub-id-type="pii">S1064-7406(12)00144-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarraju</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bruemmer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Iterson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Laffin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Appropriateness of cardiovascular disease prevention recommendations obtained from a popular online chat-based artificial intelligence model</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <month>03</month>
          <day>14</day>
          <volume>329</volume>
          <issue>10</issue>
          <fpage>842</fpage>
          <lpage>844</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36735264"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2023.1044</pub-id>
          <pub-id pub-id-type="medline">36735264</pub-id>
          <pub-id pub-id-type="pii">2801244</pub-id>
          <pub-id pub-id-type="pmcid">PMC10015303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dhariwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sastry</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Herbert-Voss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Henighan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ziegler</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Winter</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sigler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Litwin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chess</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McCandlish</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language models are few-shot learners</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 28, 2020. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/2005.14165.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Schuurmans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bosma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ichter</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on January 28, 2022. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/2201.11903.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kojima</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Matsuo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Iwasawa</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Large language models are zero-shot reasoners</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 24, 2022. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/2205.11916.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
