<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Dermatol</journal-id><journal-id journal-id-type="publisher-id">derma</journal-id><journal-id journal-id-type="index">29</journal-id><journal-title>JMIR Dermatology</journal-title><abbrev-journal-title>JMIR Dermatol</abbrev-journal-title><issn pub-type="epub">2562-0959</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v8i1e74040</article-id><article-id pub-id-type="doi">10.2196/74040</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Evaluating Artificial Intelligence Models in Dermatology: Comparative Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Patel</surname><given-names>Aneri Bhargav</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Driscoll</surname><given-names>William</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Conan H</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zachary</surname><given-names>Cameron</given-names></name><degrees>MD</degrees><xref ref-type="aff" 
rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Golbari</surname><given-names>Nicole M</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Smith</surname><given-names>Janellen</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Dermatology, University of California, Irvine</institution><addr-line>850 Health Sciences Road, 2nd floor</addr-line><addr-line>Irvine</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>School of Medicine, University of California, Davis</institution><addr-line>Sacramento</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff3"><institution>New York Genomics</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Dermatology, University of California, Irvine</institution><addr-line>850 Health Sciences Road, 2nd floor</addr-line><addr-line>Irvine</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff5"><institution>Department of Dermatology, NYU Langone Health</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Dellavalle</surname><given-names>Robert</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Luzuriaga</surname><given-names>Arlene Ruiz de</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Tasci</surname><given-names>Burak</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Aneri Bhargav 
Patel, BS, Department of Dermatology, University of California, Irvine, 850 Health Sciences Road, 2nd floor, Irvine, CA, 92617, United States, 1 9498240606; <email>abppatel@ucdavis.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>4</day><month>12</month><year>2025</year></pub-date><volume>8</volume><elocation-id>e74040</elocation-id><history><date date-type="received"><day>16</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>15</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>02</day><month>11</month><year>2025</year></date></history><copyright-statement>&#x00A9; Aneri Bhargav Patel, William Driscoll, Conan H Lee, Cameron Zachary, Nicole M Golbari, Janellen Smith. Originally published in JMIR Dermatology (<ext-link ext-link-type="uri" xlink:href="http://derma.jmir.org">http://derma.jmir.org</ext-link>), 4.12.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Dermatology, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="http://derma.jmir.org">http://derma.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://derma.jmir.org/2025/1/e74040"/><abstract><p>DermGPT demonstrated strong potential for improving answer clarity and conciseness in dermatology-related queries, while ChatGPT provided more robust source citations, enhancing trust in evidence-based responses.</p></abstract><kwd-group><kwd>ChatGPT</kwd><kwd>DermGPT</kwd><kwd>artificial intelligence</kwd><kwd>dermatology</kwd><kwd>education</kwd><kwd>LLM</kwd><kwd>large language model</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Large language models (LLMs) like OpenAI&#x2019;s GPT-4o use transformer architectures with self-attention to process and generate human-like responses. ChatGPT, developed by OpenAI [<xref ref-type="bibr" rid="ref1">1</xref>], enhances a GPT-4 model with reinforcement learning from human feedback, filtering inappropriate content [<xref ref-type="bibr" rid="ref2">2</xref>]. These models predict the next word based on prior context. Trained on vast internet data, they can address diverse topics, including dermatology. However, LLMs may &#x201C;hallucinate,&#x201D; producing plausible but incorrect information [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>], limiting clinical utility.</p><p>DermGPT [<xref ref-type="bibr" rid="ref5">5</xref>], developed under the Palo Alto Medical Foundation, is tailored for dermatology. Beyond drafting notes and authorizations, it answers dermatology questions using a GPT base model enhanced by a research database. 
By sourcing answers from this material and showing citations, DermGPT aims to reduce hallucinations and better support dermatologists [<xref ref-type="bibr" rid="ref6">6</xref>]. We compared its responses to those of ChatGPT.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>ChatGPT was selected for its popularity and prior evidence of superiority in dermatology-related tasks. A double-blind study found dermatologists preferred ChatGPT over Google&#x2019;s Bard for patient handouts [<xref ref-type="bibr" rid="ref7">7</xref>]. ChatGPT 4o was used. DermGPT&#x2019;s only available model was used.</p><p>Two dermatology residents, CZ and NMG, authored a list of questions posed to each LLM (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Three questions to which DermGPT did not provide a response were excluded as nonevaluable item pairs. The two models&#x2019; answers for a given question were paired and assigned as A or B using a computer-generated randomization list. Any identifiable metadata such as formatting was cleared. The survey was distributed to dermatologists at the University of California, Irvine, and the University of California, Davis, via email and QR codes. Survey takers were informed that both responses were produced by LLMs, but they were blinded to which model produced which response. They were asked to choose their preferred answers based on quality&#x2014;specifically, which answer they thought would be best suited for patient care or was most accurate.</p><p>The rating options were as follows:</p><list list-type="bullet"><list-item><p>Model A better</p></list-item><list-item><p>Model B better</p></list-item><list-item><p>Equal quality</p></list-item><list-item><p>Both inadequate</p></list-item></list><p>Statistical analysis was conducted using SAS OnDemand for Academics (version 9.4). 
<italic>&#x03C7;</italic><sup>2</sup> tests (<italic>P</italic>&#x003C;.05) assessed significance. Interrater reliability was not prespecified and not assessed; ratings were aggregated at the item level.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study used a voluntary, anonymous survey of physicians and residents. According to institutional and national guidelines, the project did not require institutional review board review because no identifiable information was collected and the study posed minimal risk.</p><p>Participants provided implied consent by completing the survey after being informed of its purpose and their ability to withdraw at any time. No compensation was provided. The survey responses were analyzed in aggregate to ensure anonymity and privacy in accordance with institutional standards. The study followed the ethical principles of the Declaration of Helsinki, adhered to Committee on Publication Ethics guidelines, and met all institutional requirements for minimal-risk survey research.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>Of 64 dermatology faculty and 30 residents across the University of California, Irvine, and the University of California, Davis, we received a total of 19 responses, comprising 13 attending physicians and 6 residents or fellows. This corresponds to an overall response rate of approximately 20%.</p></sec><sec id="s3-2"><title>Which LLM&#x2019;s Answer Was Better: ChatGPT or DermGPT?</title><p>Overall, DermGPT&#x2019;s answers (48.1%) were preferred over ChatGPT&#x2019;s (28.4%); the <italic>&#x03C7;</italic><sup>2</sup> test was significant with <italic>P</italic>=.04 (<italic>P</italic>&#x003C;.05). In the attending group, DermGPT&#x2019;s answers were preferred (93/195, 47.7%) over ChatGPT&#x2019;s (56/195, 28.7%). 
Likewise, in the resident group, DermGPT&#x2019;s answers were preferred (44/90, 48.9%) versus ChatGPT (25/90, 27.8%) (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>User-preferred artificial intelligence answer.<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Group/responses</td><td align="left" valign="bottom" colspan="2">ChatGPT</td><td align="left" valign="bottom" colspan="2">DermGPT</td><td align="left" valign="bottom" colspan="2">Other</td><td align="left" valign="bottom" colspan="2">Total answers</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Values, n (%)</td><td align="left" valign="bottom">Percentage of total responses</td><td align="left" valign="bottom">Values, n (%)</td><td align="left" valign="bottom">Percentage of total responses</td><td align="left" valign="bottom">Values, n (%)</td><td align="left" valign="bottom">Percentage of total responses</td><td align="left" valign="bottom">Responses</td><td align="left" valign="bottom">Percentage of total responses</td></tr></thead><tbody><tr><td align="left" valign="top">Attending</td><td align="left" valign="top">56 (28.7)</td><td align="left" valign="top">19.6</td><td align="left" valign="top">93 (47.7)</td><td align="char" char="." valign="top">32</td><td align="left" valign="top">46 (23.6)</td><td align="left" valign="top">16.1</td><td align="left" valign="top">195</td><td align="left" valign="top">68.4</td></tr><tr><td align="left" valign="top">Resident</td><td align="left" valign="top">25 (27.8)</td><td align="left" valign="top">8.8</td><td align="left" valign="top">44 (48.9)</td><td align="char" char="." 
valign="top">15.4</td><td align="left" valign="top">21 (23.3)</td><td align="left" valign="top">7.4</td><td align="left" valign="top">90</td><td align="left" valign="top">31.6</td></tr><tr><td align="left" valign="top">Total</td><td align="left" valign="top">81</td><td align="left" valign="top">28.4</td><td align="left" valign="top">137</td><td align="char" char="." valign="top">48.1</td><td align="left" valign="top">67</td><td align="left" valign="top">23.5</td><td align="left" valign="top">285</td><td align="left" valign="top">100</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup><italic>&#x03C7;</italic><sup>2</sup> test: <italic>P</italic>=.04.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Which LLM&#x2019;s References Were Better: ChatGPT or DermGPT?</title><p>Overall, ChatGPT references (46%) were preferred over DermGPT (23.5%; <italic>&#x03C7;</italic><sup>2</sup><sub>2</sub>=1.385; <italic>P</italic>=.50). In the attending group, ChatGPT references were also preferred (94/195, 48.2%) over DermGPT (45/195, 23.1%). 
Likewise, in the resident group, ChatGPT references were preferred (37/90, 41.1%) versus DermGPT (22/90, 24.4%) (<xref ref-type="table" rid="table2">Table 2</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Overall preference for references.<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Group</td><td align="left" valign="bottom" colspan="2">ChatGPT</td><td align="left" valign="bottom" colspan="2">DermGPT</td><td align="left" valign="bottom" colspan="2">Other</td><td align="left" valign="bottom" colspan="2">Total answers</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Values, n (%)</td><td align="left" valign="top">Percent of total responses</td><td align="left" valign="top">Values, n (%)</td><td align="left" valign="top">Percent of total responses</td><td align="left" valign="top">Values, n (%)</td><td align="left" valign="top">Percent of total responses</td><td align="left" valign="top">Responses</td><td align="left" valign="top">Percent of total responses</td></tr></thead><tbody><tr><td align="left" valign="top">Attending</td><td align="left" valign="top">94 (48.2)</td><td align="char" char="." valign="top">33</td><td align="left" valign="top">45 (23.1)</td><td align="left" valign="top">15.8</td><td align="left" valign="top">56 (28.7)</td><td align="char" char="." valign="top">19.6</td><td align="left" valign="top">195</td><td align="left" valign="top">68.4</td></tr><tr><td align="left" valign="top">Resident</td><td align="left" valign="top">37 (41.1)</td><td align="char" char="." valign="top">13</td><td align="left" valign="top">22 (24.4)</td><td align="left" valign="top">7.7</td><td align="left" valign="top">31 (34.4)</td><td align="char" char="." 
valign="top">10.9</td><td align="left" valign="top">90</td><td align="left" valign="top">31.6</td></tr><tr><td align="left" valign="top">Total</td><td align="left" valign="top">131</td><td align="char" char="." valign="top">46</td><td align="left" valign="top">67</td><td align="left" valign="top">23.5</td><td align="left" valign="top">87</td><td align="char" char="." valign="top">30.5</td><td align="left" valign="top">285</td><td align="left" valign="top">100</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup><italic>&#x03C7;</italic><sup>2</sup><sub>2</sub>=1.385; <italic>P</italic>=.50.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>Out of 195 responses, users generally preferred DermGPT&#x2019;s answers, while ChatGPT was favored for its reference citations (<xref ref-type="table" rid="table2">Table 2</xref>). DermGPT&#x2019;s concise and well-phrased responses made it accessible for quick clinical reference. However, 3 questions were excluded because DermGPT issued disclaimers instead of direct answers, recommending consultation with a dermatologist or guidelines. The multimedia appendices show the results tabulated from SAS as well as the questionnaire and responses (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref><xref ref-type="supplementary-material" rid="app3"/><xref ref-type="supplementary-material" rid="app4"/><xref ref-type="supplementary-material" rid="app5"/><xref ref-type="supplementary-material" rid="app6"/>-<xref ref-type="supplementary-material" rid="app7">7</xref>).</p><p>ChatGPT consistently cited reputable references such as the <italic>Journal of the American Academy of Dermatology</italic> and the <italic>Journal of the American Medical Association</italic>, contributing to user trust and perceived academic rigor. 
Although DermGPT offers clarity, ChatGPT&#x2019;s strong sourcing enhances credibility. These results suggest the potential for a hybrid model that combines both strengths.</p></sec><sec id="s4-2"><title>Limitations</title><p>Our study was constrained by a small rater sample (n=19) and multiple ratings per rater and per question. As a result, <italic>P</italic> values should be interpreted as exploratory rather than confirmatory. The sample may not represent all dermatology clinicians, limiting generalizability. Subgroup patterns were underpowered.</p></sec><sec id="s4-3"><title>Comparison With Prior Work</title><p>Several studies have compared LLMs to each other and to humans. He et al [<xref ref-type="bibr" rid="ref8">8</xref>] found GPT-4 sometimes produced inaccurate, nonindividualized responses to laboratory-related queries. Iannantuono et al [<xref ref-type="bibr" rid="ref9">9</xref>] compared ChatGPT-4, ChatGPT-3.5, and Google Bard in immunooncology, stressing the need for expert verification. Fern&#x00E1;ndez-Pichel et al [<xref ref-type="bibr" rid="ref10">10</xref>] found LLMs answered 80% of health questions accurately, though results were sensitive to prompt phrasing. This is the first study comparing ChatGPT and DermGPT for dermatologic responses.</p></sec><sec id="s4-4"><title>Conclusions and Future Directions</title><p>Future research should include models like Claude and Gemini, expand sample size, and explore combining DermGPT&#x2019;s brevity with ChatGPT&#x2019;s sourcing. These results highlight the importance of balancing clarity and citation in artificial intelligence&#x2013;assisted medical tools.</p></sec></sec></body><back><ack><p>We are grateful to the dermatology residents, attendings, and fellows at the University of California, Irvine, and University of California, Davis, who took the time to take our survey and make this study possible. 
We used the generative AI tool ChatGPT by OpenAI and DermGPT to generate answers and references for our survey, which we analyzed. The original answers and questions posed have been made available in Multimedia Appendix files.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>ChatGPT</article-title><source>OpenAI</source><year>2024</year><access-date>2025-11-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/chatgpt">https://openai.com/chatgpt</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Chaudhari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Aggarwal</surname><given-names>P</given-names> </name><name name-style="western"><surname>Murahari</surname><given-names>V</given-names> </name><etal/></person-group><article-title>RLHF deciphered: a critical analysis of reinforcement learning from human feedback for LLMs</article-title><source>ArXiv</source><comment>Preprint posted online on  Apr 12, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2404.08555</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blank</surname><given-names>IA</given-names> </name></person-group><article-title>What are large language models supposed to model?</article-title><source>Trends Cogn Sci (Regul Ed)</source><year>2023</year><month>11</month><volume>27</volume><issue>11</issue><fpage>987</fpage><lpage>989</lpage><pub-id 
pub-id-type="doi">10.1016/j.tics.2023.08.006</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Berberette</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hutchins</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sadovnik</surname><given-names>A</given-names> </name></person-group><article-title>Redefining &#x201C;hallucination&#x201D; in LLMs: towards a psychology-informed framework for mitigating misinformation</article-title><source>ArXiv</source><comment>Preprint posted online on  Feb 1, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2402.01769</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><source>DermGPT</source><year>2024</year><access-date>2025-11-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://dermgpt.com">https://dermgpt.com</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Kamnagar</surname><given-names>R</given-names> </name></person-group><article-title>AI-based DermGPT focuses on improved clinic productivity</article-title><source>Pract Dermatol</source><year>2024</year><access-date>2025-11-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://practicaldermatology.com/topics/practice-management/ai-based-dermgpt-focuses-on-improved-clinic-productivity/23990/">https://practicaldermatology.com/topics/practice-management/ai-based-dermgpt-focuses-on-improved-clinic-productivity/23990/</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robinson</surname><given-names>MA</given-names> </name><name 
name-style="western"><surname>Belzberg</surname><given-names>M</given-names> </name><name name-style="western"><surname>Thakker</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Assessing the accuracy, usefulness, and readability of artificial-intelligence-generated responses to common dermatologic surgery questions for patient education: a double-blinded comparative study of ChatGPT and Google Bard</article-title><source>J Am Acad Dermatol</source><year>2024</year><month>05</month><volume>90</volume><issue>5</issue><fpage>1078</fpage><lpage>1080</lpage><pub-id pub-id-type="doi">10.1016/j.jaad.2024.01.037</pub-id><pub-id pub-id-type="medline">38296195</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Bhasuran</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>Quality of answers of generative large language models versus peer users for interpreting laboratory test results for lay patients: evaluation study</article-title><source>Journal of Medical Internet Research</source><comment>Preprint posted online on  Apr 17, 2024</comment><pub-id pub-id-type="doi">10.2196/preprints.56655</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Iannantuono</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Bracken-Clarke</surname><given-names>D</given-names> </name><name name-style="western"><surname>Karzai</surname><given-names>F</given-names> </name><name name-style="western"><surname>Choo-Wosoba</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Gulley</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Floudas</surname><given-names>CS</given-names> </name></person-group><article-title>Comparison of large language models in answering immuno-oncology questions: a cross-sectional study</article-title><source>Oncologist</source><year>2024</year><month>05</month><day>3</day><volume>29</volume><issue>5</issue><fpage>407</fpage><lpage>414</lpage><pub-id pub-id-type="doi">10.1093/oncolo/oyae009</pub-id><pub-id pub-id-type="medline">38309720</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fern&#x00E1;ndez-Pichel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pichel</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Losada</surname><given-names>DE</given-names> </name></person-group><article-title>Evaluating search engines and large language models for answering health questions</article-title><source>NPJ Digit Med</source><year>2025</year><month>03</month><day>10</day><volume>8</volume><issue>1</issue><fpage>153</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01546-w</pub-id><pub-id pub-id-type="medline">40065094</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Study process.</p><media xlink:href="derma_v8i1e74040_app1.png" xlink:title="PNG File, 48 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Survey questionnaire.</p><media xlink:href="derma_v8i1e74040_app2.docx" xlink:title="DOCX File, 4134 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>SAS results, part 1.</p><media xlink:href="derma_v8i1e74040_app3.docx" xlink:title="DOCX File, 79 
KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>SAS results, part 2.</p><media xlink:href="derma_v8i1e74040_app4.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>SAS results, part 3.</p><media xlink:href="derma_v8i1e74040_app5.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Survey questions and answers.</p><media xlink:href="derma_v8i1e74040_app6.docx" xlink:title="DOCX File, 32 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Comments from survey takers.</p><media xlink:href="derma_v8i1e74040_app7.docx" xlink:title="DOCX File, 7 KB"/></supplementary-material></app-group></back></article>