<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Dermatol</journal-id><journal-id journal-id-type="publisher-id">derma</journal-id><journal-id journal-id-type="index">29</journal-id><journal-title>JMIR Dermatology</journal-title><abbrev-journal-title>JMIR Dermatol</abbrev-journal-title><issn pub-type="epub">2562-0959</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v8i1e59054</article-id><article-id pub-id-type="doi">10.2196/59054</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Evaluating the Readability of Pediatric Neurocutaneous Syndromes&#x2013;Related Patient Education Material Created by a Custom GPT With Retrieval Augmentation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Ede</surname><given-names>Nneka</given-names></name><degrees>MSE, MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Okereke</surname><given-names>Robyn</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Biomedical Engineering, Cockrell School of Engineering, The University of Texas at Austin</institution><addr-line>1500 Red River Street</addr-line><addr-line>Austin</addr-line><addr-line>TX</addr-line><country>United 
States</country></aff><aff id="aff2"><institution>Dell Medical School, The University of Texas at Austin</institution><addr-line>Austin</addr-line><addr-line>TX</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Dermatology, Oregon Health &amp; Science University</institution><addr-line>Portland</addr-line><addr-line>OR</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Brooks</surname><given-names>Ian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chrimes</surname><given-names>Dillon</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Khan</surname><given-names>Hikmat</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mondal</surname><given-names>Himel</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>El-Hafeez</surname><given-names>Tarek Abd</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Nneka Ede, MSE, MD, Department of Biomedical Engineering, Cockrell School of Engineering, The University of Texas at Austin, 1500 Red River Street, Austin, TX, 78701, United States, +15124955555; <email>nne1st@aol.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>16</day><month>7</month><year>2025</year></pub-date><volume>8</volume><elocation-id>e59054</elocation-id><history><date date-type="received"><day>31</day><month>03</month><year>2024</year></date><date date-type="rev-recd"><day>07</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>28</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Nneka Ede, Robyn Okereke. 
Originally published in JMIR Dermatology (<ext-link ext-link-type="uri" xlink:href="http://derma.jmir.org">http://derma.jmir.org</ext-link>), 16.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Dermatology, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="http://derma.jmir.org">http://derma.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://derma.jmir.org/2025/1/e59054"/><abstract><p>In our study, we developed a GPT assistant with a custom knowledge base for neurocutaneous diseases, tested its ability to answer common patient questions, and showed that a GPT using retrieval-augmented generation can improve the readability of patient educational material without being prompted for a specific reading level.</p></abstract><kwd-group><kwd>ChatGPT</kwd><kwd>large language model</kwd><kwd>LLMs</kwd><kwd>natural language processing</kwd><kwd>NLP</kwd><kwd>machine learning</kwd><kwd>artificial intelligence</kwd><kwd>generative AI</kwd><kwd>application programming interface</kwd><kwd>API</kwd><kwd>OpenAI</kwd><kwd>neurocutaneous syndromes</kwd><kwd>cutaneous</kwd><kwd>skin</kwd><kwd>dermatology</kwd><kwd>patient education</kwd><kwd>educational</kwd><kwd>GPT assistant</kwd><kwd>custom GPT</kwd><kwd>readability</kwd><kwd>answer</kwd><kwd>response</kwd><kwd>health 
education</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Children with rare diseases and their families often face the challenge of understanding information regarding such diseases, and educational material is often written above the American Medical Association&#x2019;s recommended sixth-grade level [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. GPTs can create patient education materials, but their readability often exceeds readers&#x2019; comprehension levels [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. GPT assistants are custom GPTs that can use retrieval-augmented generation (RAG) to access specific knowledge [<xref ref-type="bibr" rid="ref6">6</xref>]. This study aims to evaluate a GPT assistant&#x2019;s ability to provide readable patient information on pediatric neurocutaneous syndromes in comparison to ChatGPT-4.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>A GPT assistant was developed by using Python and OpenAI&#x2019;s application programming interface (API; <xref ref-type="fig" rid="figure1">Figure 1</xref>). It was not programmed to answer questions at a specific reading level. Clinician and patient educational materials on four neurocutaneous diseases&#x2014;tuberous sclerosis complex, neurofibromatosis type 1, neurofibromatosis type 2, and Sturge-Weber syndrome&#x2014;were integrated into the configuration, with readability ranging from the eighth-grade level to the collegiate level, including sources like UpToDate and Johns Hopkins Medicine.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flow diagram of the creation of the GPT assistant and how it functions. 
This figure was created in BioRender [<xref ref-type="bibr" rid="ref7">7</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="derma_v8i1e59054_fig01.png"/></fig><p>Five frequently asked patient and caregiver questions surrounding etiology, diagnosis, and management for each of the four diseases were posed to the GPT assistant, with and without a prompt for a response at a sixth-grade reading level (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). This process was repeated in ChatGPT-4. To minimize overoptimization of the models as questions were asked, no data were cached between API requests, and chat history and training were disabled. Readability was assessed by averaging the following eight readability formulas: Automated Readability Index, Flesch Reading Ease Formula, Gunning Fog Index, Flesch-Kincaid Grade Level Formula, Coleman-Liau Index, SMOG (Simple Measure of Gobbledygook) Index, Linsear Write Formula, and FORCAST Readability Formula (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) [<xref ref-type="bibr" rid="ref8">8</xref>]. Two-tailed <italic>t</italic> tests and an ANOVA were used for comparison. 
Response accuracy was assessed via the OpenFactCheck Python package [<xref ref-type="bibr" rid="ref9">9</xref>] and then confirmed by the authors (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p></sec><sec id="s3" sec-type="results"><title>Results</title><p>The overall average reading level of information generated without any specific prompting for a reading level was 11.4 (SD 2.04) for the custom GPT assistant and 15.41 (SD 2.0) for ChatGPT-4 (<xref ref-type="table" rid="table1">Table 1</xref>), revealing that the use of a GPT assistant with a knowledge base of patient educational material improved readability by approximately 4 reading levels (<italic>t</italic><sub>35</sub>=&#x2212;6.02; <italic>P</italic>&#x003C;.001). When prompted to answer questions at a sixth-grade reading level, the custom GPT assistant and ChatGPT-4 had average reading levels of 8.8 (SD 0.83) and 9.5 (SD 1.28), respectively, revealing a 0.7 difference in reading level (<italic>t</italic><sub>38</sub>=&#x2212;2.05; <italic>P</italic>=.047). The combined use of a GPT assistant and reading level prompt resulted in the best performance (<italic>F</italic><sub>3,73</sub>=61.74; <italic>P</italic>&#x003C;.001; <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Average of readability scores for responses generated by the custom GPT assistant without a prompt for reading level, by ChatGPT-4 without a prompt for reading level, by the custom GPT assistant with a prompt for a sixth-grade reading level, and by ChatGPT-4 with a prompt for a sixth-grade reading level. 
The average reading grade level is an average of 8 common readability formulas.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Metrics</td><td align="left" valign="bottom">Custom GPT assistant</td><td align="left" valign="bottom">ChatGPT-4</td><td align="left" valign="bottom">Custom GPT assistant + prompted reading level</td><td align="left" valign="bottom">ChatGPT-4 + prompted reading level</td></tr></thead><tbody><tr><td align="left" valign="top">Average reading grade level, mean (SD)</td><td align="left" valign="top">11.40 (2.04)</td><td align="left" valign="top">15.41 (2.0)</td><td align="left" valign="top">8.80 (0.83)</td><td align="left" valign="top">9.50 (1.28)</td></tr><tr><td align="left" valign="top">Automated Readability Index, mean (SD)</td><td align="left" valign="top">11.68 (2.54)</td><td align="left" valign="top">16.60 (2.45)</td><td align="left" valign="top">9.30 (1.00)</td><td align="left" valign="top">10.04 (1.62)</td></tr><tr><td align="left" valign="top">Flesch Reading Ease, mean (SD)</td><td align="left" valign="top">49.95 (14.84)</td><td align="left" valign="top">23.41 (12.47)</td><td align="left" valign="top">74.65 (5.39)</td><td align="left" valign="top">69.70 (7.34)</td></tr><tr><td align="left" valign="top">Gunning Fog Index, mean (SD)</td><td align="left" valign="top">13.93 (2.51)</td><td align="left" valign="top">18.41 (2.57)</td><td align="left" valign="top">10.23 (1.05)</td><td align="left" valign="top">10.84 (1.72)</td></tr><tr><td align="left" valign="top">Flesch-Kincaid Grade Level, mean (SD)</td><td align="left" valign="top">10.79 (2.32)</td><td align="left" valign="top">15.32 (2.20)</td><td align="left" valign="top">7.56 (0.98)</td><td align="left" valign="top">8.24 (1.45)</td></tr><tr><td align="left" valign="top">Coleman-Liau Index, mean (SD)</td><td align="left" valign="top">11.70 (2.63)</td><td align="left" valign="top">16.07 (2.14)</td><td align="left" 
valign="top">8.21 (0.94)</td><td align="left" valign="top">9.21 (1.29)</td></tr><tr><td align="left" valign="top">SMOG<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> Index, mean (SD)</td><td align="left" valign="top">10.09 (1.84)</td><td align="left" valign="top">13.37 (1.77)</td><td align="left" valign="top">6.69 (0.92)</td><td align="left" valign="top">7.38 (1.37)</td></tr><tr><td align="left" valign="top">Linsear Write score, mean (SD)</td><td align="left" valign="top">11.88 (2.68)</td><td align="left" valign="top">16.09 (2.73)</td><td align="left" valign="top">10.35 (1.49)</td><td align="left" valign="top">10.83 (2.07)</td></tr><tr><td align="left" valign="top">FORCAST readability, mean (SD)</td><td align="left" valign="top">10.85 (1.18)</td><td align="left" valign="top">12.10 (0.74)</td><td align="left" valign="top">8.99 (0.46)</td><td align="left" valign="top">9.28 (0.76)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>SMOG: Simple Measure of Gobbledygook.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>The GPT assistant provided more readable responses about pediatric neurocutaneous diseases than ChatGPT-4 when no reading level was specified and when a reading level was prompted. Using the GPT assistant with a reading level prompt achieved the best results, suggesting that when a GPT assistant accesses educational materials with a variety of reading levels, readability improves. However, specifying a reading level in ChatGPT-4 resulted in better performance than the GPT assistant without a reading level prompt. Furthermore, there is only a small difference in reading level between models when a comprehension level is prompted, indicating that this action enhances readability, though this is not always intuitive for users. 
GPT assistants provide another avenue for improving readability, with or without a reading level prompt.</p><p>This study also indicated that the caliber of data used when designing a GPT directly influences model results. Poor data quality affects machine learning models&#x2019; performance. In the context of readability, poor quality equates to resources with high reading levels. RAG in a GPT assistant allows access to materials with lower reading levels, thereby improving responses without the need for specific prompts. Recent research has determined that RAG improves patient information accuracy and reduces GPT hallucinations; our results show that it can also improve readability [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. If all documents were at a sixth-grade level, readability might improve further; however, more research is needed to determine this.</p><p>GPT assistants have the potential to give pediatric dermatology patients and their families another modality for learning and asking questions about the conditions they face&#x2014;one that is more understandable than ChatGPT alone. Furthermore, GPT assistants may enable clinicians to fine-tune information produced by a GPT specifically for their patient population. GPT assistants with a knowledge base incorporating easy-to-read material can better aid physicians in providing patient- and caregiver-level information, with or without a specific reading level prompt, when compared to ChatGPT-4 alone. A limitation of this study is the limited number of questions assessed. 
However, this study provides a foundation for larger-scale future research.</p></sec></body><back><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb2">RAG</term><def><p>retrieval-augmented generation</p></def></def-item><def-item><term id="abb3">SMOG</term><def><p>Simple Measure of Gobbledygook</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>D&#x2019;Alessandro</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Kingsley</surname><given-names>P</given-names> </name><name name-style="western"><surname>Johnson-West</surname><given-names>J</given-names> </name></person-group><article-title>The readability of pediatric patient education materials on the World Wide Web</article-title><source>Arch Pediatr Adolesc Med</source><year>2001</year><month>07</month><volume>155</volume><issue>7</issue><fpage>807</fpage><lpage>812</lpage><pub-id pub-id-type="doi">10.1001/archpedi.155.7.807</pub-id><pub-id pub-id-type="medline">11434848</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>COST Action BM1105</collab><name name-style="western"><surname>Badiu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bonomi</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Developing and evaluating rare disease educational materials co-created by expert clinicians and patients: the paradigm of congenital hypogonadotropic hypogonadism</article-title><source>Orphanet J Rare 
Dis</source><year>2017</year><month>03</month><day>20</day><volume>12</volume><issue>1</issue><fpage>57</fpage><pub-id pub-id-type="doi">10.1186/s13023-017-0608-2</pub-id><pub-id pub-id-type="medline">28320476</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayre</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mac</surname><given-names>O</given-names> </name><name name-style="western"><surname>McCaffery</surname><given-names>K</given-names> </name><etal/></person-group><article-title>New frontiers in health literacy: using ChatGPT to simplify health information for people in the community</article-title><source>J Gen Intern Med</source><year>2024</year><month>03</month><volume>39</volume><issue>4</issue><fpage>573</fpage><lpage>577</lpage><pub-id pub-id-type="doi">10.1007/s11606-023-08469-w</pub-id><pub-id pub-id-type="medline">37940756</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>JQ</given-names> </name><name name-style="western"><surname>Dobry</surname><given-names>AS</given-names> </name></person-group><article-title>ChatGPT for healthcare providers and patients: practical implications within dermatology</article-title><source>J Am Acad Dermatol</source><year>2023</year><month>10</month><volume>89</volume><issue>4</issue><fpage>870</fpage><lpage>871</lpage><pub-id pub-id-type="doi">10.1016/j.jaad.2023.05.081</pub-id><pub-id pub-id-type="medline">37315798</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Golan</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Ripps</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Reddy</surname><given-names>R</given-names> </name><etal/></person-group><article-title>ChatGPT&#x2019;s ability to assess quality and readability of online medical information: evidence from a cross-sectional study</article-title><source>Cureus</source><year>2023</year><month>07</month><day>20</day><volume>15</volume><issue>7</issue><fpage>e42214</fpage><pub-id pub-id-type="doi">10.7759/cureus.42214</pub-id><pub-id pub-id-type="medline">37484787</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>Assistants API overview</article-title><source>OpenAI</source><access-date>2025-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://platform.openai.com/docs/assistants/overview?context=with-streaming">https://platform.openai.com/docs/assistants/overview?context=with-streaming</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ede</surname><given-names>N</given-names> </name></person-group><article-title>Figure 1. 
Flow diagram of the creation of the GPT assistant and how it functions</article-title><source>BioRender</source><year>2025</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://app.biorender.com/citation/685d9abbd45f2d774558aed4">https://app.biorender.com/citation/685d9abbd45f2d774558aed4</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><source>Readability formulas</source><access-date>2025-05-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://readabilityformulas.com/">https://readabilityformulas.com/</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Iqbal</surname><given-names>H</given-names> </name></person-group><article-title>Hasaniqbal777/openfactcheck: v0.3.0</article-title><source>Zenodo</source><year>2024</year><month>08</month><day>22</day><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://zenodo.org/records/13358665">https://zenodo.org/records/13358665</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Thongprayoon</surname><given-names>C</given-names> </name><name name-style="western"><surname>Suppadungsuk</surname><given-names>S</given-names> </name><name name-style="western"><surname>Garcia Valencia</surname><given-names>OA</given-names> </name><name name-style="western"><surname>Cheungpasitporn</surname><given-names>W</given-names> </name></person-group><article-title>Integrating retrieval-augmented generation with large language models in nephrology: advancing practical applications</article-title><source>Medicina 
(Kaunas)</source><year>2024</year><month>03</month><day>8</day><volume>60</volume><issue>3</issue><fpage>445</fpage><pub-id pub-id-type="doi">10.3390/medicina60030445</pub-id><pub-id pub-id-type="medline">38541171</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ning</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Keppo</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Retrieval-augmented generation for generative artificial intelligence in health care</article-title><source>Npj Health Syst</source><year>2025</year><month>01</month><day>25</day><volume>2</volume><fpage>2</fpage><pub-id pub-id-type="doi">10.1038/s44401-024-00004-1</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Prompts input into the GPT assistant and ChatGPT.</p><media xlink:href="derma_v8i1e59054_app1.docx" xlink:title="DOCX File, 7 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Readability formula definitions.</p><media xlink:href="derma_v8i1e59054_app2.docx" xlink:title="DOCX File, 13 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Supplemental methods and results for response accuracy.</p><media xlink:href="derma_v8i1e59054_app3.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>ANOVA results.</p><media xlink:href="derma_v8i1e59054_app4.docx" xlink:title="DOCX File, 158 KB"/></supplementary-material></app-group></back></article>