@Article{info:doi/10.2196/62774, author="Lei, Changbin and Jiang, Yan and Xu, Ke and Liu, Shanshan and Cao, Hua and Wang, Cong", title="Convolutional Neural Network Models for Visual Classification of Pressure Ulcer Stages: Cross-Sectional Study", journal="JMIR Med Inform", year="2025", month="Mar", day="25", volume="13", pages="e62774", keywords="pressure ulcer", keywords="deep learning", keywords="artificial intelligence", keywords="neural network", keywords="CNN", keywords="machine learning", keywords="image", keywords="imaging", keywords="classification", keywords="ulcer", keywords="sore", keywords="pressure", keywords="wound", keywords="skin", abstract="Background: Pressure injuries (PIs) pose a negative health impact and a substantial economic burden on patients and society. Accurate staging is crucial for treating PIs. Owing to the diversity in the clinical manifestations of PIs and the lack of objective biochemical and pathological examinations, accurate staging of PIs is a major challenge. The deep learning algorithm, which uses convolutional neural networks (CNNs), has demonstrated exceptional classification performance in the intricate domain of skin diseases and wounds and has the potential to improve the staging accuracy of PIs. Objective: We explored the potential of applying AlexNet, VGGNet16, ResNet18, and DenseNet121 to PI staging, aiming to provide an effective tool to assist in staging. Methods: PI images from patients---including those with stage I, stage II, stage III, stage IV, unstageable, and suspected deep tissue injury (SDTI)---were collected at a tertiary hospital in China. Additionally, we augmented the PI data by cropping and flipping the PI images 9 times. The collected images were then divided into training, validation, and test sets at a ratio of 8:1:1. We subsequently trained AlexNet, VGGNet16, ResNet18, and DenseNet121 on these sets to develop staging models. Results: We collected 853 raw PI images with the following distributions across stages: stage I (n=148), stage II (n=121), stage III (n=216), stage IV (n=110), unstageable (n=128), and SDTI (n=130). A total of 7677 images were obtained after data augmentation. Among all the CNN models, DenseNet121 demonstrated the highest overall accuracy of 93.71\%. AlexNet, VGGNet16, and ResNet18 achieved overall accuracies of 87.74\%, 82.42\%, and 92.42\%, respectively. Conclusions: The CNN-based models demonstrated strong classification ability for PI images, which might promote highly efficient, intelligent PI staging methods. In the future, the models can be compared against nurses with different levels of experience to further verify their effectiveness in clinical application. ", doi="10.2196/62774", url="https://medinform.jmir.org/2025/1/e62774" } @Article{info:doi/10.2196/60653, author="Jones, Tudor Owain and Calanzani, Natalia and Scott, E. Suzanne and Matin, N. Rubeta and Emery, Jon and Walter, M. 
Fiona", title="User and Developer Views on Using AI Technologies to Facilitate the Early Detection of Skin Cancers in Primary Care Settings: Qualitative Semistructured Interview Study", journal="JMIR Cancer", year="2025", month="Jan", day="28", volume="11", pages="e60653", keywords="artificial intelligence", keywords="AI", keywords="machine learning", keywords="ML", keywords="primary care", keywords="skin cancer", keywords="melanoma", keywords="qualitative research", keywords="mobile phone", abstract="Background: Skin cancers, including melanoma and keratinocyte cancers, are among the most common cancers worldwide, and their incidence is rising in most populations. Earlier detection of skin cancer leads to better outcomes for patients. Artificial intelligence (AI) technologies have been applied to skin cancer diagnosis, but many technologies lack clinical evidence and/or the appropriate regulatory approvals. There are few qualitative studies examining the views of relevant stakeholders or evidence about the implementation and positioning of AI technologies in the skin cancer diagnostic pathway. Objective: This study aimed to understand the views of several stakeholder groups on the use of AI technologies to facilitate the early diagnosis of skin cancer, including patients, members of the public, general practitioners, primary care nurse practitioners, dermatologists, and AI researchers. Methods: This was a qualitative, semistructured interview study with 29 stakeholders. Participants were purposively sampled based on age, sex, and geographical location. We conducted the interviews via Zoom between September 2022 and May 2023. Transcribed recordings were analyzed using thematic framework analysis. The framework for the Nonadoption, Abandonment, and Challenges to Scale-Up, Spread, and Sustainability was used to guide the analysis to help understand the complexity of implementing diagnostic technologies in clinical settings. Results: Major themes were ``the position of AI in the skin cancer diagnostic pathway'' and ``the aim of the AI technology''; cross-cutting themes included trust, usability and acceptability, generalizability, evaluation and regulation, implementation, and long-term use. There was no clear consensus on where AI should be placed along the skin cancer diagnostic pathway, but most participants saw the technology in the hands of either patients or primary care practitioners. Participants were concerned about the quality of the data used to develop and test AI technologies and the impact this could have on their accuracy in clinical use with patients from a range of demographics and the risk of missing skin cancers. Ease of use and not increasing the workload of already strained health care services were important considerations for participants. Health care professionals and AI researchers reported a lack of established methods of evaluating and regulating AI technologies. Conclusions: This study is one of the first to examine the views of a wide range of stakeholders on the use of AI technologies to facilitate early diagnosis of skin cancer. The optimal approach and position in the diagnostic pathway for these technologies have not yet been determined. AI technologies need to be developed and implemented carefully and thoughtfully, with attention paid to the quality and representativeness of the data used for development, to achieve their potential. 
", doi="10.2196/60653", url="https://cancer.jmir.org/2025/1/e60653" } @Article{info:doi/10.2196/59839, author="Parekh, Pranav and Oyeleke, Richard and Vishwanath, Tejas", title="The Depth Estimation and Visualization of Dermatological Lesions: Development and Usability Study", journal="JMIR Dermatol", year="2024", month="Dec", day="18", volume="7", pages="e59839", keywords="machine learning", keywords="ML", keywords="computer vision", keywords="neural networks", keywords="explainable AI", keywords="XAI", keywords="computer graphics", keywords="red spot analysis", keywords="mixed reality", keywords="MR", keywords="artificial intelligence", keywords="visualization", abstract="Background: Thus far, considerable research has been focused on classifying a lesion as benign or malignant. However, there is a requirement for quick depth estimation of a lesion for the accurate clinical staging of the lesion. The lesion could be malignant and quickly grow beneath the skin. While biopsy slides provide clear information on lesion depth, it is an emerging domain to find quick and noninvasive methods to estimate depth, particularly based on 2D images. Objective: This study proposes a novel methodology for the depth estimation and visualization of skin lesions. Current diagnostic methods are approximate in determining how much a lesion may have proliferated within the skin. Using color gradients and depth maps, this method will give us a definite estimate and visualization procedure for lesions and other skin issues. We aim to generate 3D holograms of the lesion depth such that dermatologists can better diagnose melanoma. Methods: We started by performing classification using a convolutional neural network (CNN), followed by using explainable artificial intelligence to localize the image features responsible for the CNN output. We used the gradient class activation map approach to perform localization of the lesion from the rest of the image. We applied computer graphics for depth estimation and developing the 3D structure of the lesion. We used the depth from defocus method for depth estimation from single images and Gabor filters for volumetric representation of the depth map. Our novel method, called red spot analysis, measures the degree of infection based on how a conical hologram is constructed. We collaborated with a dermatologist to analyze the 3D hologram output and received feedback on how this method can be introduced to clinical implementation. Results: The neural model plus the explainable artificial intelligence algorithm achieved an accuracy of 86\% in classifying the lesions correctly as benign or malignant. For the entire pipeline, we mapped the benign and malignant cases to their conical representations. We received exceedingly positive feedback while pitching this idea at the King Edward Memorial Institute in India. Dermatologists considered this a potentially useful tool in the depth estimation of lesions. We received a number of ideas for evaluating the technique before it can be introduced to the clinical scene. Conclusions: When we map the CNN outputs (benign or malignant) to the corresponding hologram, we observe that a malignant lesion has a higher concentration of red spots (infection) in the upper and deeper portions of the skin, and that the malignant cases have deeper conical sections when compared with the benign cases. This proves that the qualitative results map with the initial classification performed by the neural model. 
The positive feedback provided by the dermatologist suggests that the qualitative conclusion of the method is sufficient. ", doi="10.2196/59839", url="https://derma.jmir.org/2024/1/e59839" } @Article{info:doi/10.2196/50451, author="Barlow, Richard and Bewley, Anthony and Gkini, Angeliki Maria", title="AI in Psoriatic Disease: Scoping Review", journal="JMIR Dermatol", year="2024", month="Oct", day="16", volume="7", pages="e50451", keywords="artificial intelligence", keywords="machine learning", keywords="psoriasis", keywords="psoriatic arthritis", keywords="psoriatic disease", keywords="biologics", keywords="prognostic models", keywords="mobile phone", abstract="Background: Artificial intelligence (AI) has many applications in numerous medical fields, including dermatology. Although the majority of AI studies in dermatology focus on skin cancer, there is growing interest in the applicability of AI models in inflammatory diseases, such as psoriasis. Psoriatic disease is a chronic, inflammatory, immune-mediated systemic condition with multiple comorbidities and a significant impact on patients' quality of life. Advanced treatments, including biologics and small molecules, have transformed the management of psoriatic disease. Nevertheless, there are still considerable unmet needs. Globally, delays in diagnosing the disease and assessing its severity are common due to poor access to health care systems. Moreover, despite the abundance of treatments, we are unable to predict which is the right medication for the right patient, especially in resource-limited settings. AI could be an additional tool to address those needs. In this way, we can improve rates of diagnosis, accurately assess severity, and predict outcomes of treatment. Objective: This study aims to provide an up-to-date literature review on the use of AI in psoriatic disease, including diagnostics and clinical management as well as addressing the limitations in applicability. Methods: We searched the databases MEDLINE, PubMed, and Embase using the keywords ``AI AND psoriasis OR psoriatic arthritis OR psoriatic disease,'' ``machine learning AND psoriasis OR psoriatic arthritis OR psoriatic disease,'' and ``prognostic model AND psoriasis OR psoriatic arthritis OR psoriatic disease'' until June 1, 2023. Reference lists of relevant papers were also cross-examined for other papers not detected in the initial search. Results: Our literature search yielded 38 relevant papers. AI has been identified as a key component in digital health technologies. Within this field, there is the potential to apply specific techniques such as machine learning and deep learning to address several aspects of managing psoriatic disease. This includes diagnosis, which is particularly useful for remote teledermatology via photographs taken by patients, as well as monitoring and estimating severity. Similarly, AI can be used to synthesize the vast data sets already in place through patient registries, which can help identify appropriate biologic treatments for future cohorts and those individuals most likely to develop complications. Conclusions: There are multiple advantageous uses for AI and digital health technologies in psoriatic disease. With wider implementation of AI, we need to be mindful of potential limitations, such as validation and standardization or generalizability of results in specific populations, such as patients with darker skin phototypes. 
", doi="10.2196/50451", url="https://derma.jmir.org/2024/1/e50451" } @Article{info:doi/10.2196/59273, author="Liu, Xu and Duan, Chaoli and Kim, Min-kyu and Zhang, Lu and Jee, Eunjin and Maharjan, Beenu and Huang, Yuwei and Du, Dan and Jiang, Xian", title="Claude 3 Opus and ChatGPT With GPT-4 in Dermoscopic Image Analysis for Melanoma Diagnosis: Comparative Performance Analysis", journal="JMIR Med Inform", year="2024", month="Aug", day="6", volume="12", pages="e59273", keywords="artificial intelligence", keywords="AI", keywords="large language model", keywords="LLM", keywords="Claude", keywords="ChatGPT", keywords="dermatologist", abstract="Background: Recent advancements in artificial intelligence (AI) and large language models (LLMs) have shown potential in medical fields, including dermatology. With the introduction of image analysis capabilities in LLMs, their application in dermatological diagnostics has garnered significant interest. These capabilities are enabled by the integration of computer vision techniques into the underlying architecture of LLMs. Objective: This study aimed to compare the diagnostic performance of Claude 3 Opus and ChatGPT with GPT-4 in analyzing dermoscopic images for melanoma detection, providing insights into their strengths and limitations. Methods: We randomly selected 100 histopathology-confirmed dermoscopic images (50 malignant, 50 benign) from the International Skin Imaging Collaboration (ISIC) archive using a computer-generated randomization process. The ISIC archive was chosen due to its comprehensive and well-annotated collection of dermoscopic images, ensuring a diverse and representative sample. Images were included if they were dermoscopic images of melanocytic lesions with histopathologically confirmed diagnoses. Each model was given the same prompt, instructing it to provide the top 3 differential diagnoses for each image, ranked by likelihood. Primary diagnosis accuracy, accuracy of the top 3 differential diagnoses, and malignancy discrimination ability were assessed. The McNemar test was chosen to compare the diagnostic performance of the 2 models, as it is suitable for analyzing paired nominal data. Results: In the primary diagnosis, Claude 3 Opus achieved 54.9\% sensitivity (95\% CI 44.08\%-65.37\%), 57.14\% specificity (95\% CI 46.31\%-67.46\%), and 56\% accuracy (95\% CI 46.22\%-65.42\%), while ChatGPT demonstrated 56.86\% sensitivity (95\% CI 45.99\%-67.21\%), 38.78\% specificity (95\% CI 28.77\%-49.59\%), and 48\% accuracy (95\% CI 38.37\%-57.75\%). The McNemar test showed no significant difference between the 2 models (P=.17). For the top 3 differential diagnoses, Claude 3 Opus and ChatGPT included the correct diagnosis in 76\% (95\% CI 66.33\%-83.77\%) and 78\% (95\% CI 68.46\%-85.45\%) of cases, respectively. The McNemar test showed no significant difference (P=.56). In malignancy discrimination, Claude 3 Opus outperformed ChatGPT with 47.06\% sensitivity, 81.63\% specificity, and 64\% accuracy, compared to 45.1\%, 42.86\%, and 44\%, respectively. The McNemar test showed a significant difference (P<.001). Claude 3 Opus had an odds ratio of 3.951 (95\% CI 1.685-9.263) in discriminating malignancy, while ChatGPT-4 had an odds ratio of 0.616 (95\% CI 0.297-1.278). Conclusions: Our study highlights the potential of LLMs in assisting dermatologists but also reveals their limitations. Both models made errors in diagnosing melanoma and benign lesions. 
These findings underscore the need for developing robust, transparent, and clinically validated AI models through collaborative efforts between AI researchers, dermatologists, and other health care professionals. While AI can provide valuable insights, it cannot yet replace the expertise of trained clinicians. ", doi="10.2196/59273", url="https://medinform.jmir.org/2024/1/e59273" } @Article{info:doi/10.2196/44913, author="Haugsten, Rygvold Elisabeth and Vestergaard, Tine and Trettin, Bettina", title="Experiences Regarding Use and Implementation of Artificial Intelligence--Supported Follow-Up of Atypical Moles at a Dermatological Outpatient Clinic: Qualitative Study", journal="JMIR Dermatol", year="2023", month="Jun", day="23", volume="6", pages="e44913", keywords="artificial intelligence", keywords="AI", keywords="computer-assisted diagnosis", keywords="CAD", keywords="dermatology", keywords="diagnostic tool", keywords="FotoFinder", keywords="implementation", keywords="interview", keywords="melanoma", keywords="Moleanalyzer Pro", keywords="total body dermoscopy", keywords="TBD", abstract="Background: Artificial intelligence (AI) is increasingly used in numerous medical fields. In dermatology, AI can be used in the form of computer-assisted diagnosis (CAD) systems when assessing and diagnosing skin lesions suspicious of melanoma, a potentially lethal skin cancer with rising incidence all over the world. In particular, CAD may be a valuable tool in the follow-up of patients with high risk of developing melanoma, such as patients with multiple atypical moles. One such CAD system, ATBM Master (FotoFinder), can execute total body dermoscopy (TBD). This process comprises automatically photographing a patient's entire body and then neatly displaying moles on a computer screen, grouped according to their clinical relevance. Proprietary FotoFinder algorithms underlie this organized presentation of moles. In addition, ATBM Master's optional convolutional neural network (CNN)-based Moleanalyzer Pro software can be used to further assess moles and estimate their probability of malignancy. Objective: Few qualitative studies have been conducted on the implementation of AI-supported procedures in dermatology. Therefore, the purpose of this study was to investigate how health care providers experience the use and implementation of a CAD system like ATBM Master, in particular its TBD module. In this way, the study aimed to elucidate potential barriers to the application of such new technology. Methods: We conducted a thematic analysis based on 2 focus group interviews with 14 doctors and nurses regularly working in an outpatient pigmented lesions clinic. Results: Surprisingly, the study revealed that only 3 participants had actual experience using the TBD module. Even so, all participants were able to provide many notions and anticipations about its use, resulting in 3 major themes emerging from the interviews. First, several organizational matters were revealed to be a barrier to consistent use of the ATBM Master's TBD module, namely lack of guidance, time pressure, and insufficient training. Second, the study found that the perceived benefits of TBD were the ability to objectively detect and monitor subtle lesion changes and the unbiased nature of the procedure. Imprecise identification of moles, inability to photograph certain areas, and substandard technical aspects were the perceived weaknesses. 
Lastly, the study found that clinicians were open to using AI-powered technology and that the TBD module was considered a supplementary tool to aid the medical staff, rather than a replacement for the clinician. Conclusions: As demonstrated by how few participants had actual experience with the TBD module, this study showed that implementation of new technology does not occur automatically. It highlights the importance of having a strategy for implementation to ensure the optimized application of CAD tools. The study identified areas that could be improved when implementing AI-powered technology, as well as providing insight into how medical staff anticipated and experienced the use of a CAD device in dermatology. ", doi="10.2196/44913", url="https://derma.jmir.org/2023/1/e44913", url="http://www.ncbi.nlm.nih.gov/pubmed/37632937" } @Article{info:doi/10.2196/44575, author="Kiddle, Adam and Barham, Helen and Wegerif, Simon and Petronzio, Connie", title="Dynamic Region of Interest Selection in Remote Photoplethysmography: Proof-of-Concept Study", journal="JMIR Form Res", year="2023", month="Mar", day="30", volume="7", pages="e44575", keywords="vital sign", keywords="vital sign measurement", keywords="remote photoplethysmography", keywords="contactless vital sign measurement", keywords="region of interest (ROI)", keywords="biomedical sensing", keywords="facial camera PPG", keywords="signal processing", keywords="machine learning", keywords="smart device", keywords="mobile app", keywords="algorithm", keywords="skin tone", abstract="Background: Remote photoplethysmography (rPPG) can record vital signs (VSs) by detecting subtle changes in the light reflected from the skin. Lifelight (Xim Ltd) is novel software being developed as a medical device for the contactless measurement of VSs using rPPG via integral cameras on smart devices. Research to date has focused on extracting the pulsatile VS from the raw signal, which can be influenced by factors such as ambient light, skin thickness, facial movements, and skin tone. Objective: This preliminary proof-of-concept study outlines a dynamic approach to rPPG signal processing wherein green channel signals from the most relevant areas of the face (the midface, comprising the cheeks, nose, and top of the lip) are optimized for each subject using tiling and aggregation (T\&A) algorithms. Methods: High-resolution 60-second videos were recorded during the VISION-MD study. The midface was divided into 62 tiles of 20{\texttimes}20 pixels, and the signals from multiple tiles were evaluated using bespoke algorithms through weighting according to signal-to-noise ratio in the frequency domain (SNR-F) score or segmentation. Midface signals before and after T\&A were categorized by a trained observer blinded to the data processing as 0 (high quality, suitable for algorithm training), 1 (suitable for algorithm testing), or 2 (inadequate quality). On secondary analysis, observer categories were compared for signals predicted to improve categories following T\&A based on the SNR-F score. Observer ratings and SNR-F scores were also compared before and after T\&A for Fitzpatrick skin tones 5 and 6, wherein rPPG is hampered by light absorption by melanin. Results: The analysis used 4310 videos recorded from 1315 participants. Category 2 and 1 signals had lower mean SNR-F scores than category 0 signals. T\&A improved the mean SNR-F score using all algorithms. 
Depending on the algorithm, 18\% (763/4212) to 31\% (1306/4212) of signals improved by at least one category, with up to 10\% (438/4212) improving into category 0, and 67\% (2834/4212) to 79\% (3337/4212) remaining in the same category. Importantly, 9\% (396/4212) to 21\% (875/4212) improved from category 2 (not usable) into category 1. All algorithms showed improvements. No more than 3\% (137/4212) of signals were assigned to a lower-quality category following T\&A. On secondary analysis, 62\% of signals (32/52) were recategorized, as predicted from the SNR-F score. T\&A improved SNR-F scores in darker skin tones; 41\% of signals (151/369) improved from category 2 to 1 and 12\% (44/369) from category 1 to 0. Conclusions: The T\&A approach to dynamic region of interest selection improved signal quality, including in dark skin tones. The method was verified by comparison with a trained observer's rating. T\&A could overcome factors that compromise whole-face rPPG. This method's performance in estimating VS is currently being assessed. Trial Registration: ClinicalTrials.gov NCT04763746; https://clinicaltrials.gov/ct2/show/NCT04763746 ", doi="10.2196/44575", url="https://formative.jmir.org/2023/1/e44575", url="http://www.ncbi.nlm.nih.gov/pubmed/36995742" } @Article{info:doi/10.2196/35150, author="Oloruntoba, I. Ayooluwatomiwa and Vestergaard, Tine and Nguyen, D. Toan and Yu, Zhen and Sashindranath, Maithili and Betz-Stablein, Brigid and Soyer, Peter H. and Ge, Zongyuan and Mar, Victoria", title="Assessing the Generalizability of Deep Learning Models Trained on Standardized and Nonstandardized Images and Their Performance Against Teledermatologists: Retrospective Comparative Study", journal="JMIR Dermatol", year="2022", month="Sep", day="12", volume="5", number="3", pages="e35150", keywords="artificial intelligence", keywords="AI", keywords="convolutional neural network", keywords="CNN", keywords="teledermatology", keywords="standardized Image", keywords="nonstandardized image", keywords="machine learning", keywords="skin cancer", keywords="cancer", abstract="Background: Convolutional neural networks (CNNs) are a type of artificial intelligence that shows promise as a diagnostic aid for skin cancer. However, the majority are trained using retrospective image data sets with varying image capture standardization. Objective: The aim of our study was to use CNN models with the same architecture---trained on image sets acquired with either the same image capture device and technique (standardized) or with varied devices and capture techniques (nonstandardized)---and test variability in performance when classifying skin cancer images in different populations. Methods: In all, 3 CNNs with the same architecture were trained. CNN nonstandardized (CNN-NS) was trained on 25,331 images taken from the International Skin Imaging Collaboration (ISIC) using different image capture devices. CNN standardized (CNN-S) was trained on 177,475 MoleMap images taken with the same capture device, and CNN standardized number 2 (CNN-S2) was trained on a subset of 25,331 standardized MoleMap images (matched for number and classes of training images to CNN-NS). These 3 models were then tested on 3 external test sets: 569 Danish images, the publicly available ISIC 2020 data set consisting of 33,126 images, and The University of Queensland (UQ) data set of 422 images. Primary outcome measures were sensitivity, specificity, and area under the receiver operating characteristic curve (AUROC). 
Teledermatology assessments available for the Danish data set were used to determine model performance compared to teledermatologists. Results: When tested on the 569 Danish images, CNN-S achieved an AUROC of 0.861 (95\% CI 0.830-0.889) and CNN-S2 achieved an AUROC of 0.831 (95\% CI 0.798-0.861; standardized models), with both outperforming CNN-NS (nonstandardized model; P=.001 and P=.009, respectively), which achieved an AUROC of 0.759 (95\% CI 0.722-0.794). When tested on 2 additional data sets (ISIC 2020 and UQ), CNN-S (P<.001 and P<.001, respectively) and CNN-S2 (P=.08 and P=.35, respectively) still outperformed CNN-NS. When the CNNs were matched to the mean sensitivity and specificity of the teledermatologists on the Danish data set, the models' resultant sensitivities and specificities were surpassed by the teledermatologists. However, when compared to CNN-S, the differences were not statistically significant (sensitivity: P=.10; specificity: P=.053). Performance across all CNN models as well as teledermatologists was influenced by image quality. Conclusions: CNNs trained on standardized images had improved performance and, therefore, greater generalizability in skin cancer classification when applied to unseen data sets. This finding is an important consideration for future algorithm development, regulation, and approval. ", doi="10.2196/35150", url="https://derma.jmir.org/2022/3/e35150" } @Article{info:doi/10.2196/39143, author="Rezk, Eman and Eltorki, Mohamed and El-Dakhakhni, Wael", title="Improving Skin Color Diversity in Cancer Detection: Deep Learning Approach", journal="JMIR Dermatol", year="2022", month="Aug", day="19", volume="5", number="3", pages="e39143", keywords="deep learning", keywords="neural network", keywords="machine learning", keywords="algorithm", keywords="artificial intelligence", keywords="skin tone diversity", keywords="data augmentation", keywords="skin cancer diagnosis", keywords="generalizability", keywords="skin", keywords="cancer", keywords="diagnosis", keywords="diagnostic", keywords="imaging", keywords="dermatology", keywords="digital health", keywords="image generation", keywords="generated image", keywords="computer-generated", keywords="lesion", abstract="Background: The lack of dark skin images of pathologic skin lesions in dermatology resources hinders the accurate diagnosis of skin lesions in people of color. Artificial intelligence applications have further disadvantaged people of color because those applications are mainly trained with light skin color images. Objective: The aim of this study is to develop a deep learning approach that generates realistic images of darker skin colors to improve dermatology data diversity for various malignant and benign lesions. Methods: We collected skin clinical images for common malignant and benign skin conditions from DermNet NZ, the International Skin Imaging Collaboration, and Dermatology Atlas. Two deep learning methods, style transfer (ST) and deep blending (DB), were utilized to generate images with darker skin colors using the lighter skin images. The generated images were evaluated quantitatively and qualitatively. Furthermore, a convolutional neural network (CNN) was trained using the generated images to assess the latter's effect on skin lesion classification accuracy. Results: Image quality assessment showed that the ST method outperformed DB, as the former achieved a lower loss-of-realism score of 0.23 (95\% CI 0.19-0.27) compared to 0.63 (95\% CI 0.59-0.67) for the DB method. 
In addition, ST achieved a higher disease presentation with a similarity score of 0.44 (95\% CI 0.40-0.49) compared to 0.17 (95\% CI 0.14-0.21) for the DB method. The qualitative assessment completed on masked participants indicated that ST-generated images exhibited high realism, whereby 62.2\% (1511/2430) of the votes for the generated images were classified as real. Eight dermatologists correctly diagnosed the lesions in the generated images with an average rate of 0.75 (360 correct diagnoses out of 480) for several malignant and benign lesions. Finally, the classification accuracy and the area under the curve (AUC) of the model when considering the generated images were 0.76 (95\% CI 0.72-0.79) and 0.72 (95\% CI 0.67-0.77), respectively, compared to the accuracy of 0.56 (95\% CI 0.52-0.60) and AUC of 0.63 (95\% CI 0.58-0.68) for the model without considering the generated images. Conclusions: Deep learning approaches can generate realistic skin lesion images that improve the skin color diversity of dermatology atlases. The diversified image bank, utilized herein to train a CNN, demonstrates the potential of developing generalizable artificial intelligence skin cancer diagnosis applications. International Registered Report Identifier (IRRID): RR2-10.2196/34896 ", doi="10.2196/39143", url="https://derma.jmir.org/2022/3/e39143" } @Article{info:doi/10.2196/35497, author="Park, Christine and Jeong, Ki Hyeon and Henao, Ricardo and Kheterpal, Meenal", title="Current Landscape of Generative Adversarial Networks for Facial Deidentification in Dermatology: Systematic Review and Evaluation", journal="JMIR Dermatol", year="2022", month="May", day="27", volume="5", number="2", pages="e35497", keywords="facial recognition", keywords="deidentification", keywords="facial photographs", keywords="HIPAA", keywords="dermatology", keywords="guidelines", abstract="Background: Deidentifying facial images is critical for protecting patient anonymity in the era of increasing tools for automatic image analysis in dermatology. Objective: The aim of this paper was to review the current literature in the field of automatic facial deidentification algorithms. Methods: We conducted a systematic search using a combination of headings and keywords to encompass the concepts of facial deidentification and privacy preservation. The MEDLINE (via PubMed), Embase (via Elsevier), and Web of Science (via Clarivate) databases were queried from inception to May 1, 2021. Studies of incorrect design and outcomes were excluded during the screening and review process. Results: A total of 18 studies reporting on various methodologies of facial deidentification algorithms were included in the final review. The study methods were rated individually regarding their utility for use cases in dermatology pertaining to skin color and pigmentation preservation, texture preservation, data utility, and human detection. Most studies that were notable in the literature addressed feature preservation while sacrificing skin color and texture. Conclusions: Facial deidentification algorithms are sparse and inadequate for preserving both facial features and skin pigmentation and texture quality in facial photographs. A novel approach is needed to ensure greater patient anonymity, while increasing data access for automated image analysis in dermatology for improved patient care. 
", doi="10.2196/35497", url="https://derma.jmir.org/2022/2/e35497" } @Article{info:doi/10.2196/34896, author="Rezk, Eman and Eltorki, Mohamed and El-Dakhakhni, Wael", title="Leveraging Artificial Intelligence to Improve the Diversity of Dermatological Skin Color Pathology: Protocol for an Algorithm Development and Validation Study", journal="JMIR Res Protoc", year="2022", month="Mar", day="8", volume="11", number="3", pages="e34896", keywords="artificial intelligence", keywords="skin cancer", keywords="skin tone diversity", keywords="people of color", keywords="image blending", keywords="deep learning", keywords="classification", keywords="early diagnosis", abstract="Background: The paucity of dark skin images in dermatological textbooks and atlases is a reflection of racial injustice in medicine. The underrepresentation of dark skin images makes diagnosing skin pathology in people of color challenging. For conditions such as skin cancer, in which early diagnosis makes a difference between life and death, people of color have worse prognoses and lower survival rates than people with lighter skin tones as a result of delayed or incorrect diagnoses. Recent advances in artificial intelligence, such as deep learning, offer a potential solution that can be achieved by diversifying the mostly light-skin image repositories through generating images for darker skin tones. Thus, facilitating the development of inclusive cancer early diagnosis systems that are trained and tested on diverse images that truly represent human skin tones. Objective: We aim to develop and evaluate an artificial intelligence--based skin cancer early detection system for all skin tones using clinical images. Methods: This study consists of four phases: (1) Publicly available skin image repositories will be analyzed to quantify the underrepresentation of darker skin tones, (2) Images will be generated for the underrepresented skin tones, (3) Generated images will be extensively evaluated for realism and disease presentation with quantitative image quality assessment as well as qualitative human expert and nonexpert ratings, and (4) The images will be utilized with available light-skin images to develop a robust skin cancer early detection model. Results: This study started in September 2020. The first phase of quantifying the underrepresentation of darker skin tones was completed in March 2021. The second phase of generating the images is in progress and will be completed by March 2022. The third phase is expected to be completed by May 2022, and the final phase is expected to be completed by September 2022. Conclusions: This work is the first step toward expanding skin tone diversity in existing image databases to address the current gap in the underrepresentation of darker skin tones. Once validated, the image bank will be a valuable resource that can potentially be utilized in physician education and in research applications. Furthermore, generated images are expected to improve the generalizability of skin cancer detection. When completed, the model will assist family physicians and general practitioners in evaluating skin lesion severity and in efficient triaging for referral to expert dermatologists. In addition, the model can assist dermatologists in diagnosing skin lesions. 
International Registered Report Identifier (IRRID): DERR1-10.2196/34896 ", doi="10.2196/34896", url="https://www.researchprotocols.org/2022/3/e34896", url="http://www.ncbi.nlm.nih.gov/pubmed/34983017" } @Article{info:doi/10.2196/22798, author="Chang, Wei Che and Lai, Feipei and Christian, Mesakh and Chen, Chun Yu and Hsu, Ching and Chen, Shen Yo and Chang, Hao Dun and Roan, Luen Tyng and Yu, Che Yen", title="Deep Learning--Assisted Burn Wound Diagnosis: Diagnostic Model Development Study", journal="JMIR Med Inform", year="2021", month="Dec", day="2", volume="9", number="12", pages="e22798", keywords="deep learning", keywords="semantic segmentation", keywords="instance segmentation", keywords="burn wounds", keywords="percentage total body surface area", abstract="Background: Accurate assessment of the percentage total body surface area (\%TBSA) of burn wounds is crucial in the management of burn patients. The resuscitation fluid and nutritional needs of burn patients, their need for intensive unit care, and probability of mortality are all directly related to \%TBSA. It is difficult to estimate a burn area of irregular shape by inspection. Many articles have reported discrepancies in estimating \%TBSA by different doctors. Objective: We propose a method, based on deep learning, for burn wound detection, segmentation, and calculation of \%TBSA on a pixel-to-pixel basis. Methods: A 2-step procedure was used to convert burn wound diagnosis into \%TBSA. In the first step, images of burn wounds were collected from medical records and labeled by burn surgeons, and the data set was then input into 2 deep learning architectures, U-Net and Mask R-CNN, each configured with 2 different backbones, to segment the burn wounds. In the second step, we collected and labeled images of hands to create another data set, which was also input into U-Net and Mask R-CNN to segment the hands. The \%TBSA of burn wounds was then calculated by comparing the pixels of mask areas on images of the burn wound and hand of the same patient according to the rule of hand, which states that one's hand accounts for 0.8\% of TBSA. Results: A total of 2591 images of burn wounds were collected and labeled to form the burn wound data set. The data set was randomly split into training, validation, and testing sets in a ratio of 8:1:1. Four hundred images of volar hands were collected and labeled to form the hand data set, which was also split into 3 sets using the same method. For the images of burn wounds, Mask R-CNN with ResNet101 had the best segmentation result with a Dice coefficient (DC) of 0.9496, while U-Net with ResNet101 had a DC of 0.8545. For the hand images, U-Net and Mask R-CNN had similar performance with DC values of 0.9920 and 0.9910, respectively. Lastly, we conducted a test diagnosis in a burn patient. Mask R-CNN with ResNet101 had on average less deviation (0.115\% TBSA) from the ground truth than burn surgeons. Conclusions: This is one of the first studies to diagnose all depths of burn wounds and convert the segmentation results into \%TBSA using different deep learning models. We aimed to assist medical staff in estimating burn size more accurately, thereby helping to provide precise care to burn victims. 
", doi="10.2196/22798", url="https://medinform.jmir.org/2021/12/e22798", url="http://www.ncbi.nlm.nih.gov/pubmed/34860674" } @Article{info:doi/10.2196/22934, author="Takiddin, Abdulrahman and Schneider, Jens and Yang, Yin and Abd-Alrazaq, Alaa and Househ, Mowafa", title="Artificial Intelligence for Skin Cancer Detection: Scoping Review", journal="J Med Internet Res", year="2021", month="Nov", day="24", volume="23", number="11", pages="e22934", keywords="artificial intelligence", keywords="skin cancer", keywords="skin lesion", keywords="machine learning", keywords="deep neural networks", abstract="Background: Skin cancer is the most common cancer type affecting humans. Traditional skin cancer diagnosis methods are costly, require a professional physician, and take time. Hence, to aid in diagnosing skin cancer, artificial intelligence (AI) tools are being used, including shallow and deep machine learning--based methodologies that are trained to detect and classify skin cancer using computer algorithms and deep neural networks. Objective: The aim of this study was to identify and group the different types of AI-based technologies used to detect and classify skin cancer. The study also examined the reliability of the selected papers by studying the correlation between the data set size and the number of diagnostic classes with the performance metrics used to evaluate the models. Methods: We conducted a systematic search for papers using Institute of Electrical and Electronics Engineers (IEEE) Xplore, Association for Computing Machinery Digital Library (ACM DL), and Ovid MEDLINE databases following the Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews (PRISMA-ScR) guidelines. The studies included in this scoping review had to fulfill several selection criteria: being specifically about skin cancer, detecting or classifying skin cancer, and using AI technologies. Study selection and data extraction were independently conducted by two reviewers. Extracted data were narratively synthesized, where studies were grouped based on the diagnostic AI techniques and their evaluation metrics. Results: We retrieved 906 papers from the 3 databases, of which 53 were eligible for this review. Shallow AI-based techniques were used in 14 studies, and deep AI-based techniques were used in 39 studies. The studies used up to 11 evaluation metrics to assess the proposed models, where 39 studies used accuracy as the primary evaluation metric. Overall, studies that used smaller data sets reported higher accuracy. Conclusions: This paper examined multiple AI-based skin cancer detection models. However, a direct comparison between methods was hindered by the varied use of different evaluation metrics and image types. Performance scores were affected by factors such as data set size, number of diagnostic classes, and techniques. Hence, the reliability of shallow and deep models with higher accuracy scores was questionable since they were trained and tested on relatively small data sets of a few diagnostic classes. 
", doi="10.2196/22934", url="https://www.jmir.org/2021/11/e22934", url="http://www.ncbi.nlm.nih.gov/pubmed/34821566" } @Article{info:doi/10.2196/31697, author="Aggarwal, Pushkar", title="Performance of Artificial Intelligence Imaging Models in Detecting Dermatological Manifestations in Higher Fitzpatrick Skin Color Classifications", journal="JMIR Dermatol", year="2021", month="Oct", day="12", volume="4", number="2", pages="e31697", keywords="deep learning", keywords="melanoma", keywords="basal cell carcinoma", keywords="skin of color", keywords="image recognition", keywords="dermatology", keywords="disease", keywords="convolutional neural network", keywords="specificity", keywords="prediction", keywords="artificial intelligence", keywords="skin color", keywords="skin tone", abstract="Background: The performance of deep-learning image recognition models is below par when applied to images with Fitzpatrick classification skin types 4 and 5. Objective: The objective of this research was to assess whether image recognition models perform differently when differentiating between dermatological diseases in individuals with darker skin color (Fitzpatrick skin types 4 and 5) than when differentiating between the same dermatological diseases in Caucasians (Fitzpatrick skin types 1, 2, and 3) when both models are trained on the same number of images. Methods: Two image recognition models were trained, validated, and tested. The goal of each model was to differentiate between melanoma and basal cell carcinoma. Open-source images of melanoma and basal cell carcinoma were acquired from the Hellenic Dermatological Atlas, the Dermatology Atlas, the Interactive Dermatology Atlas, and DermNet NZ. Results: The image recognition models trained and validated on images with light skin color had higher sensitivity, specificity, positive predictive value, negative predictive value, and F1 score than the image recognition models trained and validated on images of skin of color for differentiation between melanoma and basal cell carcinoma. Conclusions: A higher number of images of dermatological diseases in individuals with darker skin color than images of dermatological diseases in individuals with light skin color would need to be gathered for artificial intelligence models to perform equally well. ", doi="10.2196/31697", url="https://derma.jmir.org/2021/2/e31697", url="http://www.ncbi.nlm.nih.gov/pubmed/37632853" }