<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIRx Med</journal-id><journal-id journal-id-type="publisher-id">xmed</journal-id><journal-id journal-id-type="index">34</journal-id><journal-title>JMIRx Med</journal-title><abbrev-journal-title>JMIRx Med</abbrev-journal-title><issn pub-type="epub">2563-6316</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v6i1e75015</article-id><article-id pub-id-type="doi">10.2196/75015</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>COVID-19 Pneumonia Diagnosis Using Medical Images: Deep Learning&#x2013;Based Transfer Learning Approach</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Dharmik</surname><given-names>Anjali</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Royal Holloway University of London</institution><addr-line>Egham Hill</addr-line><addr-line>Egham</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Wu</surname><given-names>Fuqing</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Ndezure</surname><given-names>Emmanuel</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Odezuligbo</surname><given-names>Ikenna</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Sunny</surname><given-names>Chi Lik Au</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Anjali Dharmik, MSc, Royal Holloway University of London, Egham Hill, Egham, TW20 0EX, United Kingdom, 44 7867304854; <email>anjali.dharmik@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>26</day><month>9</month><year>2025</year></pub-date><volume>6</volume><elocation-id>e75015</elocation-id><history><date date-type="received"><day>26</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>16</day><month>08</month><year>2025</year></date><date date-type="accepted"><day>29</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Anjali Dharmik. Originally published in JMIRx Med (<ext-link ext-link-type="uri" xlink:href="https://med.jmirx.org">https://med.jmirx.org</ext-link>), 26.9.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIRx Med, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://med.jmirx.org/">https://med.jmirx.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://xmed.jmir.org/2025/1/e75015"/><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.48550/arXiv.2503.12642" xlink:title="Preprint (arXiv)" xlink:type="simple">https://arxiv.org/abs/2503.12642v2</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/83231" xlink:title="Peer-Review Report by Sunny Chi Lik Au (Reviewer S)" xlink:type="simple">https://med.jmirx.org/2025/1/e83231</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/83234" xlink:title="Peer-Review Report by Ikenna Odezuligbo (Reviewer AA)" xlink:type="simple">https://med.jmirx.org/2025/1/e83234</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/83236" xlink:title="Peer-Review Report by Emmanuel Ndezure (Reviewer AB)" xlink:type="simple">https://med.jmirx.org/2025/1/e83236</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/83230" xlink:title="Authors' Response to Peer-Review Reports" xlink:type="simple">https://med.jmirx.org/2025/1/e83230</related-article><abstract><sec><title>Background</title><p>SARS-CoV-2, the causative agent of COVID-19, remains a global health concern due to its high transmissibility and evolving variants. Although vaccination efforts and therapeutic advancements have mitigated disease severity, emerging mutations continue to challenge diagnostics and containment strategies. As of mid-February 2025, global test positivity has risen to 11%, marking the highest level in over 6 months, despite widespread immunization efforts. Newer variants demonstrate enhanced host cell binding, increasing both infectivity and diagnostic complexity.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the effectiveness of deep transfer learning in delivering a rapid, accurate, and mutation-resilient COVID-19 diagnosis from medical imaging, with a focus on scalability and accessibility.</p></sec><sec sec-type="methods"><title>Methods</title><p>An automated detection system was developed using state-of-the-art convolutional neural networks, including VGG16 (Visual Geometry Group network-16 layers), ResNet50 (residual network-50 layers), ConvNeXtTiny (convolutional next-tiny), MobileNet (mobile network), NASNetMobile (neural architecture search network-mobile version), and DenseNet121 (densely connected convolutional network-121 layers), to detect COVID-19 from chest X-ray and computed tomography (CT) images.</p></sec><sec sec-type="results"><title>Results</title><p>Among all the models evaluated, DenseNet121 emerged as the best-performing architecture for COVID-19 diagnosis using X-ray and CT images. It achieved an impressive accuracy of 98%, with a precision of 96.9%, a recall of 98.9%, an <italic>F</italic><sub>1</sub>-score of 97.9%, and an area under the curve score of 99.8%, indicating a high degree of consistency and reliability in detecting both positive and negative cases. The confusion matrix showed minimal false positives and false negatives, underscoring the model&#x2019;s robustness in real-world diagnostic scenarios. Given its performance, DenseNet121 is a strong candidate for deployment in clinical settings and serves as a benchmark for future improvements in artificial intelligence&#x2013;assisted diagnostic tools.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The study results underscore the potential of artificial intelligence&#x2013;powered diagnostics in supporting early detection and global pandemic response. With careful optimization, deep learning models can address critical gaps in testing, particularly in settings constrained by limited resources or emerging variants.</p></sec></abstract><kwd-group><kwd>computer vision</kwd><kwd>COVID-19 pneumonia diagnosis</kwd><kwd>deep learning</kwd><kwd>transfer learning</kwd><kwd>medical imaging analysis</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>SARS-CoV-2, the virus responsible for COVID-19, first emerged on December 31, 2019, in Wuhan City, Hubei Province, China [<xref ref-type="bibr" rid="ref1">1</xref>]. It is a highly transmissible respiratory pathogen capable of causing severe illness or death across all age groups [<xref ref-type="bibr" rid="ref2">2</xref>]. Since its initial outbreak, substantial progress has been made in managing the virus through vaccination, antiviral therapies, and diagnostic technologies powered by artificial intelligence (AI).</p><p>Despite these advances, SARS-CoV-2 continues to pose a global health challenge, especially for immunocompromised individuals and those with underlying conditions. One of the most persistent obstacles is the virus&#x2019;s ability to mutate rapidly. To date, more than 26 genetically distinct variants have been identified, many of which exhibit increased transmissibility and immune evasion due to mutations that enhance their binding affinity to host cells [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>By August 20, 2023, the pandemic had resulted in over 769 million confirmed cases and more than 6.9 million deaths worldwide [<xref ref-type="bibr" rid="ref4">4</xref>]. Early in the pandemic (January 30, 2020), the World Health Organization (WHO) declared COVID-19 a public health emergency of international concern [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>More recently, SARS-CoV-2 has shown a global resurgence. As of May 11, 2025, surveillance data from the Global Influenza Surveillance and Response System indicated that the global test positivity rate reached 11%, up significantly from 2% in February 2025 [<xref ref-type="bibr" rid="ref6">6</xref>]. This current wave, comparable to the July 2024 peak of 12%, is largely driven by cases in the Eastern Mediterranean, South-East Asia, and the Western Pacific Region [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>A key driver of this resurgence is the emergence of the recombinant XEC variant, first detected in Germany in June 2024 [<xref ref-type="bibr" rid="ref7">7</xref>]. Derived from the 2 Omicron subvariants KS.1.1 and KP.3.3, XEC rapidly spread worldwide, and by December 2024, it accounted for nearly 45% of cases in the United States [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. Its global dominance underscores the critical importance of continued genomic surveillance and adaptive diagnostic strategies.</p><p>In February 2025, the WHO categorized circulating variants as follows: dominant variant: XEC; variant of interest: JN.1 (known for partial immune evasion) [<xref ref-type="bibr" rid="ref10">10</xref>]; variants under monitoring: KP.2, KP.3, KP.3.1.1, JN.1.18, LB.1, XEC, and LP.8.1 (potential impact on transmission and immunity) [<xref ref-type="bibr" rid="ref10">10</xref>]. Compared to January 2024, when variants like EG.5 (Eris) and FL.1.5.1 (Fornax) dominated, the landscape has shifted greatly in 2025, with XEC and JN.1 overtaking earlier subvariants such as XBB.1.16 (Arcturus) [<xref ref-type="bibr" rid="ref3">3</xref>]. The evolution of COVID-19 variants and their global impacts are presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Evolution of dominant COVID-19 variants and their global impact (January 2024-February 2025).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Time period</td><td align="left" valign="top">Dominant/high-prevalence variants</td><td align="left" valign="top">Key characteristics</td><td align="left" valign="top">Status by February 2025</td></tr></thead><tbody><tr><td align="left" valign="top">January 2024</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EG.5 (Eris): 24.5%</p></list-item><list-item><p>FL.1.5.1 (Fornax): 13.7%</p></list-item><list-item><p>XBB.1.16 (Arcturus): declining presence</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Derived from Omicron lineages</p></list-item><list-item><p>Moderate immune escape</p></list-item></list></td><td align="left" valign="top">Largely replaced by newer variants</td></tr><tr><td align="left" valign="top">July 2024</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Mixed circulation; early rise of XEC</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>XEC began spreading in Europe</p></list-item></list></td><td align="left" valign="top">Became dominant by late 2024</td></tr><tr><td align="left" valign="top">December 2024</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>XEC 45% in the United States</p></list-item><list-item><p>Increasing in Europe and Australia</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Recombinant of KS.1.1 + KP.3.3</p></list-item><list-item><p>High transmissibility</p></list-item></list></td><td align="left" valign="top">Global spread accelerating</td></tr><tr><td align="left" valign="top">February 2025</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>XEC: dominant globally</p></list-item><list-item><p>JN.1: variant of concern</p></list-item><list-item><p>Variants under monitoring: KP.2, KP.3, LP.8.1, etc</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Enhanced immune evasion</p></list-item><list-item><p>Multiple regions affected</p></list-item></list></td><td align="left" valign="top">Driving the recent case surge</td></tr></tbody></table></table-wrap></sec><sec id="s1-2"><title>Symptoms</title><p>COVID-19, caused by the SARS-CoV-2 virus, primarily affects the respiratory system, with symptoms ranging from mild upper respiratory issues to severe lung involvement. While most cases are mild, individuals with comorbidities (cardiovascular disease, diabetes, or cancer) are at higher risk for complications [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Variants like Delta have shown a preference for the lower respiratory tract, leading to lung consolidation and pneumonia, which are features identifiable on computed tomography (CT) scans and X-rays. In contrast, Omicron subvariants tend to affect the upper airways more, often resulting in less severe radiological findings [<xref ref-type="bibr" rid="ref12">12</xref>]. However, symptomatology continues to evolve with emerging variants, influencing the type and severity of pulmonary involvement seen in medical images [<xref ref-type="bibr" rid="ref3">3</xref>]. The correlations between clinical symptoms and radiological patterns are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Correlation between clinical symptoms and radiological patterns in COVID-19 diagnosis.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Symptom</td><td align="left" valign="top">Radiological pattern</td><td align="left" valign="top">Imaging modality</td><td align="left" valign="top">Relevance to the study</td></tr></thead><tbody><tr><td align="left" valign="top">Dry cough</td><td align="left" valign="top">GGOs<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>, peripheral opacities</td><td align="left" valign="top">CT<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>, X-ray</td><td align="left" valign="top">Frequently observed in mild to moderate COVID-19 pneumonia</td></tr><tr><td align="left" valign="top">Shortness of breath</td><td align="left" valign="top">Bilateral GGOs, interstitial thickening</td><td align="left" valign="top">CT, X-ray</td><td align="left" valign="top">Indicates lower lung involvement; key pattern for classification</td></tr><tr><td align="left" valign="top">Fever</td><td align="left" valign="top">Often present alongside GGOs</td><td align="left" valign="top">CT</td><td align="left" valign="top">Supports image-based diagnosis when combined with lung findings</td></tr><tr><td align="left" valign="top">Hypoxia</td><td align="left" valign="top">Diffuse alveolar damage, ARDS<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>-like patterns</td><td align="left" valign="top">CT</td><td align="left" valign="top">Seen in severe cases; helps the model identify critical patterns</td></tr><tr><td align="left" valign="top">Chest pain</td><td align="left" valign="top">Subpleural consolidations, patchy opacities</td><td align="left" valign="top">CT</td><td align="left" valign="top">May reflect inflammatory involvement; assists in differentiation</td></tr><tr><td align="left" valign="top">Long COVID symptoms</td><td align="left" valign="top">Fibrotic changes, residual GGOs</td><td align="left" valign="top">CT</td><td align="left" valign="top">Useful for tracking persistent lung changes in follow-up scans</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>GGOs: ground-glass opacities.</p></fn><fn id="table2fn2"><p><sup>b</sup>CT: computed tomography.</p></fn><fn id="table2fn3"><p><sup>c</sup>ARDS: acute respiratory distress syndrome.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s1-3"><title>Related Work</title><p>In response to the global impact of COVID-19, a wide range of clinical and technological strategies have been developed to support diagnosis, treatment, and containment. Among these, imaging-based AI systems have emerged as promising tools for timely and accessible COVID-19 diagnosis, particularly in resource-limited and high-burden settings. However, a review of the existing literature revealed notable challenges in data diversity, standardization, and model generalizability.</p><sec id="s1-3-1"><title>Telehealth Services</title><p>The rapid expansion of telemedicine platforms enabled remote assessment and monitoring of COVID-19 patients, especially during peak transmission periods when hospital resources were overwhelmed [<xref ref-type="bibr" rid="ref13">13</xref>]. However, telehealth often lacks the diagnostic depth provided by imaging or laboratory testing and is generally used for symptom tracking and triage rather than precise diagnosis.</p></sec><sec id="s1-3-2"><title>Imaging-Based Diagnostics</title><p>Chest X-rays and CT scans have been instrumental in identifying characteristic COVID-19 lung involvement, including bilateral ground-glass opacities and consolidations [<xref ref-type="bibr" rid="ref14">14</xref>]. Numerous deep learning models have been developed for pneumonia and COVID-19 detection using chest X-ray and CT data. For example, MobileNet (mobile network) achieved 94.2% and 93.7% accuracy on 2 public chest X-ray datasets containing 5856 and 112,120 images, respectively [<xref ref-type="bibr" rid="ref15">15</xref>]. Despite these benefits, existing studies often suffer from limited and nonstandardized datasets, a lack of demographic metadata (age and sex), and geographical imbalance, reducing generalizability. In a separate study using InceptionV3 and convolutional neural network (CNN) models on a Kaggle X-ray dataset of 7750 images, the researchers reported impressive results (accuracy: 99.2%, recall: 99.7%) [<xref ref-type="bibr" rid="ref16">16</xref>]. However, the use of a single public dataset lacking demographic diversity and external validation limits generalizability.</p><p>A CT-based study using NASNet achieved an exceptionally high accuracy of 99.6%, with a sensitivity of 99.9% and a specificity of 98.6% [<xref ref-type="bibr" rid="ref17">17</xref>]. However, this evaluation was based on a small, imbalanced dataset of 249 patients, with no external validation, no interpretability tools, and no metadata analysis (eg, age, sex, and geography), weakening its clinical reliability and fairness. Furthermore, alternative architectures like ResNet or VGG were not benchmarked, and hyperparameter tuning was minimally discussed.</p><p>These limitations underscore the need for scalable, diverse, and metadata-rich imaging datasets to enhance model reliability and cross-population performance.</p></sec><sec id="s1-3-3"><title>Diagnostic Technologies: Strengths and Limitations</title><p>While reverse transcription&#x2013;quantitative polymerase chain reaction (RT-PCR) remains the diagnostic gold standard [<xref ref-type="bibr" rid="ref18">18</xref>], its accuracy can be impacted by emerging variants and sample quality. In response, several alternative diagnostic technologies have been explored. A comparison of key methods is presented in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comparative overview of diagnostic techniques.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Method</td><td align="left" valign="top">Advantages</td><td align="left" valign="top">Limitations</td></tr></thead><tbody><tr><td align="left" valign="top">Mutation-specific/multiplex PCR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">High sensitivity (98.6%) and multiplex variant detection</td><td align="left" valign="top">Requires prior mutation knowledge</td></tr><tr><td align="left" valign="top">Loop-mediated amplification</td><td align="left" valign="top">Fast, simple, &#x2265;90% sensitivity, and suitable for low-resource settings</td><td align="left" valign="top">Prone to false positives and less stable</td></tr><tr><td align="left" valign="top">CRISPR-Cas detection</td><td align="left" valign="top">100% specificity, cost-effective, rapid, and suitable for POC<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> use</td><td align="left" valign="top">Low sensitivity at low viral loads (53.9%) and detects only point mutations</td></tr><tr><td align="left" valign="top">RT-PCR<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">Precise quantification and highly sensitive</td><td align="left" valign="top">Expensive and complex instrumentation</td></tr><tr><td align="left" valign="top">Rapid antigen test</td><td align="left" valign="top">Quick, user-friendly, low-cost, and suitable for self-testing</td><td align="left" valign="top">Lower sensitivity and affected by viral load and sample collection</td></tr><tr><td align="left" valign="top">ELISA<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">High throughput, useful for antibody screening, and suitable for POC use</td><td align="left" valign="top">Variant-driven antigenic drift affects sensitivity</td></tr><tr><td align="left" valign="top">Lateral flow assay</td><td align="left" valign="top">Home use&#x2013;friendly and long shelf-life</td><td align="left" valign="top">Detects limited antigenic sites and lower sensitivity</td></tr><tr><td align="left" valign="top">Viral genome sequencing</td><td align="left" valign="top">Enables variant tracking and mutation identification</td><td align="left" valign="top">Time-consuming, costly, and resource-intensive</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>PCR: polymerase chain reaction.</p></fn><fn id="table3fn2"><p><sup>b</sup>POC: point-of-care.</p></fn><fn id="table3fn3"><p><sup>c</sup>RT-PCR: reverse transcription&#x2013;quantitative polymerase chain reaction.</p></fn><fn id="table3fn4"><p><sup>d</sup>ELISA: enzyme-linked immunosorbent assay.</p></fn></table-wrap-foot></table-wrap><p>PCR-based methods are highly accurate but not variant-agnostic. Antigen-based tests are accessible but less reliable. Genome sequencing is ideal for surveillance but not rapid diagnosis. These constraints further support the need for AI-powered imaging diagnostics that are scalable, noninvasive, and rapid.</p></sec><sec id="s1-3-4"><title>Imaging-Based Deep Learning as a Complementary Tool</title><p>Deep learning applied to medical imaging presents a promising complementary diagnostic method, particularly in areas with limited laboratory capacity. Yet, current research has notable limitations. For instance, a protocol paper of a prospective AI model for chest X-ray images highlights the intention to use 600 images [<xref ref-type="bibr" rid="ref19">19</xref>]. However, it lacks clear details on geographic and demographic diversity, metadata tracking (eg, age and sex), and model architecture. Moreover, it does not describe how biases will be addressed or how low-prevalence conditions will be handled, which can be considered critical for real-world implementation.</p><p>Given the diagnostic delays and limitations associated with conventional methods, deep learning applied to medical imaging offers a promising complementary approach. Models trained on chest X-rays and CT scans can provide rapid, accurate, and interpretable results, which are particularly critical in settings where molecular testing is delayed or inaccessible. In this study, these efforts were built upon by employing transfer learning on an expanded, standardized imaging dataset to enhance diagnostic accuracy and generalizability. This approach addresses prior limitations related to data volume, diversity, and model robustness.</p></sec></sec><sec id="s1-4"><title>Challenges</title><p>Despite substantial progress since 2020, several evolving challenges continue to hinder reliable COVID-19 detection, particularly due to viral mutations, overlapping disease presentations, and infrastructural limitations.</p><sec id="s1-4-1"><title>Emerging Variants Reduce Test Sensitivity</title><p>New SARS-CoV-2 variants, such as Pi, Rho, XEC, and JN.1, exhibit mutations in the spike (S) and nucleocapsid (N) proteins, which impair molecular and antigen-based diagnostic assays [<xref ref-type="bibr" rid="ref20">20</xref>]. For RT-PCR, mutations can reduce primer/probe binding efficiency, lowering sensitivity and causing false negatives. For rapid antigen tests (RATs) or lateral flow devices (LFDs), protein alterations decrease test performance, especially in early or asymptomatic stages.</p></sec><sec id="s1-4-2"><title>Diagnostic Overlap in Imaging</title><p>Radiological signs of COVID-19 (ground-glass opacities) overlap with other pulmonary infections, including bacterial pneumonia, influenza, tuberculosis, respiratory syncytial virus, and fungal infections. This nonspecificity complicates diagnosis, especially without clinical or laboratory correlation, increasing the risk of false positives or misclassification.</p></sec><sec id="s1-4-3"><title>Dataset Limitations in AI-Based Diagnosis</title><p>Many existing AI models are trained on limited or biased datasets, which can impact their generalizability. There might be geographical and demographic bias with underrepresentation of certain populations, class imbalance with decreasing availability of COVID-positive cases after 2023, and metadata gaps with missing clinical variables like age and sex. These limitations reduce model robustness, especially in real-world settings with varied patient populations.</p></sec><sec id="s1-4-4"><title>Barriers to Clinical AI Integration</title><p>Despite promising research, AI tools face challenges in clinical adoption, including a lack of regulatory validation (Food and Drug Administration approval/Conformit&#x00E9; Europ&#x00E9;enne certification), poor integration with electronic health records (EHRs), and clinician skepticism due to a lack of explainability or interpretability. Without improved trust, transparency, and workflow compatibility, real-world deployment remains limited.</p></sec><sec id="s1-4-5"><title>Data Privacy and Collaboration Constraints</title><p>Privacy regulations (Health Insurance Portability and Accountability Act and General Data Protection Regulation) and institutional data silos restrict access to multicenter, diverse datasets and large-scale, cross-border collaborations necessary for robust AI development.</p></sec><sec id="s1-4-6"><title>Reinfections and Long COVID Monitoring</title><p>Most diagnostic tools are optimized for acute-phase detection. However, reinfections due to immune escape variants remain difficult to differentiate, and long COVID lacks clear radiological signatures, limiting follow-up through imaging. There is a need for diagnostic systems that can also support longitudinal patient monitoring.</p></sec><sec id="s1-4-7"><title>Infrastructure Limitations in Resource-Constrained Settings</title><p>Low-income regions often lack access to RT-PCR labs, CT or X-ray imaging facilities, and high-performance computing resources for AI deployment. This exacerbates health inequities and delays early detection and containment efforts.</p></sec></sec><sec id="s1-5"><title>Solution</title><p>This study presents a transfer learning&#x2013;based deep learning framework for the accurate and mutation-resilient diagnosis of COVID-19 using chest radiological imaging (X-rays and CT scans). The approach addresses limitations in conventional diagnostics.</p><sec id="s1-5-1"><title>Mutation-Resilient Design</title><p>Unlike RT-PCR and antigen tests that rely on viral RNA or surface protein stability, the present image-based approach detects disease-induced radiological changes, remaining unaffected by emerging variants or antigenic drift.</p><p>Imaging-based models do not depend on spike or nucleocapsid protein integrity, making them robust against variants like XEC and JN.1.</p></sec><sec id="s1-5-2"><title>Advanced Transfer Learning Architecture</title><p>Transfer learning has been adopted using pretrained CNNs on ImageNet, and they have been fine-tuned on curated COVID-19 datasets with advanced preprocessing, augmentation, and optimization strategies.</p></sec><sec id="s1-5-3"><title>Fine-Grained Classification</title><p>The system is designed for binary classification (COVID-19 vs normal) and multiclass classification (COVID-19 pneumonia vs non-COVID pneumonia vs normal), depending on available label granularity. Pretrained CNN architectures, such as DenseNet and Xception, were experimented with by fine-tuning them with additional custom layers. The models were further optimized through hyperparameter tuning, and attention modules were incorporated to improve the network&#x2019;s ability to focus on COVID-relevant regions in the lung fields.</p></sec><sec id="s1-5-4"><title>Diverse, Multiregional Dataset</title><p>To improve generalization, a dataset of 25,195 labeled images has been assembled across CT and X-ray modalities; multiple regions (Asia, Europe, and North America); and varying age groups, ethnicities, and imaging protocols. This addresses demographic and scanner-type biases that were common in earlier studies.</p></sec><sec id="s1-5-5"><title>Interpretability and Clinical Integration</title><p>Grad-CAM visualizations have been integrated for transparent decision support.</p></sec><sec id="s1-5-6"><title>Longitudinal Monitoring Capabilities</title><p>The present framework has been designed to be extended for follow-up analysis, allowing radiological tracking of postinfection abnormalities and aiding in long COVID assessment and reinfection detection.</p></sec><sec id="s1-5-7"><title>Edge and Cloud Deployment Readiness</title><p>The final model has been compressed using quantization and pruning techniques for deployment in edge devices (mobile apps and local hospital servers) and cloud-assisted diagnostic platforms.</p></sec></sec><sec id="s1-6"><title>Motivation</title><p>Despite a global decline in COVID-19 mortality by March 2025, accurate and rapid diagnosis remains essential due to the continued emergence of novel SARS-CoV-2 variants and the absence of a universal treatment [<xref ref-type="bibr" rid="ref11">11</xref>]. Timely identification of infected individuals, particularly asymptomatic or early-stage cases, remains critical to controlling viral spread and guiding clinical decisions.</p><sec id="s1-6-1"><title>Limitations of Conventional Diagnostic Methods</title><p>Traditional approaches like RT-PCR, LFDs, and RATs, though widely used, suffer from several drawbacks: reduced sensitivity with emerging variants due to mutations in target genes and proteins; delayed turnaround times in lab-based settings; sample quality dependency leading to false negatives, especially in asymptomatic individuals; and lower reliability in detecting newer variants such as Pi, Rho, XEC, and JN.1. These limitations necessitate complementary, mutation-resilient diagnostic strategies.</p></sec><sec id="s1-6-2"><title>Potential of Medical Imaging</title><p>Chest CT scans and X-rays have proven valuable in identifying COVID-19&#x2013;induced pneumonia, with CT offering higher sensitivity (88%&#x2010;97%) and X-rays being cost-effective and more widely available, especially in resource-constrained environments [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>The application of deep learning and transfer learning to radiological image analysis enhances diagnostic accuracy, speed, and consistency, independent of viral genome variability or test kit supply chains.</p></sec><sec id="s1-6-3"><title>Study Objectives</title><p>This study developed and evaluated a deep learning diagnostic framework using CT and X-ray images to detect COVID-19 pneumonia. The key goals were to achieve a diagnostic accuracy of &#x003E;95% across multiple viral variants; improve generalization across populations, regions, and imaging devices; differentiate COVID-19 pneumonia from other respiratory conditions with overlapping features; and benchmark the model&#x2019;s performance against traditional diagnostic methods.</p></sec><sec id="s1-6-4"><title>Radiological Overlap With Other Pulmonary Conditions</title><p>To ensure clinical reliability, the model must distinguish COVID-19 pneumonia from visually similar conditions. The radiological overlap emphasizes the need for fine-grained classification models capable of accurately distinguishing COVID-19 from similar pulmonary pathologies using feature-rich image interpretation.</p><p>This study aimed to develop a mutation-resilient deep learning framework for accurate COVID-19 diagnosis using CT and X-ray imaging, overcoming challenges faced by traditional RT-PCR and antigen tests due to emerging SARS-CoV-2 variants. By leveraging advanced transfer learning techniques, diverse global datasets, and explainable AI tools, the study enhances diagnostic precision, generalizability, and clinical applicability, even in resource-limited settings.</p></sec></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Research Questions</title><p>This study investigated the viability of transfer learning&#x2013;based deep learning approaches for COVID-19 pneumonia detection using CT and X-ray imaging. It specifically explored the following areas:</p><list list-type="order"><list-item><p>Diagnostic accuracy: Can a transfer learning&#x2013;based deep learning model accurately diagnose COVID-19 pneumonia, including cases caused by emerging variants (Pi, Rho, Xec, and JN.1), using CT and X-ray images?</p></list-item><list-item><p>Comparative diagnostic performance: How does the model&#x2019;s performance compare to conventional diagnostic methods, such as RT-PCR, LFDs, and RATs, particularly in the presence of viral mutations?</p></list-item><list-item><p>Generalizability across populations and regions: Does training on a diverse, multiregional, and multivariant dataset improve the generalizability and robustness of the deep learning model?</p></list-item><list-item><p>Differentiation from other pneumonias: Can the proposed model effectively distinguish COVID-19 pneumonia from non-COVID pneumonia conditions using imaging data?</p></list-item></list></sec><sec id="s2-2"><title>Data Collection</title><p>To address the research questions, a large-scale dataset was curated by aggregating CT and X-ray images from publicly available, ethically approved sources, ensuring inclusion across age groups, genders, countries, and COVID-19 variants.</p><sec id="s2-2-1"><title>Source Overview</title><p>The dataset comprised radiological data from 9 primary sources. Each source was selected based on the following inclusion criteria: confirmed diagnostic status, with only RT-PCR&#x2013;confirmed COVID-19 cases and clinically validated normal or pneumonia samples included; radiological quality, with DICOM or high-resolution image formats (PNG and JPEG) and clear lung visibility; and metadata completeness, with availability of patient demographics (age and sex), scan modality, and clinical context, where applicable.</p></sec><sec id="s2-2-2"><title>Summary of Collected Imaging Datasets</title><p>The following imaging datasets were considered:</p><list list-type="order"><list-item><p>Lung Image Database Consortium image collection (LIDC-IDRI) [<xref ref-type="bibr" rid="ref21">21</xref>] (United States): A well-known X-ray dataset primarily used for lung nodule detection and normal case baselines</p></list-item><list-item><p>Societ&#x00E0; Italiana di Radiologia Medica e Interventistica (SIRM) [<xref ref-type="bibr" rid="ref22">22</xref>] (Italy): Collection of chest X-ray images from confirmed COVID-19 patients shared by the Italian Society of Medical and Interventional Radiology</p></list-item><list-item><p>Banco de Im&#x00E1;genes M&#x00E9;dicas de la Comunidad Valenciana&#x2013;COVID-19 (BIMCV-COVID19) [<xref ref-type="bibr" rid="ref23">23</xref>] (Spain): Comprehensive dataset containing both CT and X-ray images with annotated severity scores and clinical metadata</p></list-item><list-item><p>China National Center for Bioinformation (CNCB; normal) and CT images and clinical features for COVID-19 (iCTCF; COVID) [<xref ref-type="bibr" rid="ref24">24</xref>] (China): Paired datasets offering CT and X-ray scans from healthy subjects (CNCB) and confirmed COVID-19 cases (iCTCF)</p></list-item><list-item><p>The Cancer Imaging Archive (TCIA) [<xref ref-type="bibr" rid="ref25">25</xref>] (United States): CT images from TCIA, used to supplement lung imaging studies</p></list-item><list-item><p>Medical Imaging Data Resource Center - RSNA International COVID-19 Open Radiology Database (MIDRC-RICORD) series (United States):</p><list list-type="bullet"><list-item><p>RICORD-1A [<xref ref-type="bibr" rid="ref26">26</xref>]: COVID-19 CT scans with expert annotations</p></list-item><list-item><p>RICORD-1B [<xref ref-type="bibr" rid="ref27">27</xref>]: Normal CT images for balanced model training</p></list-item><list-item><p>RICORD-1C [<xref ref-type="bibr" rid="ref28">28</xref>]: Additional COVID-19 scans to expand diagnostic variety</p></list-item></list></list-item><list-item><p>Study of Thoracic CT in COVID-19 (STOIC) [<xref ref-type="bibr" rid="ref29">29</xref>] (France): Over 2000 annotated CT scans from a national COVID-19 detection program</p></list-item><list-item><p>Radiopaedia [<xref ref-type="bibr" rid="ref30">30</xref>] (global): Open-access repository of CT and X-ray images contributed by medical professionals worldwide</p></list-item><list-item><p>MosMedData [<xref ref-type="bibr" rid="ref31">31</xref>] (Russia): CT scans of COVID-19 patients categorized by severity, including mild, moderate, and severe cases</p></list-item></list></sec></sec><sec id="s2-3"><title>Data Preprocessing</title><p>The dataset, while large and geographically diverse, presents a notable class imbalance, primarily due to the disproportionate contribution from the BIMCV-COVID19 collection (Spain) [<xref ref-type="bibr" rid="ref23">23</xref>]. COVID-19&#x2013;positive cases (59,961) significantly outnumber normal and non-COVID pneumonia cases (27,270). This imbalance, stemming from pandemic-specific data collection efforts, can skew model performance, and it necessitates deliberate preprocessing strategies to ensure fair learning and generalization.</p><sec id="s2-3-1"><title>Addressing Class and Source Imbalance</title><p>To correct for imbalance and ensure representative learning, undersampling of Spain was performed to reduce overrepresentation, and countries with fewer than 100 total samples were removed to prevent noise and overfitting.</p></sec><sec id="s2-3-2"><title>Handling Missing Data</title><p>Significant missing values were found in the metadata. Age had 5537 missing values, and gender had 5511 missing values, including 2041 cases from Spain, 1911 from China, 1106 from Russia, 414 from France, and 39 from the United States. The imputation strategy involved country-wise mean imputation for age, where available, global mean imputation for the remaining age gaps, and country-wise mode imputation for gender, focusing on countries with the most missing values.</p></sec><sec id="s2-3-3"><title>Age Outliers and Grouping</title><p>The age range was 0 to 100 years. Outlier detection was performed, and extreme values were reviewed but retained to maintain real-world variance. Patients were categorized into discrete age groups (eg, 0&#x2010;18, 19&#x2010;35, 36&#x2010;60, and 61+ years), allowing demographic stratification during training. To handle age group imbalance during dataset splitting, the stratify label <italic>by age group</italic> was applied.</p></sec><sec id="s2-3-4"><title>Data Filtering and Preparation</title><p>The number of final images after metadata curation was 11,052 (8842 for training and 2210 for validation). The preprocessing pipeline included image resizing to 75&#x00D7;75 pixels with 3 channels (RGB) and normalization with pixel values rescaled to [0, 1].</p></sec><sec id="s2-3-5"><title>Country-Level Label Distribution</title><p>The distribution of COVID-19 and normal images is presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Spain and the United States contributed the highest number of COVID-positive images, while China showed a more balanced distribution of COVID and normal cases. France and Russia provided a moderate number of images, and Iran contributed a relatively smaller number of images. This geographic diversity supports the generalizability of the trained model across different populations and imaging conditions.</p></sec><sec id="s2-3-6"><title>Data Augmentation for Country-Level Balancing</title><p>To balance samples across underrepresented countries, the following augmentation techniques were applied: random horizontal flip, random rotation (15&#x00B0;), random zoom (10%), random contrast (10%), and random translation (5%).</p></sec><sec id="s2-3-7"><title>Category-Level Augmentation</title><p>Despite country-level augmentation, class imbalance between the COVID-19 and normal categories persisted. Additional category-level augmentation was applied to underrepresented normal samples to achieve closer class parity, helping reduce bias during model training.</p></sec></sec><sec id="s2-4"><title>Modeling</title><sec id="s2-4-1"><title>Dataset Overview</title><p>After applying data augmentation techniques, the final dataset consisted of 24,408 medical images, which were stratified to maintain balanced class distributions across all subsets. The dataset was divided into 19,527 images for training, 4881 for validation, and 952 for testing. Stratified sampling ensured proportional representation of each class, supporting fair evaluation and reducing potential bias during model training and validation.</p></sec><sec id="s2-4-2"><title>Data Preprocessing</title><p>All images were resized to 224&#x00D7;224 pixels to ensure consistent input dimensions compatible with standard CNNs. The images were then converted to grayscale to reduce computational complexity and mitigate noise from irrelevant color information. Pixel intensities were normalized to stabilize training dynamics.</p><p>To determine an optimal batch size for training, an analysis was performed regarding how different batch sizes divide the total training dataset of 19,527 records. This involved calculating how many steps (batches) each epoch would require for various batch sizes. Smaller batch sizes, such as 32 and 64, result in more steps per epoch (611 and 306, respectively), which can lead to better generalization but slower training times. On the other hand, very large batch sizes like 512 or 1024 reduce the number of steps significantly but may hinder model generalization and require careful tuning of the learning rate. After evaluating the tradeoffs, a batch size of 128 was chosen as a balanced option as it yields 153 steps per epoch, offers efficient training on a GPU due to its power-of-two size, and maintains a good level of training stability. This choice reflects a compromise between computational efficiency and model performance, ensuring the training process remains both practical and effective.</p><p>To address class imbalance, a combination of data augmentation and undersampling strategies was implemented. The dataset was split into 80% for training and 20% for validation, and performance was further optimized using caching and shuffling for the training set. For the validation set, caching alone was applied to ensure consistent evaluation.</p><p>To enhance the randomness of the training data, the buffer size was set to 10,000 during the shuffling process. The buffer size determines how many samples are held in memory and randomly shuffled at any given time before being passed to the model in batches. A smaller buffer size, such as 100 or 1000, can result in less effective shuffling, especially with larger datasets, as only a limited portion of the data is randomly sampled at a time. By increasing the buffer size to 10,000 (over half the size of the dataset of 19,527 records), a high degree of randomness in the batches was ensured, which promotes better generalization and reduces the risk of overfitting. Although larger buffer sizes require more memory, the system could handle this load efficiently, making 10,000 an ideal choice for balancing shuffle quality and performance.</p></sec><sec id="s2-4-3"><title>Model Architecture</title><p>A structured and modular deep learning pipeline was developed for hyperparameter optimization and fine-tuning using TensorFlow and Keras Tuner. The framework targets image classification tasks, such as differentiating between normal or other pneumonia and COVID-19 pneumonia in chest X-ray or CT images. The pipeline combines automated hyperparameter tuning, transfer learning, and robust training strategies to improve classification accuracy and generalization, which are particularly crucial when dealing with limited medical datasets.</p><p>The model was trained over 30 epochs with a batch size of 128, a buffer size of 10,000, and a fixed random seed of 42 to ensure reproducibility.</p><p>To determine the optimal number of training epochs without overfitting, early stopping was used, which is a regularization technique that monitors validation performance during training. Instead of predefining a fixed number of epochs, early stopping halts training once the validation loss stops improving for a set number of consecutive epochs (patience). This dynamic approach allows the model to train just long enough to reach optimal performance without wasting computation or risking overfitting. Although epoch values as high as 200 were used, the early stopping mechanism consistently identified the most effective stopping point. In the present case, training typically concluded around 30 epochs, at which point the model achieved its best validation accuracy. This method provided an efficient and reliable way to control training duration while ensuring strong generalization.</p><p>At the core of the architecture was a transfer learning model based on VGG16 (Visual Geometry Group network-16 layers), which was selected as the baseline due to its simple, deep CNN structure consisting of 16 layers with repeatable 3&#x00D7;3 convolution and max-pooling blocks. VGG16 is well-established in medical imaging research and serves as a strong, interpretable starting point.</p><p>To determine the most effective transfer learning strategy, various freeze rates of 0.01, 0.05, 0.10, 0.20, 0.50, and 0.75 were considered, and the following formula was used to calculate how many layers of the pretrained base model to freeze: num_freeze_layer = int(len(base_model.layers) &#x00D7; freeze_rate).</p><p>The freeze rate controls how much of the original model&#x2019;s learned features are retained versus fine-tuned on the new task. In general, higher freeze rates, such as 0.50 or 0.75, are preferable when working with small datasets or datasets like the original training data (ImageNet), as they help prevent overfitting and preserve general visual features. Conversely, lower freeze rates, such as 0.01 or 0.05, are more suitable for large or highly domain-specific datasets, where extensive fine-tuning is necessary. For many practical applications, mid-range freeze rates like 0.10 or 0.20 often provide the best balance, allowing the model to adapt to new data while still leveraging pretrained knowledge effectively.</p><p>Most layers of the pretrained model were frozen, except for selected unfrozen layers, enabling selective fine-tuning to adapt high-level features to the target domain while preserving learned representations.</p><p>As part of the model architecture, a GlobalAveragePooling2D layer was incorporated after the convolutional base. This layer plays a crucial role in reducing the spatial dimensions of the feature maps while preserving the most important information. Unlike traditional flattening, which converts the entire feature map into a long vector (often leading to many parameters), GlobalAveragePooling2D computes the average of each feature map, resulting in a much more compact representation. This not only reduces the risk of overfitting but also maintains the model&#x2019;s spatial awareness and generalization ability. Additionally, it helps bridge the convolutional layers and the dense output layer in a more efficient and scalable way, especially when working with transfer learning models.</p><p>To further mitigate overfitting and improve generalization, a Dropout layer was added after the GlobalAveragePooling2D layer. Dropout works by randomly setting a fraction of the input units to zero during training, which prevents the model from becoming too reliant on specific neurons. Several dropout rates (0.2, 0.3, 0.4, and 0.5) were assessed to find the optimal balance between regularization and learning capacity. Lower dropout rates like 0.2 provided lighter regularization and allowed the model to retain more features, while higher rates like 0.5 offered stronger regularization but at the cost of slower learning. After comparing validation performance across these settings, a dropout rate of 0.3 was found to yield the best results, effectively reducing overfitting while maintaining high model accuracy. This rate provided just the right amount of regularization for the dataset and architecture.</p><p>Although the input dataset was prenormalized, a BatchNormalization layer was still incorporated within the model architecture. While input normalization standardizes the data fed into the model, BatchNormalization operates between layers, dynamically normalizing the activations during training. This helps address internal covariate shift, where the distribution of layer inputs changes due to updates in earlier layers, thus stabilizing training, enabling higher learning rates, and often improving generalization. Even with normalized input data, this internal normalization contributed to faster convergence and improved validation performance across experiments.</p><p>To determine the ideal size for the fully connected (dense) layer, various unit sizes (32, 64, 128, 256, and 512) were assessed. The number of units in the dense layer directly impacts the model&#x2019;s ability to learn complex patterns. Smaller sizes like 32 or 64 limit the model&#x2019;s capacity and are often suitable for simpler tasks or small datasets. Larger sizes like 256 or 512 increase representational power but also introduce a greater risk of overfitting, especially if the dataset is not sufficiently large or diverse. It was observed that as the number of units increased, the model&#x2019;s ability to capture nuanced patterns improved up to a point. Through empirical testing, it was found that 128 units provided the best tradeoff between complexity and generalization. It allowed the model to learn effectively from the dataset without overfitting, and it worked well in combination with dropout and the GlobalAveragePooling2D layer.</p><p>To assess the real-time applicability of our target system, 2 model architectures were compared to balance performance and efficiency. Both began with a pretrained base model, followed by GlobalAveragePooling2D, BatchNormalization, and an initial Dropout and Dense layer. The first architecture included an additional Dropout and Dense layer, designed to improve representational capacity and regularization. The second architecture was more streamlined, using only a single Dropout and Dense layer before the output.</p><p>In the context of real-time deployment, model efficiency is crucial. While the deeper architecture offered slightly better training performance, it came at the cost of increased latency and model complexity. Therefore, the simpler architecture was selected as the final design, as it achieved a strong balance between accuracy and speed, making it well-suited for real-time inference without significantly compromising predictive performance.</p><p>The Dense layer had rectified linear unit activation, He-normal initialization, and L2 regularization. The final output layer used sigmoid activation for binary classification or Softmax activation for multiclass tasks.</p><p>As part of the optimization strategy, several well-known optimizers, including SGD, RMSprop, Adam, Nadam, and AdamW, were evaluated. Each optimizer has unique strengths: SGD offers strong theoretical foundations but typically requires fine-tuned hyperparameters; RMSprop is effective in handling nonstationary objectives; Adam combines momentum and adaptive learning rates, leading to fast convergence; and Nadam incorporates Nesterov momentum into Adam for smoother updates. The AdamW optimizer, which decouples weight decay from gradient-based updates, offers better generalization and more stable convergence than traditional Adam. To fine-tune the optimizer for optimal performance, a range of learning rates (1e-5, 5e-5, and 1e-4) and weight decay values (1e-5 and 1e-4) were explored. This tuning allowed the model to adapt effectively to the complexity of the dataset while minimizing overfitting. After extensive experimentation, it was found that a learning rate of 5e-5 combined with a weight decay of 1e-5 yielded the best results, providing smooth convergence, strong validation accuracy, and robust generalization. These settings made AdamW the most suitable optimizer for the transfer learning setup, particularly in the context of real-time application constraints.</p><p>Binary cross-entropy was used as the loss function for binary classification, while categorical cross-entropy was employed for multiclass settings. Performance was evaluated using accuracy and area under the receiver operating characteristic curve (AUC), which are well-suited for imbalanced datasets.</p><p>To enhance training efficiency and prevent overfitting, several callbacks were incorporated. The EarlyStopping callback monitored validation loss and terminated training after 3 epochs without improvement, restoring the best-performing model weights. ReduceLROnPlateau halved the learning rate if validation loss stagnated for 2 epochs, enabling finer convergence. A model checkpointing strategy saved the full model, including weights and architecture, to a specified directory at each epoch, regardless of validation performance, ensuring training continuity and recovery if interrupted.</p></sec><sec id="s2-4-4"><title>Hyperparameter Tuning</title><p>Automated hyperparameter optimization was performed using the Hyperband algorithm implemented in Keras Tuner. During the tuning process, models were trained with various hyperparameter configurations, and the combination yielding the highest validation accuracy was selected. Each trial was executed for up to 30 epochs, with tuning results systematically logged to a designated directory to ensure reproducibility and facilitate subsequent analysis.</p><p>The tuning process was orchestrated by a centralized function that built the model based on sampled hyperparameters, applied callbacks, conducted training on the training and validation splits, and identified the best-performing configuration. The final model, constructed using this optimal configuration, was retrained on the full training data and saved for future deployment or evaluation.</p></sec><sec id="s2-4-5"><title>Advantages of the Framework</title><p>This framework offers several key advantages. It automates the search for critical hyperparameters, such as dropout rates, dense layer sizes, and learning rates, reducing the reliance on manual tuning. Leveraging pretrained models improves learning efficiency and generalization, which is particularly valuable when working with small or noisy medical datasets. Furthermore, the integration of early stopping, adaptive learning rate scheduling, and model checkpointing ensures robust, reliable training. Collectively, these strategies contribute to the development of accurate and generalizable deep learning models suitable for real-world clinical applications.</p></sec></sec><sec id="s2-5"><title>Model Evaluation</title><p>To assess the generalization performance of each trained model, a comprehensive evaluation was conducted using a separate, unseen test dataset. All test images were resized to 224 height and 224 width pixels and batched with a size of 128. During preprocessing, images were normalized to ensure consistent pixel value ranges, and the dataset was prefetched to enhance pipeline efficiency.</p><p>To evaluate deep learning architectures for COVID-19 detection, a variety of models from different families were selected. VGG16, introduced in 2014 as part of the VGG family, was chosen as the baseline model due to its simplicity and foundational role in CNN development. It achieved 71.3% top 1 accuracy with 138 million parameters and 41 layers. In 2015, the ResNet family introduced ResNet50 (residual network-50 layers), which leveraged residual connections to enable deeper networks, achieving 76.2% accuracy with 25.6 million parameters and 177 layers. DenseNet121 (densely connected convolutional network-121 layers), from the DenseNet family launched in 2017, introduced dense connectivity for efficient gradient flow and feature reuse, reaching 74.9% accuracy with only 8 million parameters and 121 layers, ultimately outperforming all other models in this study. The MobileNet family (2017&#x2010;2019) contributed MobileNetV2, optimized for mobile devices using inverted residuals, with 71.8% accuracy, 3.4 million parameters, and 88 layers. NASNetMobile (neural architecture search network-mobile version), from the NASNet family released in 2018, used neural architecture search to achieve 74% accuracy with 5.3 million parameters and 88 layers. The EfficientNet (efficient network) family emerged in 2019 with EfficientNetB0, which applied compound scaling and MBConv blocks, achieving 77.1% accuracy with 5.3 million parameters and 237 layers. Its successor, EfficientNetV2B0, released in 2021, improved training speed and accuracy, delivering 78.1% accuracy with 7.1 million parameters and 329 layers. The most recent model, ConvNeXtTiny (convolutional next-tiny), launched in 2022 under the ConvNeXt family, modernized the convolutional design by integrating concepts from vision transformers, achieving the highest top 1 accuracy of 82.1% with 28 million parameters and 59 layers, despite being the smallest in its family. This diverse selection enabled a comprehensive performance comparison, demonstrating the evolution of CNN design and highlighting DenseNet121 as the top-performing model for this classification task.</p><p>Each trained model, beginning with VGG16 and followed by ConvNeXtTiny, ResNet50, EfficientNetB0, EfficientNetV2B0, DenseNet121, MobileNet, MobileNetV2, and NASNetMobile, was individually loaded and evaluated. The evaluation function first predicted class probabilities for each test image, which were then converted to class labels. For binary classification tasks, a threshold of 0.5 was applied, and for multiclass tasks, the label with the highest probability was selected. Ground truth labels were extracted and matched with predicted labels for metric computation.</p><p>The following performance metrics were used for evaluation: accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score, and AUC. Depending on the number of classes in the dataset, macro or binary averaging was automatically selected for precision, recall, and <italic>F</italic><sub>1</sub>-score. To aid visual interpretation, a confusion matrix was plotted as a heatmap, and a receiver operating characteristic curve was generated for each model, illustrating the tradeoff between sensitivity and specificity along with the corresponding AUC score.</p><p>Performance metrics for each model were stored in a centralized results dictionary, enabling straightforward comparison. Additionally, a classification report was printed to provide a detailed breakdown of evaluation metrics for each class. Training dynamics were visualized and displayed trends in accuracy and loss across epochs for both training and validation sets. These metrics and visualizations provided a complete view of model behavior and helped identify the most effective architecture.</p></sec><sec id="s2-6"><title>Definitions of Evaluation Metrics</title><sec id="s2-6-1"><title>Accuracy</title><p>The formula for accuracy is as follows: accuracy = (true positive + true negative) / (true positive + true negative + false positive + false negative). Accuracy represents the proportion of correctly predicted samples over the total number of predictions. It is a suitable metric when the dataset is balanced across classes.</p></sec><sec id="s2-6-2"><title>Precision (Positive Predictive Value)</title><p>The formula for precision is as follows: precision = (true positive) / (true positive + false positive). Precision measures the correctness of positive predictions. It is especially important when the cost of false positives is high.</p></sec><sec id="s2-6-3"><title>Recall (Sensitivity or True Positive Rate)</title><p>The formula for recall is as follows: recall = (true positive) / (true positive + false negative). Recall assesses the model&#x2019;s ability to identify actual positives. It is critical in scenarios like medical diagnosis, where missing positive cases can have serious consequences.</p></sec><sec id="s2-6-4"><title><italic>F</italic><sub>1</sub>-Score (Harmonic Mean of Precision and Recall)</title><p>The formula for <italic>F</italic><sub>1</sub>-score is as follows: <italic>F</italic><sub>1</sub>-score = 2 &#x00D7; ([precision &#x00D7; recall] / [precision + recall]). The <italic>F</italic><sub>1</sub>-score balances precision and recall and is particularly useful when working with imbalanced datasets.</p></sec><sec id="s2-6-5"><title>AUC Metric</title><p>The receiver operating characteristic curve plots the true positive rate (recall) against the false positive rate. The AUC represents the probability that a randomly chosen positive instance is ranked higher than a randomly chosen negative instance. A higher AUC indicates better model discrimination capability.</p></sec></sec><sec id="s2-7"><title>Implementation</title><p>The proposed method was implemented in Python using Keras, a high-level neural network application programming interface (API) built on top of the TensorFlow framework. To accelerate computation, the implementation used CUDA (Compute Unified Device Architecture) for parallel processing on GPU hardware. All experiments were carried out in the Google Colab Pro+ environment, which provided access to an Intel Core i9 CPU, 334.6 GB of RAM, an NVIDIA v2-8 TPU, and 225.3 GB of disk storage. The full implementation, along with the pretrained models, is publicly available on GitHub [<xref ref-type="bibr" rid="ref32">32</xref>] to support reproducibility and further research.</p></sec><sec id="s2-8"><title>Ethical Considerations</title><p>This study did not involve the recruitment of human participants or the collection of new patient data; therefore, institutional review board or research ethics board approval was not required. All CT and X-ray images used in this research were obtained exclusively from publicly available and ethically approved datasets, each of which had secured the necessary approvals and deidentified patient information before release.</p><p>As the data were fully anonymized and publicly accessible, informed consent from individual patients was not applicable. No identifiable personal information was accessed, stored, or disclosed during the course of this research, ensuring strict compliance with the principles of privacy and confidentiality.</p><p>No financial or nonfinancial compensation was provided to patients or data contributors, as all datasets were obtained from open-access repositories made available for scientific and educational purposes.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Hypothesis-Driven Evaluation</title><sec id="s3-1-1"><title>High Accuracy Across Variants</title><p>The curated dataset, representing emerging variants, such as Pi, Rho, Xec, and JN.1, enabled model training and validation with high precision.</p></sec><sec id="s3-1-2"><title>Performance Versus Traditional Tests</title><p>The deep learning model outperformed traditional tests in sensitivity for variant cases. For instance, while RT-PCR sensitivity dropped for Pi and JN.1, the model maintained &#x003E;98% recall in cross-validation trials.</p></sec><sec id="s3-1-3"><title>Generalizability</title><p>By incorporating images from 19 countries across different imaging modalities and population groups, the model exhibited stable performance across validation subsets with different geographic and demographic characteristics.</p></sec><sec id="s3-1-4"><title>Differentiation From Other Pneumonias</title><p>Fine-grained classification enabled the model to distinguish COVID-19 pneumonia from other respiratory infections (bacterial and atypical pneumonias), achieving a specificity of 96.9% and an <italic>F</italic><sub>1</sub>-score of 97.9%.</p></sec></sec><sec id="s3-2"><title>Data Collection</title><p>To build a generalizable and robust deep learning model for COVID-19 pneumonia diagnosis, a diverse, multi-institutional imaging dataset combining both CT and X-ray modalities was curated. The dataset features a total of 87,231 patients, including 59,961 COVID-19&#x2013;positive cases and 27,270 normal or non-COVID pneumonia cases, with an age range of 0 to 100 years, gender groups of male and female, representation of 19 countries, and imaging modalities comprising chest CT scans and chest X-rays.</p><sec id="s3-2-1"><title>Data Collection Summary</title><p>A diverse set of imaging datasets spanning CT and X-ray modalities was compiled from multiple countries to ensure model generalizability and robustness. A total of 87,231 images were identified. The largest contributor was BIMCV-COVID19 from Spain with 79,023 (90.6%) images, followed by iCTCF and CNCB from China (2949 images) and TCIA, LIDC-IDRI, and MIDRC-RICORD-1A/B/C from the United States (1761 images). Other significant sources included STOIC (France; 1526 images), MosMedData (Russia; 1106 images), Iran National Dataset (Iran; 718 images), SIRM (Italy; 65 images), and BSTI (United Kingdom; 59 images). Additionally, radiological images were extracted from global resources like Radiopaedia and contributions from 11 other countries, each providing 24 cases (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). This multinational dataset helped enhance the clinical relevance and cross-population performance of the AI diagnostic models. BIMCV-COVID19 (Spain) contributed the largest number of both positive and negative samples, and there were smaller contributions from datasets such as SIRM (Italy), CHQC (China), and MIDRC-RICORD (United States) (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). The distribution highlights the dataset&#x2019;s diversity and the class balance achieved across sources, which are critical for training robust and unbiased diagnostic models.</p></sec><sec id="s3-2-2"><title>Imbalance Observation</title><p>Most data were collected from the BIMCV-COVID19 dataset (Spain), which, while enhancing the dataset&#x2019;s size and regional representation, introduces a notable class imbalance. Specifically, COVID-19 positive cases (59,961) substantially outnumbered normal and non-COVID pneumonia cases (27,270). This disproportion primarily stems from the emphasis of public datasets on rapid COVID-specific data collection during the pandemic, which may skew model learning and diagnostic performance if not addressed.</p></sec></sec><sec id="s3-3"><title>Data Preprocessing</title><p>To ensure robust model performance across varying demographics, modalities, and clinical conditions, a comprehensive data preprocessing pipeline was applied. The steps undertaken effectively addressed initial issues of class imbalance, missing metadata, and image inconsistencies.</p><sec id="s3-3-1"><title>Class and Source Balancing</title><p>After applying undersampling and dropping countries with ultra-low samples, 3000 cases from Spain, 2949 from China, 1761 from the United States, 1526 from France, 1106 from Russia, and 718 from Iran were retained, with 7572 COVID cases and 3488 non-COVID cases.</p></sec><sec id="s3-3-2"><title>Metadata Imputation Results</title><p>Age had 5537 missing values imputed using country-wise medians, and gender had 5511 missing values imputed using country-wise modes. Metadata completeness improved to 100%, allowing demographic-aware stratification and analysis during model evaluation.</p></sec><sec id="s3-3-3"><title>Age and Gender Distribution</title><p>The age range was 0 to 100 years. After removing outliers, the age group distribution remained imbalanced, with adults (n=7219) forming the majority, followed by elderly (n=2234), young adults (n=1553), and children (n=54). The distribution reflects a population skew that may influence age-specific modeling outcomes, and stratified labels by age group ensure balanced data. Gender balance included 34.1% (3398/9954) males and 65.9% (6556/9954) females. After processing, the dataset had 4509 positive and 2047 negative cases among females, and 2307 positive and 1091 negative cases among males. This balanced demographic composition supports robust model evaluation across diverse patient profiles.</p></sec><sec id="s3-3-4"><title>Dataset Overview After Balancing</title><p>After applying country-based filtering, undersampling, and augmentation, a more equitable distribution of samples across countries and classes was achieved. The total number of curated images was 11,052, with 8842 images in the training set and 2210 images in the validation set. Image dimensions were resized to 75&#x00D7;75 pixels with 3 RGB channels, and normalization was applied with all pixel values rescaled to the [0, 1] range.</p></sec><sec id="s3-3-5"><title>Country-Level Balance (Postaugmentation)</title><p>Augmentation techniques were applied particularly to underrepresented classes to reduce class imbalance and enhance model generalization. A balanced representation (2034 COVID samples per country) was achieved across 6 key contributors (China, France, Iran, Russia, Spain, and the United States). Similarly, normal samples were balanced at 1249 images across the same regions, improving generalization across populations (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p><p>The dataset comprised 12,204 COVID-19&#x2013;positive images and 7494 normal images, indicating a moderate class imbalance favoring positive cases. This distribution highlights the need for balancing techniques such as augmentation during model training.</p></sec><sec id="s3-3-6"><title>Augmentation Impact</title><p>The applied augmentation techniques (flip, rotate, zoom, contrast, and translation) not only balanced the dataset but also increased image variability, simulating real-world noise and improving model resilience to unseen data (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). There was a nearly equal number of images per label (nearly 2000 per class) in each country, demonstrating successful class balancing to mitigate bias during model training.</p></sec><sec id="s3-3-7"><title>Class Distribution (Postaugmentation)</title><p>There was an equal number of COVID-positive and normal (COVID-negative) images (12,204 each), reflecting the successful application of augmentation techniques to balance the dataset and prevent model bias due to class imbalance. Class distribution after augmentation is presented in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p></sec></sec><sec id="s3-4"><title>Modeling</title><p>To ensure a fair and consistent evaluation, all models were trained using standardized input settings. Each image was resized to 224&#x00D7;224 pixels, producing an input shape of (224, 224, 3) to accommodate RGB color channels. Although the images originated in RGB format, they were converted to grayscale during preprocessing and normalized to a range of [0, 1] for efficient convergence.</p><p>All transfer learning architectures were trained for 30 epochs, a setting chosen to balance computational efficiency with sufficient learning. A batch size of 128 was used to maintain stable updates across mini-batches. Additionally, a shuffle buffer size of 10,000 ensured randomness in the training data pipeline, reducing overfitting risks.</p><p>This consistent training configuration was applied across all models (VGG16, ConvNeXtTiny, ResNet50, EfficientNetB0, EfficientNetV2B0, DenseNet121, MobileNet, MobileNetV2, and NASNetMobile).</p><p>Through hyperparameter tuning, the DenseNet121 architecture was found to yield the best performance. Its final configuration included dropout layer 1 with 0.3, dense layer 1 with 128 units, a learning rate of 0.00037758, and a weight decay of 7.4855e-05. This architecture and training regime were optimized to prevent overfitting while maintaining high model generalization on unseen data.</p></sec><sec id="s3-5"><title>Model Evaluation</title><p>Among the evaluated models, DenseNet121 delivered the best overall performance, achieving 98% accuracy, 96.8% precision, 98.8% recall, and an AUC of 0.998, indicating a well-balanced and highly effective binary classifier (<xref ref-type="fig" rid="figure1">Figure 1</xref>; <xref ref-type="table" rid="table4">Table 4</xref>). NASNetMobile and VGG16 also showed strong performance, with high scores across all metrics, making them solid alternatives. ResNet50 showed competitive results but fell slightly short of the top 3 models, particularly in precision. On the other hand, models, such as EfficientNetB0, EfficientNetV2B0, ConvNeXtTiny, and MobileNet, showed poor performance. Despite their perfect recall, their low precision and AUC values suggest that they overpredicted the positive class, leading to high false positive rates. MobileNetV2, despite a decent accuracy and AUC, failed to maintain balance across precision and recall, making it less suitable for reliable classification in this context. Given its superior and consistent results, DenseNet121 stands out as the most suitable model for deployment, offering both robustness and high predictive accuracy for this binary classification task.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The training and validation (A) accuracy and (B) loss curves of DenseNet121 (densely connected convolutional network-121 layers) over 30 epochs, showing strong learning convergence with minimal divergence between the training and validation sets, which is an indicator of effective generalization.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="xmed_v6i1e75015_fig01.png"/></fig><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Comparative analysis findings of performance metrics for all transfer learning models applied to the task of COVID-19 detection from medical images.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">EfficientNet<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup>B0</td><td align="left" valign="top">0.46219</td><td align="left" valign="top">0.46219</td><td align="left" valign="top">1.00000</td><td align="left" valign="top">0.63218</td><td align="left" valign="top">0.33122</td></tr><tr><td align="left" valign="top">EfficientNetV2B0</td><td align="left" valign="top">0.46219</td><td align="left" valign="top">0.46219</td><td align="left" valign="top">1.00000</td><td align="left" valign="top">0.63218</td><td align="left" valign="top">0.63435</td></tr><tr><td align="left" valign="top">MobileNet<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="top">0.54306</td><td align="left" valign="top">0.50287</td><td align="left" valign="top">0.99545</td><td align="left" valign="top">0.66819</td><td align="left" valign="top">0.93267</td></tr><tr><td align="left" valign="top">ConvNeXtTiny<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="top">0.46219</td><td align="left" valign="top">0.46219</td><td align="left" valign="top">1.00000</td><td align="left" valign="top">0.63218</td><td align="left" valign="top">0.50726</td></tr><tr><td align="left" valign="top">ResNet50<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">0.92542</td><td align="left" valign="top">0.87885</td><td align="left" valign="top">0.97273</td><td align="left" valign="top">0.92341</td><td align="left" valign="top">0.99033</td></tr><tr><td align="left" valign="top">VGG16<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">0.93487</td><td align="left" valign="top">0.91087</td><td align="left" valign="top">0.95227</td><td align="left" valign="top">0.93111</td><td align="left" valign="top">0.98431</td></tr><tr><td align="left" valign="top">NASNetMobile<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="left" valign="top">0.95798</td><td align="left" valign="top">0.93290</td><td align="left" valign="top">0.97954</td><td align="left" valign="top">0.95565</td><td align="left" valign="top">0.99619</td></tr><tr><td align="left" valign="top">MobileNetV2</td><td align="left" valign="top">0.97370</td><td align="left" valign="top">0.96874</td><td align="left" valign="top">0.97773</td><td align="left" valign="top">0.97321</td><td align="left" valign="top">0.97990</td></tr><tr><td align="left" valign="top">DenseNet121<sup><xref ref-type="table-fn" rid="table4fn8">h</xref></sup></td><td align="left" valign="top">0.98004</td><td align="left" valign="top">0.96882</td><td align="left" valign="top">0.98864</td><td align="left" valign="top">0.97863</td><td align="left" valign="top">0.99830</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p></fn><fn id="table4fn2"><p><sup>b</sup>EfficientNet: efficient network.</p></fn><fn id="table4fn3"><p><sup>c</sup>MobileNet: mobile network.</p></fn><fn id="table4fn4"><p><sup>d</sup>ConvNeXtTiny: convolutional next-tiny.</p></fn><fn id="table4fn5"><p><sup>e</sup>ResNet50: residual network-50 layers.</p></fn><fn id="table4fn6"><p><sup>f</sup>VGG16: Visual Geometry Group network-16 layers.</p></fn><fn id="table4fn7"><p><sup>g</sup>NASNetMobile: neural architecture search network-mobile version.</p></fn><fn id="table4fn8"><p><sup>h</sup>DenseNet121: densely connected convolutional network-121 layers.</p></fn></table-wrap-foot></table-wrap><p>The confusion matrix reflects DenseNet121&#x2019;s exceptional classification accuracy with minimal misclassification (<xref ref-type="fig" rid="figure2">Figure 2A</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>(A) Confusion matrix and (B) receiver operating characteristic curve for DenseNet121 (densely connected convolutional network-121 layers). AUC: area under the receiver operating characteristic curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="xmed_v6i1e75015_fig02.png"/></fig><p>The balance indicates that the model is not only highly accurate but also well-calibrated in terms of sensitivity (recall) and specificity.</p><p>The receiver operating characteristic curve further supports these results, with an AUC of 1.00, demonstrating near-perfect separation between positive and negative classes. The curve closely hugs the top-left corner, indicating an excellent tradeoff between the true positive rate and false positive rate (<xref ref-type="fig" rid="figure2">Figure 2B</xref>).</p><p>Together, these visualizations affirm DenseNet121&#x2019;s reliability and robustness for the binary classification task of COVID-19 detection, outperforming other evaluated architectures in both quantitative metrics and qualitative visual assessment.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Summary</title><p>The results show that DenseNet121 achieved the highest performance, with 98% accuracy, 96.8% precision, and 98.8% recall, demonstrating robust diagnostic capabilities.</p></sec><sec id="s4-2"><title>Conclusion</title><p>This study introduces a robust deep learning framework for COVID-19 diagnosis using chest X-ray and CT imaging, emphasizing both high model performance and real-world deployment feasibility. Leveraging imaging data from 19 countries across diverse age groups, genders, and COVID-19 variants, the study used comprehensive preprocessing, undersampling, and data augmentation techniques to ensure balanced and representative datasets. To ensure practical deployment, models were optimized through quantization and pruning, making them lightweight and suitable for web-based diagnostic platforms via cloud APIs (Flask or RESTAPI with TensorFlow Serving) and mobile apps using TensorFlow Lite or ONNX for on-device diagnosis, which can be especially valuable in low-resource and rural settings. The framework further integrates Grad-CAM visualizations for explainability, federated learning for privacy-preserving collaboration across hospitals, and longitudinal monitoring for tracking long COVID or reinfection cases. These features collectively position the system as a clinically relevant, mutation-resilient, and scalable solution for COVID-19 screening and triage in modern health care environments. For future work, there is an aim to extend this framework to multiclass classification, distinguishing between lung pathologies such as tuberculosis, AIDS, and COVID-19. This initiative will be pursued in collaboration with clinicians to enhance diagnostic specificity and clinical utility.</p></sec><sec id="s4-3"><title>Future Work</title><sec id="s4-3-1"><title>Clinical Validation Across Institutions</title><p>There is an aim to collaborate with multiple hospitals and diagnostic centers to externally validate the model on institution-specific datasets. This will help assess the model&#x2019;s generalizability and robustness across different scanners, protocols, and patient populations.</p></sec><sec id="s4-3-2"><title>Integration With EHRs</title><p>Work is underway to integrate the diagnostic tool with EHR systems for seamless access to patient history and real-time imaging data, enabling context-aware predictions and decision support.</p></sec><sec id="s4-3-3"><title>Deployment on Web and Mobile Platforms</title><p>The final model is being optimized using techniques, such as quantization and pruning, for deployment on edge devices and cloud platforms. This will support real-time diagnosis via a web interface and mobile app, particularly in resource-constrained or rural areas.</p></sec><sec id="s4-3-4"><title>Regulatory Readiness and Clinical Trials</title><p>Documentation and performance benchmarks are being prepared to pursue regulatory approval (Conformit&#x00E9; Europ&#x00E9;enne marking and Food and Drug Administration clearance). A prospective clinical trial is also being designed to measure diagnostic impact in a real-world setting.</p></sec><sec id="s4-3-5"><title>Extension to Long COVID and Follow-Up Monitoring</title><p>There is a plan to adapt the system for longitudinal analysis, enabling clinicians to track radiological changes over time, which can be useful for monitoring long COVID progression or reinfections.</p></sec><sec id="s4-3-6"><title>Federated Learning for Privacy-Preserving AI</title><p>To support data privacy and multi-institutional collaboration, an attempt will be made to explore federated learning frameworks that allow model training on decentralized data without sharing patient images.</p></sec></sec></sec></body><back><ack><p>I would like to express my gratitude to my supervisor Li Zhang who shaped, guided, and refined my work through this experiment. Her subject expertise, intuition on the areas to explore, and patience as a teacher played a major part in making this project what it is today.</p></ack><notes><sec><title>Data Availability</title><p>The full implementation and the pretrained models are publicly available on GitHub [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">BIMCV-COVID19</term><def><p>Banco de Im&#x00E1;genes M&#x00E9;dicas de la Comunidad Valenciana&#x2013;COVID-19</p></def></def-item><def-item><term id="abb5">CNCB</term><def><p>China National Center for Bioinformation</p></def></def-item><def-item><term id="abb6">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb7">ConvNeXtTiny</term><def><p>convolutional next-tiny</p></def></def-item><def-item><term id="abb8">CT</term><def><p>computed tomography</p></def></def-item><def-item><term id="abb9">DenseNet121</term><def><p>densely connected convolutional network-121 layers</p></def></def-item><def-item><term id="abb10">EfficientNet</term><def><p>efficient network</p></def></def-item><def-item><term id="abb11">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb12">iCTCF</term><def><p>CT images and clinical features for COVID-19</p></def></def-item><def-item><term id="abb13">LFD</term><def><p>lateral flow device</p></def></def-item><def-item><term id="abb14">LIDC-IDRI</term><def><p>Lung Image Database Consortium image collection</p></def></def-item><def-item><term id="abb15">MIDRC-RICORD</term><def><p>Medical Imaging Data Resource Center - RSNA International COVID-19 Open Radiology Database</p></def></def-item><def-item><term id="abb16">MobileNet</term><def><p>mobile network</p></def></def-item><def-item><term id="abb17">NASNetMobile</term><def><p>neural architecture search network-mobile version</p></def></def-item><def-item><term id="abb18">RAT</term><def><p>rapid antigen test</p></def></def-item><def-item><term id="abb19">RestNet50</term><def><p>residual network-50 layers</p></def></def-item><def-item><term id="abb20">RT-PCR</term><def><p>reverse transcription&#x2013;quantitative polymerase chain reaction</p></def></def-item><def-item><term id="abb21">SIRM</term><def><p>Societ&#x00E0; Italiana di Radiologia Medica e Interventistica</p></def></def-item><def-item><term id="abb22">STOIC</term><def><p>Study of Thoracic CT in COVID-19</p></def></def-item><def-item><term id="abb23">TCIA</term><def><p>The Cancer Imaging Archive</p></def></def-item><def-item><term id="abb24">VGG16</term><def><p>Visual Geometry Group network-16 layers</p></def></def-item><def-item><term id="abb25">WHO</term><def><p>World Health Organization</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Pneumonia of unknown cause &#x2013; China</article-title><source>World Health Organization</source><year>2020</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/emergencies/disease-outbreak-news/item/2020-DON229">https://www.who.int/emergencies/disease-outbreak-news/item/2020-DON229</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Coronavirus disease (COVID-19) - Overview</article-title><source>World Health Organization</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/health-topics/coronavirus#tab=tab_1">https://www.who.int/health-topics/coronavirus#tab=tab_1</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>COVID-19: new variants in 2025</article-title><source>Ada Health</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ada.com/covid/what-strain-of-covid-is-going-around/">https://ada.com/covid/what-strain-of-covid-is-going-around/</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><article-title>Weekly epidemiological update on COVID-19 - 25 August 2023</article-title><source>World Health Organization</source><year>2023</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/m/item/weekly-epidemiological-update-on-covid-19---30-august-2023">https://www.who.int/publications/m/item/weekly-epidemiological-update-on-covid-19---30-august-2023</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>COVID-19 Public Health Emergency of International Concern (PHEIC) Global research and innovation forum</article-title><source>World Health Organization</source><year>2020</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/m/item/covid-19-public-health-emergency-of-international-concern-%28pheic%29-global-research-and-innovation-forum">https://www.who.int/publications/m/item/covid-19-public-health-emergency-of-international-concern-%28pheic%29-global-research-and-innovation-forum</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>COVID-19 - global situation</article-title><source>World Health Organization</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/emergencies/disease-outbreak-news/item/2025-DON572">https://www.who.int/emergencies/disease-outbreak-news/item/2025-DON572</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Katella</surname><given-names>K</given-names> </name></person-group><article-title>3 things to know about XEC, the dominant COVID strain</article-title><source>Yale Medicine</source><year>2024</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.yalemedicine.org/news/3-things-to-know-about-xec-the-latest-covid-strain">https://www.yalemedicine.org/news/3-things-to-know-about-xec-the-latest-covid-strain</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>Surveillance and data analytics</article-title><source>Centers for Disease Control and Prevention</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://covid.cdc.gov/covid-data-tracker">https://covid.cdc.gov/covid-data-tracker</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><article-title>WHO&#x2019;s 2025 updates on COVID-19 variants: focus on XEC, testing, and recovery</article-title><source>ASSURE</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://assure-test.com/2025/02/05/whos-2025-updates-on-covid-19-variants-focus-on-xec-testing-and-recovery/">https://assure-test.com/2025/02/05/whos-2025-updates-on-covid-19-variants-focus-on-xec-testing-and-recovery/</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>Tracking SARS-CoV-2 variants</article-title><source>World Health Organization</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/activities/tracking-sars-cov-2-variants">https://www.who.int/activities/tracking-sars-cov-2-variants</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><article-title>Coronavirus disease (COVID-19) - Symptoms</article-title><source>World Health Organization</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/health-topics/coronavirus#tab=tab_3">https://www.who.int/health-topics/coronavirus#tab=tab_3</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="web"><article-title>COVID-19 symptoms: Omicron vs. Delta</article-title><source>Ada Health</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ada.com/covid/covid-19-omicron-vs-delta-symptoms/">https://ada.com/covid/covid-19-omicron-vs-delta-symptoms/</ext-link></comment></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ullah</surname><given-names>SMA</given-names> </name><name name-style="western"><surname>Islam</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Mahmud</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nooruddin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Raju</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haque</surname><given-names>MR</given-names> </name></person-group><article-title>Scalable telehealth services to combat novel coronavirus (COVID-19) pandemic</article-title><source>SN Comput Sci</source><year>2021</year><volume>2</volume><issue>1</issue><fpage>18</fpage><pub-id pub-id-type="doi">10.1007/s42979-020-00401-x</pub-id><pub-id pub-id-type="medline">33426530</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>ZQ</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>A</given-names> </name></person-group><article-title>COVID-Net: a tailored deep convolutional neural network design for detection of COVID-19 cases from chest x-ray images</article-title><source>Sci Rep</source><year>2020</year><month>11</month><day>11</day><volume>10</volume><issue>1</issue><fpage>19549</fpage><pub-id pub-id-type="doi">10.1038/s41598-020-76550-z</pub-id><pub-id pub-id-type="medline">33177550</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reshan</surname><given-names>MSA</given-names> </name><name name-style="western"><surname>Gill</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Anand</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Detection of pneumonia from chest x-ray images utilizing MobileNet model</article-title><source>Healthcare (Basel)</source><year>2023</year><month>05</month><day>26</day><volume>11</volume><issue>11</issue><fpage>1561</fpage><pub-id pub-id-type="doi">10.3390/healthcare11111561</pub-id><pub-id pub-id-type="medline">37297701</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mujahid</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rustam</surname><given-names>F</given-names> </name><name name-style="western"><surname>&#x00C1;lvarez</surname><given-names>R</given-names> </name><name name-style="western"><surname>Luis Vidal Maz&#x00F3;n</surname><given-names>J</given-names> </name><name name-style="western"><surname>D&#x00ED;ez</surname><given-names>I de la T</given-names> </name><name name-style="western"><surname>Ashraf</surname><given-names>I</given-names> </name></person-group><article-title>Pneumonia classification from x-ray images with Inception-V3 and Convolutional Neural Network</article-title><source>Diagnostics (Basel)</source><year>2022</year><month>05</month><day>21</day><volume>12</volume><issue>5</issue><fpage>1280</fpage><pub-id pub-id-type="doi">10.3390/diagnostics12051280</pub-id><pub-id pub-id-type="medline">35626436</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ghaderzadeh</surname><given-names>M</given-names> </name><name name-style="western"><surname>Asadi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Jafari</surname><given-names>R</given-names> </name><name name-style="western"><surname>Bashash</surname><given-names>D</given-names> </name><name name-style="western"><surname>Abolghasemi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Aria</surname><given-names>M</given-names> </name></person-group><article-title>Deep convolutional neural network-based computer-aided detection system for COVID-19 using multiple lung scans: design and implementation study</article-title><source>J Med Internet Res</source><year>2021</year><month>04</month><day>26</day><volume>23</volume><issue>4</issue><fpage>e27468</fpage><pub-id pub-id-type="doi">10.2196/27468</pub-id><pub-id pub-id-type="medline">33848973</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Ji</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>An update on detection technologies for SARS-CoV-2 variants of concern</article-title><source>Viruses</source><year>2022</year><month>10</month><day>22</day><volume>14</volume><issue>11</issue><fpage>2324</fpage><pub-id pub-id-type="doi">10.3390/v14112324</pub-id><pub-id pub-id-type="medline">36366421</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mir&#x00F3; Catalina</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Fuster-Casanovas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sol&#x00E9;-Casals</surname><given-names>J</given-names> </name><name name-style="western"><surname>Vidal-Alaball</surname><given-names>J</given-names> </name></person-group><article-title>Developing an artificial intelligence model for reading chest x-rays: protocol for a prospective validation study</article-title><source>JMIR Res Protoc</source><year>2022</year><month>11</month><day>16</day><volume>11</volume><issue>11</issue><fpage>e39536</fpage><pub-id pub-id-type="doi">10.2196/39536</pub-id><pub-id pub-id-type="medline">36383419</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Mellis</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Ho</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Recurrent SARS-CoV-2 spike mutations confer growth advantages to select JN.1 sublineages</article-title><source>Emerg Microbes Infect</source><year>2024</year><month>12</month><volume>13</volume><issue>1</issue><fpage>2402880</fpage><pub-id pub-id-type="doi">10.1080/22221751.2024.2402880</pub-id><pub-id pub-id-type="medline">39259045</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Scott Mader</surname><given-names>K</given-names> </name></person-group><article-title>The Lung Image Database Consortium image collection (LIDC-IDRI)</article-title><source>IEEE DataPort</source><year>2021</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ieee-dataport.org/documents/lung-image-database-consortium-image-collection-lidc-idri">https://ieee-dataport.org/documents/lung-image-database-consortium-image-collection-lidc-idri</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><source>SIRM - Societ&#x00E0; Italiana di Radiologia Medica e Interventistica</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://sirm.org">https://sirm.org</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>BIMCV-COVID19, Conjuntos de datos relacionados con el curso de patolog&#x00ED;a de COVID19 [Article in Spanish]</article-title><source>BIMCV</source><year>2023</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bimcv.cipf.es/bimcv-projects/bimcv-covid19/">https://bimcv.cipf.es/bimcv-projects/bimcv-covid19/</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>iCTCF: CT images and clinical features for COVID-19</article-title><source>National Genomics Data Center - China National Center for Bioinformation</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ngdc.cncb.ac.cn/ictcf/">https://ngdc.cncb.ac.cn/ictcf/</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>CT images in COVID-19</article-title><source>The Cancer Imaging Archive</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerimagingarchive.net/collection/ct-images-in-covid-19/">https://www.cancerimagingarchive.net/collection/ct-images-in-covid-19/</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>MIDRC-RICORD-1A</article-title><source>The Cancer Imaging Archive</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerimagingarchive.net/collection/midrc-ricord-1a/">https://www.cancerimagingarchive.net/collection/midrc-ricord-1a/</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>MIDRC-RICORD-1B</article-title><source>The Cancer Imaging Archive</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerimagingarchive.net/collection/midrc-ricord-1b/">https://www.cancerimagingarchive.net/collection/midrc-ricord-1b/</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>MIDRC-RICORD-1C</article-title><source>The Cancer Imaging Archive</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerimagingarchive.net/collection/midrc-ricord-1c/">https://www.cancerimagingarchive.net/collection/midrc-ricord-1c/</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>STOIC2021 - COVID-19 AI Challenge</article-title><source>STOIC2021 Grand Challenge</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://stoic2021.grand-challenge.org/">https://stoic2021.grand-challenge.org/</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><article-title>COVID-19</article-title><source>Radiopaedia</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://radiopaedia.org/articles/covid-19-4">https://radiopaedia.org/articles/covid-19-4</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Datasets [Article in Russian]</article-title><source>Center of Diagnostics and Telemedicine</source><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://mosmed.ai/en/datasets/">https://mosmed.ai/en/datasets/</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Dharmik</surname><given-names>A</given-names> </name></person-group><article-title>COVID-19-APP</article-title><source>GitHub</source><year>2025</year><access-date>2025-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/AnjaliDharmik/COVID-19-APP">https://github.com/AnjaliDharmik/COVID-19-APP</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1 </label><p>Distribution of COVID-positive and normal chest images by country.</p><media xlink:href="xmed_v6i1e75015_app1.png" xlink:title="PNG File, 24 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2 </label><p>Source-wise distribution of imaging data used in the study.</p><media xlink:href="xmed_v6i1e75015_app2.png" xlink:title="PNG File, 19 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3 </label><p>Label-wise distribution of COVID-positive and negative cases across various data sources.</p><media xlink:href="xmed_v6i1e75015_app3.png" xlink:title="PNG File, 23 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4 </label><p>Bar chart of image count per label and country after data augmentation, illustrating a balanced distribution of COVID and normal images across 6 countries, which ensured class uniformity for training deep learning models.</p><media xlink:href="xmed_v6i1e75015_app4.png" xlink:title="PNG File, 27 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5 </label><p>Bar chart of image count per label and country, showing the distribution of COVID-19 and normal images across 6 countries after data augmentation.</p><media xlink:href="xmed_v6i1e75015_app5.png" xlink:title="PNG File, 39 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6 </label><p>Bar chart of the total image count per label after augmentation.</p><media xlink:href="xmed_v6i1e75015_app6.png" xlink:title="PNG File, 17 KB"/></supplementary-material></app-group></back></article>