<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIRx Med</journal-id><journal-id journal-id-type="publisher-id">xmed</journal-id><journal-id journal-id-type="index">34</journal-id><journal-title>JMIRx Med</journal-title><abbrev-journal-title>JMIRx Med</abbrev-journal-title><issn pub-type="epub">2563-6316</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v6i1e66029</article-id><article-id pub-id-type="doi">10.2196/66029</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Improving Tuberculosis Detection in Chest X-Ray Images Through Transfer Learning and Deep Learning: Comparative Study of Convolutional Neural Network Architectures</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Mirugwe</surname><given-names>Alex</given-names></name><degrees>BSc, MSci</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tamale</surname><given-names>Lillian</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nyirenda</surname><given-names>Juwa</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>School of Public Health, Makerere University</institution><addr-line>Kawalya Kaggwa Close, Plot 20A</addr-line><addr-line>Kampala</addr-line><country>Uganda</country></aff><aff id="aff2"><institution>Faculty of Science and Technology, Victoria University</institution><addr-line>Kampala</addr-line><country>Uganda</country></aff><aff id="aff3"><institution>Department of Statistical Sciences, University of Cape Town</institution><addr-line>Cape Town</addr-line><country>South Africa</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Amal</surname><given-names>Saeed</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Nanthasamroeng</surname><given-names>Natthapong</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Pitakaso</surname><given-names>Rapeepan</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Alex Mirugwe, BSc, MSci, School of Public Health, Makerere University, Kawalya Kaggwa Close, Plot 20A, Kampala, Uganda, 256 701120534; <email>mirugwealex@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>1</day><month>7</month><year>2025</year></pub-date><volume>6</volume><elocation-id>e66029</elocation-id><history><date date-type="received"><day>02</day><month>09</month><year>2024</year></date><date date-type="rev-recd"><day>27</day><month>03</month><year>2025</year></date><date 
date-type="accepted"><day>16</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Alex Mirugwe, Lillian Tamale, Juwa Nyirenda. Originally published in JMIRx Med (<ext-link ext-link-type="uri" xlink:href="https://med.jmirx.org">https://med.jmirx.org</ext-link>), 1.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIRx Med, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://med.jmirx.org/">https://med.jmirx.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://xmed.jmir.org/2025/1/e66029"/><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.1101/2024.08.02.24311396v1" xlink:title="Preprint (medRxiv)" xlink:type="simple">https://www.medrxiv.org/content/10.1101/2024.08.02.24311396v1</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/77171" xlink:title="Peer-Review Report by Rapeepan Pitakaso (Reviewer AE)" xlink:type="simple">https://med.jmirx.org/2025/1/e 77171</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/77174" xlink:title="Peer-Review Report by Natthapong Nanthasamroeng (Reviewer AI)" xlink:type="simple">https://med.jmirx.org/2025/1/e 77174</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/77221" xlink:title="Authors' Response to Peer-Review Reports" xlink:type="simple">https://med.jmirx.org/2025/1/e 77221</related-article><abstract><sec><title>Background</title><p>Tuberculosis (TB) remains a significant global health challenge, as current diagnostic methods are often resource-intensive, time-consuming, and inaccessible in many high-burden communities, necessitating more efficient and accurate diagnostic methods to improve early detection and treatment outcomes.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the performance of 6 convolutional neural network architectures&#x2014;Visual Geometry Group-16 (VGG16), VGG19, Residual Network-50 (ResNet50), ResNet101, ResNet152, and Inception-ResNet-V2&#x2014;in classifying chest x-ray (CXR) images as either normal or TB-positive. The impact of data augmentation on model performance, training times, and parameter counts was also assessed.</p></sec><sec sec-type="methods"><title>Methods</title><p>The dataset of 4200 CXR images, comprising 700 labeled as TB-positive and 3500 as normal cases, was used to train and test the models. Evaluation metrics included accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score, and area under the receiver operating characteristic curve. 
The computational efficiency of each model was analyzed by comparing training times and parameter counts.</p></sec><sec sec-type="results"><title>Results</title><p>VGG16 outperformed the other architectures, achieving an accuracy of 99.4%, precision of 97.9%, recall of 98.6%, <italic>F</italic><sub>1</sub>-score of 98.3%, and area under the receiver operating characteristic curve of 98.25%. This superior performance is significant because it demonstrates that a simpler model can deliver exceptional diagnostic accuracy while requiring fewer computational resources. Surprisingly, data augmentation did not improve performance, suggesting that the original dataset&#x2019;s diversity was sufficient. Models with large numbers of parameters, such as ResNet152 and Inception-ResNet-V2, required longer training times without yielding proportionally better performance.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Simpler models like VGG16 offer a favorable balance between diagnostic accuracy and computational efficiency for TB detection in CXR images. These findings highlight the need to tailor model selection to task-specific requirements, providing valuable insights for future research and clinical implementations in medical image classification.</p></sec></abstract><kwd-group><kwd>tuberculosis detection</kwd><kwd>tuberculosis</kwd><kwd>TB</kwd><kwd>chest x-ray classification</kwd><kwd>diagnostic imaging</kwd><kwd>radiology</kwd><kwd>medical imaging</kwd><kwd>convolutional neural networks</kwd><kwd>data augmentation</kwd><kwd>deep learning</kwd><kwd>early warning</kwd><kwd>early detection</kwd><kwd>comparative study</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Tuberculosis (TB) remains one of the leading infectious diseases worldwide, affecting an estimated one-third to one-fourth of the global population with the bacillus <italic>Mycobacterium tuberculosis</italic>, the causative agent of TB [<xref ref-type="bibr" rid="ref1">1</xref>]. In 2019, it was estimated that over 10 million individuals globally contracted TB; yet, only 71% were detected, diagnosed, and reported through various countries&#x2019; national TB programs, leaving approximately 29% of cases unreported [<xref ref-type="bibr" rid="ref2">2</xref>]. According to the World Health Organization&#x2019;s (WHO&#x2019;s) 2023 TB report, TB was identified as the second most common cause of death among infectious diseases [<xref ref-type="bibr" rid="ref3">3</xref>]. Furthermore, the global incidence rate of TB remains alarmingly high at approximately 133 new cases per 100,000 people annually. This situation underscores the need for prompt, effective, and affordable screening and treatment strategies to meet the WHO&#x2019;s ambitious goals of reducing TB incidence by 80%, decreasing TB mortality by 90%, and eliminating catastrophic financial burdens on families affected by TB by 2030 [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>The WHO advised member countries to proactively conduct TB screening and detection, especially within the high-risk groups, taking into account their unique epidemic scenarios and financial levels [<xref ref-type="bibr" rid="ref5">5</xref>]. 
While bacteriological tests, including sputum cultures, sputum smears, and molecular diagnostics, are considered the gold standard for identifying active TB cases, their application on a large scale, particularly among high-risk populations, is not feasible [<xref ref-type="bibr" rid="ref6">6</xref>]. This limitation is due to the methods being resource-intensive, logistically challenging, and associated with prolonged turnaround times [<xref ref-type="bibr" rid="ref7">7</xref>]. As a result, chest radiography has become the most prevalent method for early TB detection [<xref ref-type="bibr" rid="ref8">8</xref>]. However, in countries with limited resources, which also bear the highest TB burden, the availability of chest radiography screenings remains inadequate, primarily due to a shortage of radiologists [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>In recent years, significant advancements have been made in leveraging artificial intelligence (AI), particularly through machine learning and deep learning techniques, for analyzing chest x-ray (CXR) images to differentiate between TB-positive and TB-negative images [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. This innovation has enabled individuals without radiology expertise to conduct TB screening tests, presenting a significant shift in diagnostic approaches. These technologies have shown promising results, to the extent of outperforming radiologists in the interpretation of CXR images [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Despite this progress, the adoption of AI-based TB detection in low-income countries faces limitations, including a lack of computational resources, inconsistent data quality, and the need for models tailored to diverse clinical and demographic contexts. Addressing these challenges is critical to ensuring the scalability and utility of AI-driven diagnostic tools in these settings.</p><p>This research investigates the effectiveness of different convolutional neural network (CNN) architectures in classifying TB in CXR images. We compare and evaluate the performance of popular CNN models, including Residual Network (ResNet), Inception, and Visual Geometry Group (VGG), and examine the impact of different hyperparameters on classification accuracy. The choice of these architectures is motivated by gaps in existing literature, where limited studies compare the performance of advanced CNN models on larger, diverse datasets. Additionally, we explore the impact of transfer learning and data augmentation techniques, providing insights into their role in optimizing model performance.</p><p>To the best of our knowledge, this study is the first to use a larger and more diverse dataset and conduct a comprehensive comparison of the latest CNN architectures, including ResNet101, ResNet152, and Inception-ResNet-V2, assessed across different parameters. The research aims to address the following questions: (1) How does the choice of CNN architecture affect the classification performance? (2) What is the optimal hyperparameter configuration for each CNN architecture? (3) Can transfer learning be leveraged to improve classification accuracy? (4) How does incorporating data augmentation techniques impact the model&#x2019;s performance compared to training solely on real images?</p><p>The rest of the paper is organized as follows. 
In the Related Work section, we present the literature review, which provides an overview of the current state of research in the field. This is followed by the Methods section, where we describe the deep learning models used in this research along with the techniques for improving training time, such as transfer learning. We also describe the data and analysis procedures used in our study, such as data augmentation to mitigate class imbalance. Next, we present the results of our analysis. Finally, we discuss the implications of our results, conclude with a summary of our main findings, and suggest areas for future research.</p></sec><sec id="s1-2"><title>Related Work</title><p>Research in the field of medical imaging, particularly in automating the screening and identification of TB from CXR images, has progressed significantly. Initial investigations explored traditional machine learning techniques, including support vector machines [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>], decision trees [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], random forests [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], and extreme gradient boosting [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], among others. However, recent advancements have shifted focus toward deep learning methods, such as CNNs, which have demonstrated promising results in image classification comparable to those of radiologists [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. Below, we review some of the recent studies that have used deep learning approaches for detecting TB in CXR images.</p><p>Hooda et al [<xref ref-type="bibr" rid="ref13">13</xref>] proposed a 19-layer CNN architecture for detecting TB, consisting of 7 convolutional layers, 7 rectified linear unit (ReLU) layers, 3 fully connected layers, and 2 dropout layers. The model was trained on a dataset of 800 CXR images, each resized to 224&#x00D7;224 pixels. Using the Adam optimizer, the study achieved notable results, with an overall accuracy of 94.73% and a validation accuracy of 82.09%. Although these results are impressive, the authors identified potential areas for further improvements. They suggested investigating the impacts of data augmentation and transfer learning on the model&#x2019;s performance, highlighting avenues for future research enhancements and potential increases in accuracy.</p><p>Ojasvi et al [<xref ref-type="bibr" rid="ref25">25</xref>] developed a classification algorithm for CXR images of potential patients with TB, aiming to improve upon existing models [<xref ref-type="bibr" rid="ref26">26</xref>]. To mitigate dataset imbalance and improve model reliability, they combined the NIH Chest X-ray Dataset, China-Shenzhen Chest X-ray Database, and Montgomery County Chest X-ray Database to train and fine-tune their model. By implementing coarse-to-fine transfer learning and extensive data augmentation techniques, they achieved a remarkable accuracy of 94.89% compared to the accuracy of 89.6% achieved by Cao et al [<xref ref-type="bibr" rid="ref26">26</xref>]. 
However, the study acknowledges the challenge of maintaining equivalent precision across CXR images obtained in varied settings, as the model was specifically trained for the Chinese dataset.</p><p>Panicker et al [<xref ref-type="bibr" rid="ref27">27</xref>] introduced a novel 2-stage detection method for TB bacilli, using image binarization and CNN classification to analyze microscopic sputum smear images. The method was evaluated on a diverse dataset of 22 images, and the model demonstrated high effectiveness, achieving a recall rate of 97.13%, a precision of 78.4%, and an <italic>F</italic><sub>1</sub>-score of 86.76%. However, the study noted that the model&#x2019;s ability to accurately detect overlapping bacilli was limited. In the same year, Stirenko et al [<xref ref-type="bibr" rid="ref28">28</xref>] explored the application of lung segmentation and data augmentation to enhance TB detection from CXR images. Their study highlights the critical role of preprocessing, including lung segmentation and data augmentation, in addressing overfitting issues and improving the effectiveness of computer-aided diagnosis systems in TB identification, particularly when working with limited datasets.</p><p>The study by Kazemzadeh et al [<xref ref-type="bibr" rid="ref15">15</xref>] developed a deep learning algorithm for detecting active pulmonary TB from CXR images. The algorithm was trained and validated on a dataset comprising 165,754 images from 22,284 patients from 10 different countries. The algorithm&#x2019;s performance was compared to that of 14 radiologists on datasets from 4 countries, including a cohort from a South African mining population. It achieved an area under the receiver operating characteristic curve (AUC-ROC) of 0.89, with superior sensitivity (88% vs 75%; <italic>P</italic>=.05) and comparable specificity (79% vs 84%) to radiologists, demonstrating its potential for TB screening in resource-limited settings. Another study by Nijiati et al [<xref ref-type="bibr" rid="ref29">29</xref>] used a 3D ResNet-50 CNN architecture to differentiate active from nonactive pulmonary TB using computed tomography images. This study, similar to that of Kazemzadeh et al [<xref ref-type="bibr" rid="ref15">15</xref>], reported high diagnostic accuracy and efficiency, outperforming conventional radiological methods in terms of speed and precision.</p><p>In their 2019 study, Meraj et al [<xref ref-type="bibr" rid="ref30">30</xref>] used CNN architectures such as VGG16, VGG19, ResNet50, and GoogLeNet to automate the detection of TB manifestations in CXRs using 2 public TB image datasets [<xref ref-type="bibr" rid="ref31">31</xref>]. Their findings showed that the VGG16 model outperformed other architectures in terms of accuracy and AUC-ROC. However, the study was limited by its reliance on small and unbalanced datasets, raising questions about the generalizability of the results. In contrast, our research builds upon and extends the work of Meraj et al [<xref ref-type="bibr" rid="ref30">30</xref>] by incorporating a larger and more diverse dataset. We also explore the diagnostic capabilities of more advanced CNN architectures, including ResNet101, ResNet152, and Inception-ResNet-V2, to assess their effectiveness in TB detection. This approach aims to provide a more comprehensive understanding of how recent deep learning advancements can be leveraged for more accurate TB diagnosis in varied clinical settings. 
The Methods section details the methodological framework to achieve these objectives.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>In this section, we provide a comprehensive overview of the methodologies used in our study, including the dataset and preprocessing, data normalization, data augmentation, the application of transfer learning methods, the architecture of CNNs used, and the evaluation metrics adopted to assess the performance of the models.</p><sec id="s2-1"><title>Implementation Overview</title><p>The implementation framework illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref> starts with the acquisition of a well-defined dataset, followed by comprehensive data preprocessing, which includes data augmentation, resizing, normalization, and partitioning into training, validation, and test sets. Subsequently, we embark on the development of various deep learning models. These models undergo extensive training and evaluation against different hyperparameters and evaluation metrics to accurately predict and classify CXR images into positive or negative cases of TB.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The implementation flow of the deep learning classification methodology. ResNet: Residual Network; VGG: Visual Geometry Group.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="xmed_v6i1e66029_fig01.png"/></fig></sec><sec id="s2-2"><title>Dataset</title><p>The dataset used in this research comprises 4200 CXR images sourced from a public Kaggle data repository. The dataset was compiled through a collaborative effort between researchers from Qatar University (Doha, Qatar) and the University of Dhaka (Bangladesh) and collaborators from Malaysia. They worked closely with medical professionals from the Hamad Medical Corporation (Doha, Qatar) and various health care institutions in Bangladesh. The dataset consists of 700 CXR images indicative of TB and 3500 CXR images classified as normal, with all images having a resolution of 512&#x00D7;512 pixels [<xref ref-type="bibr" rid="ref32">32</xref>]. This composition provides a substantial foundation for evaluating the effectiveness of CNN models in the detection of TB from CXR images. <xref ref-type="fig" rid="figure2">Figure 2</xref> presents some of the images from the dataset.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The chest x-ray sample images. (<bold>A</bold>) Tuberculosis-negative and (<bold>B</bold>) tuberculosis-positive.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="xmed_v6i1e66029_fig02.png"/></fig></sec><sec id="s2-3"><title>Preprocessing</title><p>To optimize the performance and efficiency of our models, we implemented key preprocessing techniques, specifically data normalization and augmentation, before training the models.</p><sec id="s2-3-1"><title>Data Normalization</title><p>In the preprocessing stage of image analysis, normalization is a critical step to standardize the input data, facilitating the model&#x2019;s learning process. This study applies normalization to CXR images, which initially possess pixel intensity values in the range of 0 to 255, common for grayscale images [<xref ref-type="bibr" rid="ref33">33</xref>]. The goal of normalization is to adjust these intensity values to a standardized scale that improves computational efficiency and model convergence during training. 
The normalization process is mathematically represented as follows:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>I</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mtext>min</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mtext>min</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>I</italic> represents the original pixel intensity of the image, <italic>I</italic><sub>min</sub> and <italic>I</italic><sub>max</sub> are the minimum and maximum possible intensity values in the original image, respectively, and <italic>I</italic>&#x2032; is the normalized pixel intensity.</p><p>For grayscale images, <italic>I</italic><sub>min</sub>=0 and <italic>I</italic><sub>max</sub>=255. This equation effectively rescales the pixel intensity values to the range (0-1), making the input data more suitable for processing by the neural network layers. This normalization technique is advantageous because it ensures that each input parameter (pixel, in this case) contributes equally to the analysis, preventing features with initially larger ranges from dominating the learning process [<xref ref-type="bibr" rid="ref34">34</xref>]. It also helps to stabilize the gradient descent optimization algorithm by maintaining a consistent scale for all gradients [<xref ref-type="bibr" rid="ref35">35</xref>]. Previous studies have shown that normalization significantly improves convergence rates and ensures model stability, particularly in image classification tasks involving deep learning [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p></sec><sec id="s2-3-2"><title>Data Augmentation</title><p>Data augmentation represents a powerful regularization strategy designed to artificially increase the dataset through label-preserving transformations, thereby incorporating more invariant examples into the training set [<xref ref-type="bibr" rid="ref36">36</xref>]. This approach, characterized by its computational efficiency, has been previously used to reduce overfitting when training CNNs, such as in the ImageNet Large-Scale Visual Recognition Challenge (ILSVRC), where it contributed to achieving state-of-the-art results [<xref ref-type="bibr" rid="ref37">37</xref>]. This method enhances the robustness and generalizability of deep learning models by exposing them to a wider array of variations, simulating real-world variability.</p><p>In our study, to address the imbalance between TB-positive and TB-negative images and to introduce different variations, we randomly augmented 210 (30%) TB-positive images and 175 (5%) TB-negative images. The data augmentation techniques applied included random rotation within a range of 0 to 60 degrees, random width and height shifts of up to 0.2 times the image size, and random zooming of up to 0.2 times the original size, alongside horizontal and vertical flipping. To manage the newly created pixels from such transformations, a &#x201C;fill mode&#x201D; strategy was used, ensuring integrity and consistency in the augmented images. 
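</p><p>To make this configuration concrete, the following is a minimal sketch of the preprocessing and augmentation pipeline, assuming Keras&#x2019;s ImageDataGenerator API; the directory path, object names, and the specific fill-mode value are illustrative assumptions rather than settings taken verbatim from our code.</p><preformat>
# Minimal sketch of the rescaling and augmentation settings described above
# (illustrative names and paths; the fill-mode value shown is an assumption).
from tensorflow.keras.preprocessing.image import ImageDataGenerator

augmenter = ImageDataGenerator(
    rescale=1.0 / 255.0,      # min-max normalization of pixel intensities to the 0-1 range
    rotation_range=60,        # random rotation between 0 and 60 degrees
    width_shift_range=0.2,    # horizontal shift of up to 0.2 of the image width
    height_shift_range=0.2,   # vertical shift of up to 0.2 of the image height
    zoom_range=0.2,           # random zoom of up to 0.2 of the original size
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",      # strategy for newly created pixels after a transform
)

# Stream batches of augmented images from a hypothetical folder of class subdirectories.
train_generator = augmenter.flow_from_directory(
    "data/train",
    target_size=(256, 256),
    batch_size=32,
    class_mode="categorical",  # one-hot labels for the 2-unit softmax heads used later
)
</preformat><p>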
These augmentations were performed using Keras&#x2019;s ImageDataGenerator, a comprehensive data augmentation suite [<xref ref-type="bibr" rid="ref38">38</xref>].</p><p>While data augmentation techniques are widely adopted in deep learning research, our implementation aligns with prior studies that highlight their utility in addressing dataset imbalance and improving model generalization in medical imaging tasks [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Additionally, the augmentation strategy in this study was tailored to reflect the variability commonly observed in real-world CXR data, enhancing the robustness of our models. <xref ref-type="fig" rid="figure3">Figure 3</xref> shows a sample of real images and their corresponding augmented outputs.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Sample of real and corresponding augmented images.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="xmed_v6i1e66029_fig03.png"/></fig></sec></sec><sec id="s2-4"><title>Transfer Learning</title><p>Transfer learning is a machine learning technique where a model developed for a specific task is repurposed as the starting point for a model on a second, related task [<xref ref-type="bibr" rid="ref39">39</xref>]. This technique leverages the knowledge gained during the initial training phase in one domain to enhance learning in another, related domain. It operates under the principle that information learned in one context can be exploited to accelerate or improve the optimization process in another, essentially allowing for the transfer of learned features and patterns across different but related problems [<xref ref-type="bibr" rid="ref39">39</xref>].</p><p>In this study, we propose an implementation that capitalizes on the transfer learning paradigm by using pretrained models such as Inception-ResNet-V2, ResNet (50, 101, and 152), and VGG (16 and 19), which were initially trained on the ImageNet dataset [<xref ref-type="bibr" rid="ref37">37</xref>]. This adaptation involves fine-tuning and customizing the models&#x2019; last layers to suit our classification task, effectively tailoring the robust, prelearned representations of the ImageNet dataset to recognize and interpret the specific patterns and anomalies associated with TB in CXR images.</p><p>We opted for transfer learning over training models from scratch due to its significant advantages, particularly in the context of medical imaging. Training deep learning models from scratch requires large datasets, extensive computational resources, and longer training times. These requirements often pose challenges in health care&#x2013;related research, especially when working with relatively small or domain-specific datasets like CXRs. Transfer learning allows us to leverage the rich feature representations of pretrained models while reducing training time and computational demands. 
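</p><p>In Keras, this paradigm amounts to loading a pretrained convolutional base, freezing its weights, and attaching a new classification head. The sketch below illustrates the pattern with VGG16 as the base and the head configuration described for VGG16 in the next section; it is a simplified illustration of the approach, not our verbatim training script.</p><preformat>
# Illustrative transfer learning pattern: an ImageNet-pretrained VGG16 base is frozen
# and a new classification head is attached (head sizes follow the VGG16 setup below).
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models

base = VGG16(weights="imagenet", include_top=False, input_shape=(256, 256, 3))
base.trainable = False  # keep the pretrained convolutional features fixed

model = models.Sequential([
    base,
    layers.Flatten(),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.2),
    layers.Dense(2, activation="softmax"),  # TB-positive vs TB-negative
])

# Binary cross-entropy over the 2-unit softmax head, as described in the text.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
# model.fit(train_generator, validation_data=val_generator, epochs=15)
</preformat><p>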
Furthermore, studies have shown that transfer learning enhances model performance in medical imaging tasks by effectively repurposing features learned from general image datasets like ImageNet to domain-specific tasks [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s2-5"><title>CNN Architectures</title><p>In the next subsections, we provide a brief description of the VGG and ResNet families of CNN architectures as well as the Inception ResNet architecture that is considered in this study.</p><sec id="s2-5-1"><title>VGGNet</title><p>Introduced by Simonyan and Zisserman from the University of Oxford&#x2019;s Visual Geometry Group in 2014, the VGGNet architecture marked a significant milestone in the field of deep learning [<xref ref-type="bibr" rid="ref40">40</xref>]. Known for its outstanding performance in the ILSVRC of that year, VGGNet is characterized by its use of 3&#x00D7;3 filters in all convolutional layers, simulating the effects of larger receptive fields. This architecture is available in 2 variants, VGG16 and VGG19, differing in depth and the number of layers, with VGG19 being the deeper model.</p><p>In our research, we used both the VGG16 and VGG19 architectures to train models on datasets consisting of solely real CXR images and a combination of augmented and real images. This approach aimed to assess the impact of incorporating augmented images on the performance of these 2 architectures. Images were resized to 256&#x00D7;256 pixels before being input into the networks. We extended the architectures by adding a flattening layer, followed by a dense layer of 512 neurons with a ReLU activation function and a dropout layer with a dropout rate of 0.2 to mitigate overfitting. A softmax activation function was used in the output layer for binary classification. We used the Adam optimizer with the binary cross-entropy loss function for optimization. The training was conducted over 15 epochs with a batch size of 32 for both models. This rigorous approach ensured that both architectures could accurately distinguish between TB-positive and TB-negative CXR images.</p></sec><sec id="s2-5-2"><title>ResNet</title><p>He et al [<xref ref-type="bibr" rid="ref41">41</xref>] introduced the deep residual network (ResNet) architecture in their 2016 seminal paper. This architecture greatly improved the performance of deep neural networks and went on to win the Common Objects in Context object detection challenge and the 2015 ILSVRC. To date, several variants of the ResNet architecture exist, including ResNet50, ResNet101, and ResNet152, which vary in depth and number of layers. ResNet architectures are very deep models [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. The core idea behind ResNet is the use of residual connections, also known as shortcuts, which bypass 1 or more layers. By resolving the vanishing gradient issue, these shortcuts maintain the gradient flow across the network and facilitate the training of much deeper networks [<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>The CXR images in this study were classified using the ResNet50, ResNet101, and ResNet152 architectures. We added 3 more layers to the ResNet50 model: 2 with 256 units each and 1 with 512 units, using batch normalization and ReLU activation in each layer. To reduce overfitting, dropout layers were added with dropout rates of 0.3, 0.25, and 0.2, respectively. 
The binary cross-entropy loss function was used to compile the model, while the Adam optimizer was used to optimize the model at a learning rate of 0.001. Two units with a softmax activation function made up the output layer, which classified the images as either TB-positive or TB-negative. Training for this model used a batch size of 16 and 100 epochs.</p><p>ResNet101 was trained using the same settings as ResNet50, as preliminary training showed that the same parameter values used for ResNet50 also yielded optimal results for the ResNet101 architecture. For ResNet152, a selective fine-tuning approach was adopted, where only the last 10 layers of the network were trainable, enhancing the model&#x2019;s focus on more feature-specific adjustments in the later stages of the network. This model shared the same additional layers as ResNet50 but was trained for only 50 epochs, incorporating a learning rate scheduler, ReduceLROnPlateau, which adjusted the rate based on the validation loss with a factor of 0.1, patience of 5, and a minimum learning rate of 1&#x00D7;10<sup>&#x2212;6</sup>, thereby optimizing the training dynamics. The details of the models&#x2019; configuration are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Training hyperparameters of ResNet<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> models.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Hyperparameter</td><td align="left" valign="bottom">ResNet50</td><td align="left" valign="bottom">ResNet101</td><td align="left" valign="bottom">ResNet152</td></tr></thead><tbody><tr><td align="left" valign="top">Layers, n</td><td align="left" valign="top">53 (50 base + 3 extra)</td><td align="left" valign="top">104 (101 base + 3 extra)</td><td align="left" valign="top">155 (152 base + 3 extra)</td></tr><tr><td align="left" valign="top">Units per layer</td><td align="left" valign="top">256, 256, 512</td><td align="left" valign="top">256, 256, 512</td><td align="left" valign="top">256, 256, 512</td></tr><tr><td align="left" valign="top">Activation</td><td align="left" valign="top">ReLU<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">ReLU</td><td align="left" valign="top">ReLU</td></tr><tr><td align="left" valign="top">Batch normalization</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td></tr><tr><td align="left" valign="top">Dropout rate</td><td align="left" valign="top">0.3, 0.25, 0.2</td><td align="left" valign="top">0.3, 0.25, 0.2</td><td align="left" valign="top">0.3, 0.25, 0.2</td></tr><tr><td align="left" valign="top">Optimizer</td><td align="left" valign="top">Adam</td><td align="left" valign="top">Adam</td><td align="left" valign="top">Adam</td></tr><tr><td align="left" valign="top">Learning rate</td><td align="left" valign="top">0.001</td><td align="left" valign="top">0.001</td><td align="left" valign="top">Variable (ReduceLROnPlateau)</td></tr><tr><td align="left" valign="top">Loss function</td><td align="left" valign="top">Binary cross-entropy</td><td align="left" valign="top">Binary cross-entropy</td><td align="left" valign="top">Binary cross-entropy</td></tr><tr><td align="left" valign="top">Training epochs</td><td align="left" valign="top">100</td><td align="left" valign="top">100</td><td align="left" valign="top">50</td></tr><tr><td align="left"
valign="top">Batch size</td><td align="left" valign="top">16</td><td align="left" valign="top">16</td><td align="left" valign="top">16</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>ResNet: Residual Network.</p></fn><fn id="table1fn2"><p><sup>b</sup>ReLU: rectified linear unit.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-5-3"><title>Inception-ResNet</title><p>The Inception networks, introduced by Szegedy et al [<xref ref-type="bibr" rid="ref43">43</xref>], have greatly advanced the field of CNN, as they have achieved state-of-the-art performance in a number of computer vision problems [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. The original Inception-V1, also known as GoogLeNet, was first introduced in 2014 and won the ILSVRC of that year. The architecture introduced a novel approach of using multiple convolutional filter sizes in parallel, allowing the network to capture various spatial features of different scales with improved use of computing resources [<xref ref-type="bibr" rid="ref43">43</xref>].</p><p>In this study, we used Inception-ResNet-V2 architecture, a hybrid model that combines the benefits of both the Inception and residual networks. This hybrid approach enables the architecture to learn more complex features with improved training stability and faster convergence [<xref ref-type="bibr" rid="ref43">43</xref>]. The Inception-ResNet-V2 also leverages residual connections to skip certain layers during training, which helps it improve gradient flow, accelerate training times, and reduce the likelihood of vanishing gradient problems in deep networks [<xref ref-type="bibr" rid="ref46">46</xref>]. We selected Inception-ResNet-V2 due to its demonstrated state-of-the-art results in several medical imaging tasks [<xref ref-type="bibr" rid="ref45">45</xref>].</p><p>For our implementation, the Inception-ResNet-V2 architecture was initialized with weights pretrained on the ImageNet dataset. Similar to our approach with the ResNet152 model, all layers except the last 10 were frozen to retain the pretrained features from ImageNet. The last 10 layers were set to be trainable, enabling the model to learn specific features from the CXR images. We added 3 new layers: 2 with 256 units each and 1 with 512 units, all using ReLU activations and batch normalization. Each of these layers was followed by dropout layers with rates of 0.4, 0.35, and 0.3, respectively, to introduce nonlinearity and reduce overfitting. The final output layer consisted of 2 units with a softmax activation function for binary classification. The model was then compiled using binary cross-entropy as the loss function and the Adam optimizer with a learning rate of 0.0001. Training was conducted for 50 epochs with a batch size of 16.</p><p>The parameters used in the training of all these CNN architectures, including dropout rates, learning rates, batch sizes, and the number of epochs, were determined through a rigorous iterative process of experimentation. This approach involved fine-tuning each parameter to optimize model performance while avoiding overfitting. 
The configurations presented reflect the parameter values that consistently yielded good performance across the different architectures.</p></sec></sec><sec id="s2-6"><title>Evaluation Metrics</title><p>The performance of the CNN architectures in classifying CXR images into TB-positive and TB-negative categories was assessed using several standard performance metrics, including accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score, and the AUC-ROC. Each metric provides unique insights into the model&#x2019;s classification abilities, considering both the true and false predictions.</p><sec id="s2-6-1"><title>Accuracy</title><p>This metric measures the proportion of true positive (TP) and true negative (TN) results among the total number of cases examined:</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mtext>Accuracy</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where TP is the number of TB-positive images that are correctly identified as TB-positive by the model, TN is the number of TB-negative images that are correctly identified as TB-negative by the model, FP (false positives) is the number of TB-negative images that are incorrectly identified as TB-positive by the model, and FN (false negatives) is the number of TB-positive images that are incorrectly identified as TB-negative by the model.</p></sec><sec id="s2-6-2"><title>Precision</title><p>Also known as positive predictive value, precision is the ratio of correctly identified TB cases to all cases that were diagnosed as TB by the model. It measures the model&#x2019;s accuracy in diagnosing a patient with TB when the model predicts the disease. High precision indicates a low rate of false TB diagnoses. 
Mathematically, it is defined as:</p><disp-formula id="E3"><label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula></sec><sec id="s2-6-3"><title>Recall</title><p>Recall, or sensitivity, is especially critical in medical diagnostics, as it quantifies the model&#x2019;s ability to correctly identify all actual TB cases. It represents the proportion of actual TB cases that were correctly identified by the model and aims to minimize the risk of missing a true TB case. It is computed as:</p><disp-formula id="E4"><label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula></sec><sec id="s2-6-4"><title><italic>F</italic><sub>1</sub>-Score</title><p>The <italic>F</italic><sub>1</sub>-score is the harmonic mean of precision and recall, providing a single measure that balances both the FP and FN. In TB diagnosis, it is particularly useful because it creates a balance between precision (minimizing false TB diagnoses) and recall (minimizing missed TB diagnoses), which is crucial for medical screening tests. 
It is defined as:</p><disp-formula id="E5"><label>(5)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x00D7;</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x00D7;</mml:mo><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula></sec><sec id="s2-6-5"><title>AUC-ROC</title><p>The AUC-ROC measures a model&#x2019;s ability to discern between positive and negative classes. In the context of our problem, that specifically refers to distinguishing between TB-positive and TB-negative CXR images. The ROC curve is a plot of the true positive rate (TPR) against the false positive rate (FPR) at various threshold settings. The AUC-ROC provides an aggregated measure of the model&#x2019;s performance across all classification thresholds, with a value of 1 representing a perfect model and a value of 0.5 representing a model with no discriminatory power. 
The approximate AUC-ROC is calculated by using the following formula:</p><disp-formula id="E6"><label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">U</mml:mi><mml:mi mathvariant="normal">C</mml:mi></mml:mrow><mml:mrow/><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2248;</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mfrac><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>i</italic> is the current data point or threshold, FPR<sub><italic>i</italic></sub> and TPR<sub><italic>i</italic></sub> are the false positive and true positive rates at the <italic>i</italic>th threshold, respectively, and <italic>n</italic> is the number of data points or thresholds used to calculate the AUC-ROC. Each term in the sum represents the area of a trapezoid, where (FPR<sub><italic>i</italic></sub>&#x2212;FPR<sub><italic>i</italic>&#x2212;1</sub>) is the base of the trapezoid and (TPR<sub><italic>i</italic></sub>+TPR<sub><italic>i</italic>&#x2212;1</sub>)/2 is the average height of the trapezoid. The formula calculates the AUC-ROC by summing the areas of trapezoids formed by connecting consecutive points on the ROC curve.</p></sec></sec><sec id="s2-7"><title>Computational Environment</title><p>The implementation and findings of this study were based on using the Keras 3.3.3 and TensorFlow 2.16.1 frameworks. The experiments were conducted on a single-GPU MSI GL75 Leopard 10SFR laptop with 32 GB of RAM and an 8 GB NVIDIA GeForce RTX 2070 GDDR6 card. The system was operated using the CUDA 12.1 and cuDNN SDK 8.7.0 platforms to ensure efficient GPU acceleration and deep learning model training.</p><p>These methodological choices, including dataset selection, preprocessing techniques, CNN architectures, and model evaluation techniques, were designed to ensure a rigorous and comprehensive analysis of CNN performance for TB detection. 
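</p><p>For completeness, the evaluation metrics defined above can be reproduced from a model&#x2019;s test-set predictions in a few lines of code. The sketch below assumes scikit-learn&#x2019;s standard metric functions; the label and probability arrays are illustrative placeholders rather than study data.</p><preformat>
# Illustrative computation of the evaluation metrics defined above, assuming scikit-learn.
# y_true holds ground-truth labels (1 = TB-positive, 0 = normal), y_prob the predicted
# probability of TB, and y_pred the labels obtained by thresholding at 0.5.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

y_true = [1, 0, 0, 1, 0, 1]                      # hypothetical ground-truth labels
y_prob = [0.92, 0.10, 0.35, 0.80, 0.05, 0.60]    # hypothetical predicted probabilities
y_pred = [1 if p >= 0.5 else 0 for p in y_prob]  # thresholded predictions

print("Accuracy :", accuracy_score(y_true, y_pred))
print("Precision:", precision_score(y_true, y_pred))
print("Recall   :", recall_score(y_true, y_pred))
print("F1-score :", f1_score(y_true, y_pred))
print("AUC-ROC  :", roc_auc_score(y_true, y_prob))  # trapezoidal integration, as in equation 6
</preformat><p>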
The results of these analyses are presented in the following section.</p></sec><sec id="s2-8"><title>Ethical Considerations</title><p>This study used a publicly available, deidentified dataset from Kaggle. As such, it did not require institutional review board approval. The dataset does not contain any personally identifiable information, and informed consent was not applicable. No participants were directly involved in this study, and no compensation was provided.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>The study aimed to analyze and compare the performance of various CNN architectures, including VGG16, VGG19, ResNet50, ResNet101, ResNet152, and Inception-ResNet-V2, in classifying CXR images as either TB-positive or TB-negative. Additionally, we investigated whether data augmentation could further improve the classification performance of these models by comparing the performance of models trained on only real images versus those trained on a combination of real and augmented data. We further examined the training time and the number of parameters for each architecture to understand the computational efficiency and resource demands for each model. This analysis is important for practical implementation, particularly in resource-constrained settings where training time and computational costs are significant considerations. By evaluating these parameters, we aimed to identify models that not only perform well but also offer a balanced trade-off between accuracy and efficiency, making them suitable for real-world applications in diverse health care environments.</p><p><xref ref-type="table" rid="table2">Table 2</xref> summarizes the performance of CNN architectures across accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score, highlighting the impact of training on real images versus a combination of real and augmented data. <xref ref-type="table" rid="table3">Table 3</xref> shows the performance of these models when evaluated using the AUC-ROC score metric. It was observed that VGG16 outperformed all other architectures across all metrics, with an accuracy of 99.4%, precision of 97.9%, recall of 98.6%, <italic>F</italic><sub>1</sub>-score of 98.3%, and area under the curve of 98.25%. Its performance was consistently superior, irrespective of whether the models were trained with or without data augmentation.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Evaluation of convolutional neural network (CNN) architectures across key evaluation metrics<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Architecture</td><td align="left" valign="bottom">Accuracy (%)</td><td align="left" valign="bottom">Precision (%)</td><td align="left" valign="bottom">Recall (%)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score (%)</td></tr></thead><tbody><tr><td align="left" valign="top">VGG16<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="char" char="." valign="top">99.4</td><td align="char" char="." valign="top">97.9</td><td align="char" char="." valign="top">98.6</td><td align="char" char="." valign="top">98.3</td></tr><tr><td align="left" valign="top">VGG16<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="char" char="." valign="top">99.3</td><td align="char" char="."
valign="top">96.6</td><td align="char" char="." valign="top">99.3</td><td align="char" char="." valign="top">97.9</td></tr><tr><td align="left" valign="top">VGG19</td><td align="char" char="." valign="top">99.2</td><td align="char" char="." valign="top">96.6</td><td align="char" char="." valign="top">98.6</td><td align="char" char="." valign="top">97.6</td></tr><tr><td align="left" valign="top">VGG19<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="char" char="." valign="top">99.2</td><td align="char" char="." valign="top">96.6</td><td align="char" char="." valign="top">98.6</td><td align="char" char="." valign="top">97.6</td></tr><tr><td align="left" valign="top">ResNet50<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="char" char="." valign="top">96.1</td><td align="char" char="." valign="top">81.3</td><td align="char" char="." valign="top">96.9</td><td align="char" char="." valign="top">88.4</td></tr><tr><td align="left" valign="top">ResNet50<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="char" char="." valign="top">89</td><td align="char" char="." valign="top">97.5</td><td align="char" char="." valign="top">30</td><td align="char" char="." valign="top">45.9</td></tr><tr><td align="left" valign="top">ResNet101</td><td align="char" char="." valign="top">96.9</td><td align="char" char="." valign="top">94.8</td><td align="char" char="." valign="top">84.6</td><td align="char" char="." valign="top">89.3</td></tr><tr><td align="left" valign="top">ResNet101<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="char" char="." valign="top">97.3</td><td align="char" char="." valign="top">92.1</td><td align="char" char="." valign="top">90</td><td align="char" char="." valign="top">91.1</td></tr><tr><td align="left" valign="top">ResNet152</td><td align="char" char="." valign="top">97.9</td><td align="char" char="." valign="top">93.6</td><td align="char" char="." valign="top">93.6</td><td align="char" char="." valign="top">93.6</td></tr><tr><td align="left" valign="top">ResNet152<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="char" char="." valign="top">97.5</td><td align="char" char="." valign="top">87.6</td><td align="char" char="." valign="top">96.6</td><td align="char" char="." valign="top">92.1</td></tr><tr><td align="left" valign="top">Inception ResNet-v2</td><td align="char" char="." valign="top">99</td><td align="char" char="." valign="top">95.9</td><td align="char" char="." valign="top">98.6</td><td align="char" char="." valign="top">97.2</td></tr><tr><td align="left" valign="top">Inception ResNet-v2<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="char" char="." valign="top">99.2</td><td align="char" char="." valign="top">97.2</td><td align="char" char="." valign="top">97.9</td><td align="char" char="." 
valign="top">97.5</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>This table summarizes the performance of various CNN architectures according to accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score.</p></fn><fn id="table2fn2"><p><sup>b</sup>VGG: Visual Geometry Group.</p></fn><fn id="table2fn3"><p><sup>c</sup>Models were trained using a combination of real and augmented data, showcasing the impact of data augmentation on model performance.</p></fn><fn id="table2fn4"><p><sup>d</sup>ResNet: Residual Network.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>The models&#x2019; area under the curve (AUC) scores.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model</td><td align="left" valign="top">AUC (without data augmentation)</td><td align="left" valign="top">AUC (with data augmentation)</td></tr></thead><tbody><tr><td align="left" valign="bottom">VGG16<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="char" char="." valign="bottom">98.25</td><td align="char" char="." valign="bottom">97.95</td></tr><tr><td align="left" valign="bottom">VGG19</td><td align="char" char="." valign="bottom">97.6</td><td align="char" char="." valign="bottom">97.6</td></tr><tr><td align="left" valign="bottom">ResNet50<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="char" char="." valign="bottom">85.65</td><td align="char" char="." valign="bottom">63.75</td></tr><tr><td align="left" valign="bottom">ResNet101</td><td align="char" char="." valign="bottom">89.6</td><td align="char" char="." valign="bottom">91.05</td></tr><tr><td align="left" valign="bottom">ResNet152</td><td align="char" char="." valign="bottom">93.45</td><td align="char" char="." valign="bottom">89.85</td></tr><tr><td align="left" valign="bottom">Inception ResNet-v2</td><td align="char" char="." valign="bottom">92.75</td><td align="char" char="." valign="bottom">97.55</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>VGG: Visual Geometry Group.</p></fn><fn id="table3fn2"><p><sup>b</sup>ResNet: Residual Network.</p></fn></table-wrap-foot></table-wrap><p>Surprisingly, increasing the dataset size through data augmentation did not consistently improve model performance across the architectures, as seen in <xref ref-type="table" rid="table2">Table 2</xref>. For some models, performance even declined; for ResNet50, including the augmented data caused the AUC-ROC score to drop sharply from 85.65% to 63.75%, as shown in <xref ref-type="table" rid="table3">Table 3</xref>. This suggests that the augmented data may have introduced noise or overcomplicated the training process for certain architectures, negatively impacting their ability to generalize effectively.</p></sec><sec id="s3-2"><title>Training Time</title><p>We also tracked each model&#x2019;s training time when trained on a combination of real and augmented images versus only real images, as shown in <xref ref-type="table" rid="table4">Table 4</xref>. As expected, training with data augmentation required more time due to the increased size of the dataset. For example, training ResNet152 with data augmentation took 356.6 minutes, whereas training without augmentation took 345.7 minutes. This observation highlights the trade-off between longer training times and the potential benefits of data augmentation.
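</p><p>As a rough illustration of how such per-model training times can be recorded, the following minimal sketch (assuming a TensorFlow/Keras workflow; the build_model helper and dataset objects are hypothetical placeholders rather than the actual training code used in this study) times a single fitting run:</p><preformat>
# Minimal sketch (assumed TensorFlow/Keras workflow): record the wall-clock
# training time of one architecture. `build_model`, `train_ds`, and `val_ds`
# are hypothetical placeholders; `build_model` is assumed to return a
# compiled Keras model.
import time

def timed_training(build_model, train_ds, val_ds, epochs=20):
    """Train a freshly built model and return (history, minutes elapsed)."""
    model = build_model()
    start = time.perf_counter()
    history = model.fit(train_ds, validation_data=val_ds, epochs=epochs, verbose=0)
    minutes = (time.perf_counter() - start) / 60.0
    return history, minutes
</preformat><p>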
However, data augmentation did not improve performance in our case, indicating that the additional training time did not translate into better model generalization.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Training time for the models.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model</td><td align="left" valign="top">AUC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> (real images)</td><td align="left" valign="top">AUC (real and augmented data)</td></tr></thead><tbody><tr><td align="left" valign="bottom">VGG16<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="char" char="." valign="bottom">98.25</td><td align="char" char="." valign="bottom">97.95</td></tr><tr><td align="left" valign="bottom">VGG19</td><td align="char" char="." valign="bottom">97.6</td><td align="char" char="." valign="bottom">97.6</td></tr><tr><td align="left" valign="bottom">ResNet50<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="char" char="." valign="bottom">85.65</td><td align="char" char="." valign="bottom">63.75</td></tr><tr><td align="left" valign="bottom">ResNet101</td><td align="char" char="." valign="bottom">89.6</td><td align="char" char="." valign="bottom">91.05</td></tr><tr><td align="left" valign="bottom">ResNet152</td><td align="char" char="." valign="bottom">93.45</td><td align="char" char="." valign="bottom">89.85</td></tr><tr><td align="left" valign="bottom">Inception ResNet-v2</td><td align="char" char="." valign="bottom">92.75</td><td align="char" char="." valign="bottom">97.55</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table4fn2"><p><sup>b</sup>VGG: Visual Geometry Group.</p></fn><fn id="table4fn3"><p><sup>c</sup>ResNet: Residual Network.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Model Parameters</title><p>In addition to our analysis, we provide a detailed breakdown of the parameter count for each model used in our study, as shown in <xref ref-type="table" rid="table5">Table 5</xref>. The number of parameters in a model reflects its complexity and capacity to learn from data. Consequently, it has a direct impact on both training time and the computational resources required, influencing the model&#x2019;s overall efficiency and scalability.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Parameters of each model.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model</td><td align="left" valign="top">Parameters, n</td></tr></thead><tbody><tr><td align="left" valign="bottom">Inception-ResNet-V2</td><td align="char" char="." valign="bottom">54,336,736</td></tr><tr><td align="left" valign="bottom">ResNet152<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="char" char="." valign="bottom">58,370,944</td></tr><tr><td align="left" valign="bottom">ResNet101</td><td align="char" char="." valign="bottom">42,658,176</td></tr><tr><td align="left" valign="bottom">ResNet50</td><td align="char" char="." valign="bottom">23,587,712</td></tr><tr><td align="left" valign="bottom">VGG19<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="char" char="." valign="bottom">20,024,384</td></tr><tr><td align="left" valign="bottom">VGG16</td><td align="char" char="." 
valign="bottom">14,714,688</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>ResNet: Residual Network.</p></fn><fn id="table5fn2"><p><sup>b</sup>VGG: Visual Geometry Group.</p></fn></table-wrap-foot></table-wrap><p>The results highlight the superior performance of VGG16 in terms of diagnostic accuracy and computational efficiency, challenging the hypothesis that more complex models always yield better results. These findings and their broader implications for TB diagnostics are explored in the Discussion section.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The findings from this study provide significant insights into the performance and efficiency of several CNN architectures in the classification of CXR images for TB detection. The architectures evaluated included VGG16, VGG19, ResNet50, ResNet101, ResNet152, and Inception-ResNet-V2. Of these, the VGG16 consistently achieved the highest performance across all metrics, such as accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score. This consistent performance suggests that VGG16 effectively captures the necessary features for distinguishing between TB-positive and TB-negative CXR images, even with fewer parameters compared to the deeper models. VGG16&#x2019;s superior performance is significant, as it demonstrates that a simpler model can achieve exceptional diagnostic accuracy while requiring minimal computational resources. This makes it a practical and scalable solution for deployment in resource-constrained settings with limited access to high-performance hardware.</p><p>The computational time observed across models has implications for clinical settings, particularly in resource-limited environments. Longer training times, as seen with complex architectures like ResNet152, increase resource demands, potentially impacting cost-effectiveness. Importantly, since data augmentation did not improve model performance in this study, the additional computational burden may not be justifiable in such settings. Simpler models, like VGG16 or ResNet50, may offer a more feasible balance between efficiency and diagnostic accuracy, making them better suited for practical implementation.</p></sec><sec id="s4-2"><title>Comparison to Prior Work</title><p>The findings also highlight the fact that while data augmentation is often used to improve the performance of CNN models by expanding the dataset and introducing variability, it does not necessarily lead to performance improvements if the base dataset already provides sufficient diversity for training. In our study, the original dataset appeared robust enough, and the addition of augmented data did not enhance model performance. This aligns with findings from previous studies, such as the study by Shorten and Khoshgoftaar [<xref ref-type="bibr" rid="ref47">47</xref>], which emphasize that the effectiveness of data augmentation is highly dependent on the initial dataset&#x2019;s characteristics, particularly its size and variability. 
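</p><p>The specific augmentation strategy used in this study is described in the Methods section; purely as a generic illustration of the kind of on-the-fly pipeline such experiments typically rely on, a Keras-style sketch might look as follows (the transformation choices and magnitudes here are illustrative assumptions, not the exact settings used in this work):</p><preformat>
# Generic illustration only (assumed TensorFlow/Keras): an on-the-fly
# augmentation pipeline of the kind commonly applied to chest x-ray inputs.
# The transformations and magnitudes are illustrative, not the exact
# settings used in this study.
import tensorflow as tf
from tensorflow.keras import layers

augment = tf.keras.Sequential([
    layers.RandomRotation(0.03),           # small random rotations
    layers.RandomZoom(0.10),               # mild zoom in/out
    layers.RandomTranslation(0.05, 0.05),  # small vertical/horizontal shifts
])

# Applied lazily while building the training dataset, for example:
# train_ds = train_ds.map(lambda x, y: (augment(x, training=True), y))
</preformat><p>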
When the base dataset is sufficiently diverse, as in our case, augmentation may introduce unnecessary redundancy or even noise, potentially disrupting the model&#x2019;s ability to generalize effectively.</p><p>However, our findings also contrast with studies in domains where datasets are inherently limited or imbalanced, such as biomedical imaging, where augmentation has been shown to significantly improve performance by addressing underrepresented classes and introducing variability. For instance, a study by Perez and Wang [<xref ref-type="bibr" rid="ref48">48</xref>] demonstrated that data augmentation improved model generalization for small datasets by simulating real-world variability. The discrepancy between our results and these studies highlights the context-dependent nature of augmentation&#x2019;s effectiveness and the need for tailoring augmentation strategies to specific datasets and tasks.</p><p>Several studies have observed that models with a higher number of parameters, such as ResNet152 and Inception-ResNet-V2, are capable of capturing deeper and more intricate patterns in the data [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. However, this comes at the cost of requiring more computational resources and longer training times. Interestingly, in our study, despite having fewer parameters, VGG16 outperformed the more complex models. This suggests that for our specific task of classifying CXR images into TB-positive and TB-negative categories, VGG16 efficiently captured the relevant features without necessitating excessive complexity. This finding highlights the importance of selecting the appropriate model architecture based on the specific characteristics and requirements of the task at hand rather than simply opting for the model with the most parameters. This result also aligns with the principle that simpler models can often perform competitively when they are well-matched to the data and the problem domain [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec><sec id="s4-3"><title>Strengths and Limitations</title><p>The findings from this study show that a simpler model like VGG16 can deliver strong performance while keeping computational requirements low. This makes it suitable for use in low-resource environments. The study also measured training time across different architectures, which helps evaluate practical efficiency.</p><p>The study used a publicly available dataset from Kaggle. While the dataset is extensive, it may not reflect the full range of clinical variability found in real-world populations. Only one data augmentation approach was applied, and results might vary with other techniques or combinations.</p></sec><sec id="s4-4"><title>Conclusions</title><p>This study presents a comprehensive evaluation of several CNN architectures&#x2014;VGG16, VGG19, ResNet50, ResNet101, ResNet152, and Inception-ResNet-V2&#x2014;in classifying CXR images as either TB-positive or TB-negative. The findings showed that the VGG16 architecture consistently outperformed the other models across all evaluation metrics, achieving superior performance despite having fewer parameters than more complex architectures such as ResNet152 and Inception-ResNet-V2.
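</p><p>For readers who wish to verify the parameter comparison underlying this conclusion, the backbone parameter counts listed in <xref ref-type="table" rid="table5">Table 5</xref> should be reproducible directly from the standard Keras application models, as in the minimal sketch below (weights=None is used only so that no pretrained weights need to be downloaded, and exact totals depend on the input shape and on excluding the classification head):</p><preformat>
# Illustrative sketch (assumed TensorFlow/Keras): reproduce backbone-only
# parameter counts for the architectures compared in this study.
from tensorflow.keras import applications

backbones = {
    "VGG16": applications.VGG16,
    "VGG19": applications.VGG19,
    "ResNet50": applications.ResNet50,
    "ResNet101": applications.ResNet101,
    "ResNet152": applications.ResNet152,
    "Inception-ResNet-V2": applications.InceptionResNetV2,
}

for name, constructor in backbones.items():
    model = constructor(weights=None, include_top=False, input_shape=(224, 224, 3))
    print(f"{name}: {model.count_params():,} parameters")
</preformat><p>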
These results align with previous studies, such as those by Meraj et al [<xref ref-type="bibr" rid="ref30">30</xref>] and Lakhani and Sundaram [<xref ref-type="bibr" rid="ref12">12</xref>], which also highlighted the high diagnostic accuracy and efficiency of simpler architectures like VGG16 for TB detection in CXR images. However, our study extends these findings by demonstrating that VGG16 performs robustly even on larger, more diverse datasets, further validating its applicability to real-world scenarios.</p><p>Our results also showed limited benefits of data augmentation in this context, suggesting that the original dataset provided sufficient diversity for effective training. This finding is consistent with previous research emphasizing that the utility of data augmentation is highly context-dependent and may not always lead to performance improvements, particularly when the dataset already exhibits sufficient variability. However, it contrasts with studies where augmentation proved essential for improving performance in smaller, imbalanced datasets, highlighting the need for task-specific augmentation strategies. Furthermore, the study demonstrated significant trade-offs between model complexity, training time, and performance. Models with more parameters, such as ResNet152 and Inception-ResNet-V2, required longer training times and more computational resources without corresponding improvements in classification performance across all evaluation metrics. This emphasizes the importance of selecting model architectures based on task requirements rather than defaulting to more complex models. Simpler models like VGG16 not only achieved higher accuracy but also demonstrated computational efficiency, making them particularly suitable for resource-constrained environments. The practical implications of this finding are significant: VGG16&#x2019;s lower computational requirements and superior performance enable its deployment in low-resource health care settings, where access to high-performance hardware and technical expertise may be limited.</p><p>Overall, our research contributes to the growing body of evidence supporting the effectiveness of deep learning models in medical image classification and provides actionable insights into optimizing these models for TB detection in CXR images. By addressing key considerations such as dataset diversity, model complexity, and computational efficiency, this study offers practical guidance for implementing AI-driven TB diagnostic tools in real-world clinical environments.</p></sec></sec></body><back><ack><p>The authors would like to extend their sincere gratitude to the team of researchers from Qatar University, Doha, Qatar, and the University of Dhaka, Bangladesh, along with their collaborators from Malaysia and medical doctors from Hamad Medical Corporation and various health care institutions in Bangladesh for creating and sharing the chest x-ray image database for tuberculosis. Their effort in compiling this comprehensive dataset has significantly contributed to our research. 
The authors are grateful for their contributions and their dedication to advancing tuberculosis diagnosis and treatment through the provision of this valuable public dataset.</p></ack><notes><sec><title>Data Availability</title><p>The dataset analyzed during this study is publicly available and was obtained from the Kaggle repository [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AUC-ROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb4">CXR</term><def><p>chest x-ray</p></def></def-item><def-item><term id="abb5">FN</term><def><p>false negative</p></def></def-item><def-item><term id="abb6">FP</term><def><p>false positive</p></def></def-item><def-item><term id="abb7">FPR</term><def><p>false positive rate</p></def></def-item><def-item><term id="abb8">ILSVRC</term><def><p>ImageNet Large-Scale Visual Recognition Challenge</p></def></def-item><def-item><term id="abb9">ReLU</term><def><p>rectified linear unit</p></def></def-item><def-item><term id="abb10">ResNet</term><def><p>Residual Network</p></def></def-item><def-item><term id="abb11">TB</term><def><p>tuberculosis</p></def></def-item><def-item><term id="abb12">TN</term><def><p>true negative</p></def></def-item><def-item><term id="abb13">TP</term><def><p>true positive</p></def></def-item><def-item><term id="abb14">TPR</term><def><p>true positive rate</p></def></def-item><def-item><term id="abb15">VGG</term><def><p>Visual Geometry Group</p></def></def-item><def-item><term id="abb16">WHO</term><def><p>World Health Organization</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Assefa</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Woldeyohannes</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gelaw</surname><given-names>YA</given-names> </name><name name-style="western"><surname>Hamada</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Getahun</surname><given-names>H</given-names> </name></person-group><article-title>Screening tools to exclude active pulmonary TB in high TB burden countries: systematic review and meta-analysis</article-title><source>Int J Tuberc Lung Dis</source><year>2019</year><volume>23</volume><issue>6</issue><fpage>728</fpage><lpage>734</lpage><pub-id pub-id-type="doi">10.5588/ijtld.18.0547</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chakaya</surname><given-names>J</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ntoumi</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Global Tuberculosis Report 2020&#x2014;reflections on the Global TB burden, treatment and prevention efforts</article-title><source>Int J Infect Dis</source><year>2021</year><month>12</month><volume>113 Suppl 1</volume><issue>Suppl 
1</issue><fpage>S7</fpage><lpage>S12</lpage><pub-id pub-id-type="doi">10.1016/j.ijid.2021.02.107</pub-id><pub-id pub-id-type="medline">33716195</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Global tuberculosis report</article-title><source>World Health Organization</source><year>2023</year><access-date>2025-05-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/teams/global-programme-on-tuberculosis-and-lung-health/tb-reports/global-tuberculosis-report-2023">https://www.who.int/teams/global-programme-on-tuberculosis-and-lung-health/tb-reports/global-tuberculosis-report-2023</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mukund</surname><given-names>P</given-names> </name><name name-style="western"><surname>Diana</surname><given-names>W</given-names> </name><name name-style="western"><surname>Knut</surname><given-names>L</given-names> </name><etal/></person-group><article-title>WHO&#x2019;s new end TB strategy</article-title><source>Lancet</source><year>2015</year><volume>385</volume><fpage>1799</fpage><lpage>1801</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(15)60570-0</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>Systematic screening for active tuberculosis: an operational guide</article-title><source>World Health Organization</source><access-date>2025-05-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/i/item/9789241549172">https://www.who.int/publications/i/item/9789241549172</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liao</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>H</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Evaluation of an artificial intelligence (AI) system to detect tuberculosis on chest X-ray at a pilot active screening project in Guangdong, China in 2019</article-title><source>J Xray Sci Technol</source><year>2022</year><volume>30</volume><issue>2</issue><fpage>221</fpage><lpage>230</lpage><pub-id pub-id-type="doi">10.3233/XST-211019</pub-id><pub-id pub-id-type="medline">34924433</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van&#x2019;t Hoog</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Meme</surname><given-names>HK</given-names> </name><name name-style="western"><surname>Laserson</surname><given-names>KF</given-names> </name><etal/></person-group><article-title>Screening strategies for tuberculosis prevalence surveys: the value of chest radiography and symptoms</article-title><source>PLoS One</source><year>2012</year><volume>7</volume><issue>7</issue><fpage>e38691</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0038691</pub-id><pub-id pub-id-type="medline">22792158</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Pande</surname><given-names>T</given-names> </name><name name-style="western"><surname>Pai</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>FA</given-names> </name><name name-style="western"><surname>Denkinger</surname><given-names>CM</given-names> </name></person-group><article-title>Use of chest radiography in the 22 highest tuberculosis burden countries</article-title><source>Eur Respir J</source><year>2015</year><month>12</month><volume>46</volume><issue>6</issue><fpage>1816</fpage><lpage>1819</lpage><pub-id pub-id-type="doi">10.1183/13993003.01064-2015</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kant</surname><given-names>S</given-names> </name><name name-style="western"><surname>Srivastava</surname><given-names>MM</given-names> </name></person-group><article-title>Towards automated tuberculosis detection using deep learning</article-title><conf-name>2018 IEEE Symposium Series on Computational Intelligence (SSCI)</conf-name><conf-date>Nov 18-21, 2018</conf-date><conf-loc>Bangalore, India</conf-loc><fpage>1250</fpage><lpage>1253</lpage><pub-id pub-id-type="doi">10.1109/SSCI.2018.8628800</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sangheum</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hyo-Eun</surname><given-names>K</given-names> </name><name name-style="western"><surname>Jihoon</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hee-Jin</surname><given-names>K</given-names> </name></person-group><article-title>A novel approach for tuberculosis screening based on deep convolutional neural networks</article-title><conf-name>Medical Imaging 2016: Computer-Aided Diagnosis</conf-name><conf-date>Mar 27-3, 2016</conf-date><conf-loc>San Diego, CA, United States</conf-loc><fpage>750</fpage><lpage>757</lpage><pub-id pub-id-type="doi">10.1117/12.2216198</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>TK</given-names> </name><name name-style="western"><surname>Yi</surname><given-names>PH</given-names> </name><name name-style="western"><surname>Hager</surname><given-names>GD</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>CT</given-names> </name></person-group><article-title>Refining dataset curation methods for deep learning-based automated tuberculosis screening</article-title><source>J Thorac Dis</source><year>2020</year><month>09</month><volume>12</volume><issue>9</issue><fpage>5078</fpage><lpage>5085</lpage><pub-id pub-id-type="doi">10.21037/jtd.2019.08.34</pub-id><pub-id pub-id-type="medline">33145084</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lakhani</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sundaram</surname><given-names>B</given-names> </name></person-group><article-title>Deep learning at chest radiography: automated classification of pulmonary tuberculosis by using convolutional neural 
networks</article-title><source>Radiology</source><year>2017</year><month>08</month><volume>284</volume><issue>2</issue><fpage>574</fpage><lpage>582</lpage><pub-id pub-id-type="doi">10.1148/radiol.2017162326</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Hooda</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sofat</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kaur</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mittal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Meriaudeau</surname><given-names>F</given-names> </name></person-group><article-title>Deep-learning: a potential method for tuberculosis detection using chest radiography</article-title><year>2017</year><conf-name>2017 IEEE International Conference on Signal and Image Processing Applications (ICSIPA)</conf-name><conf-date>Sep 12-14, 2017</conf-date><conf-loc>Kuching, Malaysia</conf-loc><fpage>497</fpage><lpage>502</lpage><pub-id pub-id-type="doi">10.1109/ICSIPA.2017.8120663</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Khanh</surname><given-names>HTK</given-names> </name><name name-style="western"><surname>Jeonghwan</surname><given-names>G</given-names> </name><name name-style="western"><surname>Om</surname><given-names>P</given-names> </name><name name-style="western"><surname>Jong-In</surname><given-names>S</given-names> </name><name name-style="western"><surname>Min</surname><given-names>PC</given-names> </name></person-group><article-title>Utilizing pre-trained deep learning models for automated pulmonary tuberculosis detection using chest radiography</article-title><conf-name>Intelligent Information and Database Systems: 11th Asian Conference, ACIIDS 2019</conf-name><conf-date>Apr 8-11, 2019</conf-date><conf-loc>Yogyakarta, Indonesia</conf-loc><fpage>395</fpage><lpage>403</lpage></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kazemzadeh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jamshy</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Deep learning detection of active pulmonary tuberculosis at chest radiography matched the clinical performance of radiologists</article-title><source>Radiology</source><year>2023</year><month>01</month><volume>306</volume><issue>1</issue><fpage>124</fpage><lpage>137</lpage><pub-id pub-id-type="doi">10.1148/radiol.212213</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zulvia</surname><given-names>FE</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Roflin</surname><given-names>E</given-names> </name></person-group><article-title>An initial screening method for tuberculosis diseases using a multi-objective gradient evolution-based support vector machine and C5.0 decision tree</article-title><conf-name>2017 IEEE 41st 
Annual Computer Software and Applications Conference (COMPSAC)</conf-name><conf-date>Jul 4-8, 2017</conf-date><conf-loc>Turin, Italy</conf-loc><fpage>204</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1109/COMPSAC.2017.57</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saybani</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Shamshirband</surname><given-names>S</given-names> </name><name name-style="western"><surname>Golzari Hormozi</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Diagnosing tuberculosis with a novel support vector machine-based artificial immune recognition system</article-title><source>Iran Red Crescent Med J</source><year>2015</year><month>04</month><volume>17</volume><issue>4</issue><fpage>e24557</fpage><pub-id pub-id-type="doi">10.5812/ircmj.17(4)2015.24557</pub-id><pub-id pub-id-type="medline">26023340</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Fahadulla</surname><given-names>HS</given-names> </name><name name-style="western"><surname>Fareed</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Adeel</surname><given-names>ZM</given-names> </name><name name-style="western"><surname>Aasia</surname><given-names>K</given-names> </name><name name-style="western"><surname>Khan Imran</surname><given-names>H</given-names> </name><name name-style="western"><surname>Raza</surname><given-names>A</given-names> </name></person-group><article-title>Decision-tree inspired classification algorithm to detect tuberculosis (TB)</article-title><year>2017</year><access-date>2025-06-20</access-date><conf-name>21st Pacific-Asia Conference on Information Systems (PACIS 2017)</conf-name><conf-date>Jul 16-20, 2017</conf-date><conf-loc>Langkawi Island, Malaysia</conf-loc><comment><ext-link ext-link-type="uri" xlink:href="https://aisel.aisnet.org/pacis2017/182">https://aisel.aisnet.org/pacis2017/182</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mithra</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Emmanuel</surname><given-names>WRS</given-names> </name></person-group><article-title>FHDT: fuzzy and hyco-entropy-based decision tree classifier for tuberculosis diagnosis from sputum images</article-title><source>S&#x0101;dhan&#x0101;</source><year>2018</year><month>08</month><volume>43</volume><issue>8</issue><pub-id pub-id-type="doi">10.1007/s12046-018-0878-y</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayas</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ekinci</surname><given-names>M</given-names> </name></person-group><article-title>Random forest-based tuberculosis bacteria classification in images of ZN-stained sputum smear samples</article-title><source>SIViP</source><year>2014</year><month>12</month><volume>8</volume><issue>S1</issue><fpage>49</fpage><lpage>61</lpage><pub-id pub-id-type="doi">10.1007/s11760-014-0708-6</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chi</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Jingxin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Guoping</surname><given-names>Q</given-names> </name></person-group><article-title>Tuberculosis bacteria detection based on random forest using fluorescent images</article-title><conf-name>2016 9th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)</conf-name><conf-date>Oct 15-17, 2016</conf-date><conf-loc>Datong, China</conf-loc><fpage>553</fpage><lpage>558</lpage></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hao</surname><given-names>Y</given-names> </name></person-group><article-title>Deep pre-trained networks as a feature extractor with XGBoost to detect tuberculosis from chest X-ray</article-title><source>Comput Electr Eng</source><year>2021</year><month>07</month><volume>93</volume><fpage>107252</fpage><pub-id pub-id-type="doi">10.1016/j.compeleceng.2021.107252</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sebhatu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nand</surname><given-names>P</given-names> </name></person-group><article-title>Intelligent system for diagnosis of pulmonary tuberculosis using XGBoosting method</article-title><conf-name>International Conference on Ubiquitous Computing and Intelligent Information Systems</conf-name><conf-date>Mar 10-11, 2022</conf-date><conf-loc>Tamil Nadu, India</conf-loc><fpage>493</fpage><lpage>511</lpage></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kotei</surname><given-names>E</given-names> </name><name name-style="western"><surname>Thirunavukarasu</surname><given-names>R</given-names> </name></person-group><article-title>A comprehensive review on advancement in deep learning techniques for automatic detection of tuberculosis from chest X-ray images</article-title><source>Arch Computat Methods Eng</source><year>2024</year><month>01</month><volume>31</volume><issue>1</issue><fpage>455</fpage><lpage>474</lpage><pub-id pub-id-type="doi">10.1007/s11831-023-09987-w</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ojasvi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kalpdrum</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chakresh Kumar</surname><given-names>J</given-names> </name></person-group><article-title>Using deep learning to classify X-ray images of potential tuberculosis patients</article-title><conf-name>2018 IEEE International Conference on Bioinformatics and Biomedicine 
(BIBM)</conf-name><conf-date>Dec 2-6, 2018</conf-date><conf-loc>Madrid, Spain</conf-loc><fpage>2368</fpage><lpage>2375</lpage><pub-id pub-id-type="doi">10.1109/BIBM.2018.8621525</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Cao</surname><given-names>YU</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Improving tuberculosis diagnostics using deep learning and mobile health technologies among resource-poor and marginalized communities</article-title><conf-name>2016 IEEE First International Conference on Connected Health</conf-name><conf-date>Jun 27-29, 2016</conf-date><conf-loc>Washington, DC, United States</conf-loc><fpage>274</fpage><lpage>281</lpage><pub-id pub-id-type="doi">10.1109/CHASE.2016.18</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Panicker</surname><given-names>RO</given-names> </name><name name-style="western"><surname>Kalmady</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Rajan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sabu</surname><given-names>MK</given-names> </name></person-group><article-title>Automatic detection of tuberculosis bacilli from microscopic sputum smear images using deep learning methods</article-title><source>Biocybern Biomed Eng</source><year>2018</year><volume>38</volume><issue>3</issue><fpage>691</fpage><lpage>699</lpage><pub-id pub-id-type="doi">10.1016/j.bbe.2018.05.007</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Stirenko</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kochura</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Alienin</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Chest X-ray analysis of tuberculosis by deep learning with segmentation and augmentation</article-title><conf-name>2018 IEEE 38th International Conference on Electronics and Nanotechnology (ELNANO)</conf-name><conf-date>Apr 24-26, 2018</conf-date><conf-loc>Kyiv, Ukraine</conf-loc><fpage>422</fpage><lpage>428</lpage><pub-id pub-id-type="doi">10.1109/ELNANO.2018.8477564</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nijiati</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>R</given-names> </name><name name-style="western"><surname>Damaola</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Deep learning based CT images automatic analysis model for active/non-active pulmonary tuberculosis differential diagnosis</article-title><source>Front Mol Biosci</source><year>2022</year><volume>9</volume><fpage>1086047</fpage><pub-id pub-id-type="doi">10.3389/fmolb.2022.1086047</pub-id><pub-id pub-id-type="medline">36545511</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Meraj</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Yaakob</surname><given-names>R</given-names> </name><name name-style="western"><surname>Azman</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Detection of pulmonary tuberculosis manifestation in chest X-rays using different convolutional neural network (CNN) models</article-title><source>Int J Eng Adv Technol</source><year>2019</year><volume>9</volume><issue>1</issue><fpage>2270</fpage><lpage>2275</lpage><pub-id pub-id-type="doi">10.35940/ijeat.A2632.109119</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jaeger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Candemir</surname><given-names>S</given-names> </name><name name-style="western"><surname>Antani</surname><given-names>S</given-names> </name><name name-style="western"><surname>W&#x00E1;ng</surname><given-names>YXJ</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>PX</given-names> </name><name name-style="western"><surname>Thoma</surname><given-names>G</given-names> </name></person-group><article-title>Two public chest X-ray datasets for computer-aided screening of pulmonary diseases</article-title><source>Quant Imaging Med Surg</source><year>2014</year><month>12</month><volume>4</volume><issue>6</issue><fpage>475</fpage><lpage>477</lpage><pub-id pub-id-type="doi">10.3978/j.issn.2223-4292.2014.11.20</pub-id><pub-id pub-id-type="medline">25525580</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahman</surname><given-names>T</given-names> </name><name name-style="western"><surname>Khandakar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kadir</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Reliable tuberculosis detection using chest X-ray with deep learning, segmentation and visualization</article-title><source>IEEE Access</source><year>2020</year><volume>8</volume><fpage>191586</fpage><lpage>191601</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2020.3031384</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rajan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sowmya</surname><given-names>V</given-names> </name><name name-style="western"><surname>Govind</surname><given-names>D</given-names> </name><name name-style="western"><surname>Soman</surname><given-names>KP</given-names> </name></person-group><article-title>Dependency of various color and intensity planes on CNN based image classification</article-title><conf-name>Advances in Signal Processing and Intelligent Recognition Systems: Proceedings of Third International Symposium on Signal Processing and Intelligent Recognition Systems (SIRS-2017)</conf-name><conf-date>Sep 13-16, 2017</conf-date><conf-loc>Manipal, India</conf-loc><fpage>1167</fpage><lpage>1177</lpage></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Ara&#x00FA;jo</surname><given-names>T</given-names> </name><name name-style="western"><surname>Aresta</surname><given-names>G</given-names> </name><name name-style="western"><surname>Castro</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Classification of breast cancer histology images using convolutional neural networks</article-title><source>PLoS ONE</source><year>2017</year><volume>12</volume><issue>6</issue><fpage>e0177544</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0177544</pub-id><pub-id pub-id-type="medline">28570557</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Shibani</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tsipras</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ilyas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Madry</surname><given-names>A</given-names> </name></person-group><article-title>How does batch normalization help optimization</article-title><year>2019</year><month>04</month><day>15</day><conf-name>Advances in Neural Information Processing Systems 31 (NeurIPS 2018)</conf-name><conf-date>Dec 3-8, 2018</conf-date><conf-loc>Montr&#x00E9;al, Canada</conf-loc><pub-id pub-id-type="doi">10.48550/arXiv.1805.11604</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Taylor</surname><given-names>L</given-names> </name><name name-style="western"><surname>Nitschke</surname><given-names>G</given-names> </name></person-group><article-title>Improving deep learning with generic data augmentation</article-title><conf-name>2018 IEEE Symposium Series on Computational Intelligence (SSCI)</conf-name><conf-date>Nov 18-21, 2018</conf-date><conf-loc>Bangalore, India</conf-loc><fpage>1542</fpage><lpage>1547</lpage><pub-id pub-id-type="doi">10.1109/SSCI.2018.8628742</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Russakovsky</surname><given-names>O</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>J</given-names> </name><name name-style="western"><surname>Su</surname><given-names>H</given-names> </name><etal/></person-group><article-title>ImageNet Large Scale Visual Recognition Challenge</article-title><source>Int J Comput Vis</source><year>2015</year><month>12</month><volume>115</volume><issue>3</issue><fpage>211</fpage><lpage>252</lpage><pub-id pub-id-type="doi">10.1007/s11263-015-0816-y</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Antonio</surname><given-names>G</given-names> </name><name name-style="western"><surname>Sujit</surname><given-names>P</given-names> </name></person-group><source>Deep Learning with Keras</source><year>2017</year><publisher-name>Packt Publishing Ltd</publisher-name></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Mahbub</surname><given-names>H</given-names> </name><name name-style="western"><surname>Bird 
Jordan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Faria Diego</surname><given-names>R</given-names> </name></person-group><article-title>A study on CNN transfer learning for image classification</article-title><conf-name>Advances in Computational Intelligence Systems: Contributions Presented at the 18th UK Workshop on Computational Intelligence</conf-name><conf-date>Sep 5-7, 2018</conf-date><conf-loc>Nottingham, United Kingdom</conf-loc><fpage>191</fpage><lpage>202</lpage></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Karen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Andrew</surname><given-names>Z</given-names> </name></person-group><article-title>Very deep convolutional networks for large-scale image recognition</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 10, 2015</comment><pub-id pub-id-type="doi">10.48550/arXiv.1409.1556</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ren</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>J</given-names> </name></person-group><article-title>Deep residual learning for image recognition</article-title><conf-name>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name><conf-date>Jun 27-30, 2016</conf-date><conf-loc>Las Vegas, NV, United States</conf-loc><fpage>770</fpage><lpage>778</lpage><pub-id pub-id-type="doi">10.1109/CVPR.2016.90</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kaiming</surname><given-names>H</given-names> </name><name name-style="western"><surname>Xiangyu</surname><given-names>Z</given-names> </name></person-group><article-title>Identity mappings in deep residual networks</article-title><conf-name>Computer Vision&#x2013;ECCV 2016: 14th European Conference</conf-name><conf-date>Oct 11-14, 2016</conf-date><conf-loc>Amsterdam, The Netherlands</conf-loc><fpage>630</fpage><lpage>645</lpage></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Szegedy</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Going deeper with convolutions</article-title><conf-name>2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name><conf-date>Jun 7-12, 2015</conf-date><conf-loc>Boston, MA, United States</conf-loc><pub-id pub-id-type="doi">10.1109/CVPR.2015.7298594</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Christian</surname><given-names>S</given-names> </name><name name-style="western"><surname>Vincent</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Sergey</surname><given-names>I</given-names> </name><name name-style="western"><surname>Jon</surname><given-names>S</given-names> </name></person-group><article-title>Rethinking the inception architecture for computer vision</article-title><conf-name>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name><conf-date>Jun 27-30, 2016</conf-date><conf-loc>Las Vegas, NV, United States</conf-loc></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Neshat</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ahmed</surname><given-names>M</given-names> </name><name name-style="western"><surname>Askari</surname><given-names>H</given-names> </name><name name-style="western"><surname>Thilakaratne</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mirjalili</surname><given-names>S</given-names> </name></person-group><article-title>Hybrid Inception architecture with residual connection: fine-tuned Inception-ResNet deep learning model for lung inflammation diagnosis from chest radiographs</article-title><source>Procedia Comput Sci</source><year>2024</year><volume>235</volume><fpage>1841</fpage><lpage>1850</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2024.04.175</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Christian</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sergey</surname><given-names>I</given-names></name><name name-style="western"><surname>Vincent</surname><given-names>V</given-names></name><name name-style="western"><surname>Alex</surname><given-names>A</given-names></name></person-group><article-title>Inception-v4, Inception-ResNet and the impact of residual connections on learning</article-title><year>2017</year><conf-name>Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence</conf-name><conf-date>Feb 4-9, 2017</conf-date><conf-loc>San Francisco, CA, United States</conf-loc><fpage>1</fpage><pub-id pub-id-type="doi">10.1609/aaai.v31i1.11231</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shorten</surname><given-names>C</given-names> </name><name name-style="western"><surname>Khoshgoftaar</surname><given-names>TM</given-names> </name></person-group><article-title>A survey on image data augmentation for deep learning</article-title><source>J Big Data</source><year>2019</year><month>12</month><volume>6</volume><issue>1</issue><fpage>1</fpage><lpage>48</lpage><pub-id pub-id-type="doi">10.1186/s40537-019-0197-0</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Perez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name></person-group><article-title>The effectiveness of data augmentation in image classification using deep learning</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 13, 2017</comment><pub-id pub-id-type="doi">10.48550/arXiv.1712.04621</pub-id></nlm-citation></ref></ref-list></back></article>