<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Russian Journal of Infection and Immunity</journal-id><journal-title-group><journal-title xml:lang="en">Russian Journal of Infection and Immunity</journal-title><trans-title-group xml:lang="ru"><trans-title>Инфекция и иммунитет</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2220-7619</issn><issn publication-format="electronic">2313-7398</issn><publisher><publisher-name xml:lang="en">SPb RAACI</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">17896</article-id><article-id pub-id-type="doi">10.15789/2220-7619-ITC-17896</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>ORIGINAL ARTICLES</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>ОРИГИНАЛЬНЫЕ СТАТЬИ</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Identifying the characteristics of late HIV diagnosis using optimized machine learning algorithm</article-title><trans-title-group xml:lang="ru"><trans-title>Определение характеристик поздней диагностики ВИЧ с использованием оптимизированного алгоритма машинного обучения</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-6054-9850</contrib-id><name-alternatives><name xml:lang="en"><surname>Farhadian</surname><given-names>M.</given-names></name><name 
xml:lang="ru"><surname>Фархадян</surname><given-names>М.</given-names></name></name-alternatives><address><country country="IR">Iran, Islamic Republic of</country></address><bio xml:lang="en"><p>PhD, Associate Professor of Biostatistics Department, School of Public Health and Research Center for Health Sciences</p></bio><bio xml:lang="ru"><p>PhD, доцент кафедры биостатистики Школы общественного здравоохранения</p></bio><email>maryam_farhadian80@yahoo.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1597-7327</contrib-id><name-alternatives><name xml:lang="en"><surname>Moslehi</surname><given-names>Samad</given-names></name><name xml:lang="ru"><surname>Мослехи</surname><given-names>Самад</given-names></name></name-alternatives><address><country country="IR">Iran, Islamic Republic of</country></address><bio xml:lang="en"><p>PhD, Associate Professor of Biostatistics Department, School of Public Health, Modeling of Noncommunicable Diseases Research Center</p></bio><bio xml:lang="ru"><p>PhD, доцент кафедры биостатистики, кафедра биостатистики, Школа общественного здравоохранения, Исследовательский центр моделирования неинфекционных заболеваний</p></bio><email>samadmoslehi999@gmail.com</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-9428-059X</contrib-id><name-alternatives><name xml:lang="en"><surname>Mirzaei</surname><given-names>M.</given-names></name><name xml:lang="ru"><surname>Мирзаи</surname><given-names>М.</given-names></name></name-alternatives><address><country country="IR">Iran, Islamic Republic of</country></address><bio xml:lang="en"><p>MSc, Disease Control Expert</p></bio><bio xml:lang="ru"><p>магистр наук, эксперт по контролю заболеваний</p></bio><email>mirzaei3589@gmail.com</email><xref ref-type="aff" rid="aff2"/></contrib></contrib-group><aff-alternatives 
id="aff1"><aff><institution xml:lang="en">Hamadan University of Medical Sciences</institution></aff><aff><institution xml:lang="ru">Университет медицинских наук Хамадана</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Center for Disease Control and Prevention</institution></aff><aff><institution xml:lang="ru">Центр по контролю и профилактике заболеваний</institution></aff></aff-alternatives><pub-date date-type="preprint" iso-8601-date="2025-06-06" publication-format="electronic"><day>06</day><month>06</month><year>2025</year></pub-date><pub-date date-type="pub" iso-8601-date="2025-12-08" publication-format="electronic"><day>08</day><month>12</month><year>2025</year></pub-date><volume>15</volume><issue>5</issue><issue-title xml:lang="en"/><issue-title xml:lang="ru"/><fpage>906</fpage><lpage>914</lpage><history><date date-type="received" iso-8601-date="2025-03-23"><day>23</day><month>03</month><year>2025</year></date><date date-type="accepted" iso-8601-date="2025-05-19"><day>19</day><month>05</month><year>2025</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2025, Farhadian M., Moslehi S., Mirzaei M.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2025, Фархадян М., Мослехи С., Мирзаи М.</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="en">Farhadian M., Moslehi S., Mirzaei M.</copyright-holder><copyright-holder xml:lang="ru">Фархадян М., Мослехи С., Мирзаи М.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://iimmun.ru/iimm/article/view/17896">https://iimmun.ru/iimm/article/view/17896</self-uri><abstract xml:lang="en"><p><bold>Background.</bold> 
Early detection of HIV infection is essential for clinical diagnosis, preventing transmission, and ensuring the safety of blood products. Individuals diagnosed late with HIV may unknowingly transmit the virus, and once diagnosed, they may experience worse health outcomes. Therefore, this study aims to identify the characteristics associated with late diagnosis of HIV patients. <bold>Materials and methods.</bold> In this retrospective cohort study, data on 236 patients with HIV infection in Hamadan, western Iran, were collected by recording the CD4 count from 2011 to 2022. Late HIV diagnosis was defined as a CD4 count ≤ 350/mm<sup>3</sup>. Initially, Extreme Gradient Boosting (XGBoost) and Random Forest (RF) algorithms identified important variables. Subsequently, models such as Logistic Model Tree (LMT), Classification and Regression Tree (CART), Deep Neural Network (DNN), and Support Vector Machine (SVM) were developed using a 70/30 training/test dataset split for clinical and demographic variables. Finally, the optimal model was selected based on accuracy and F1-score using Python software version 3.10. <bold>Results.</bold> The age, logarithm of Viral Load (LVL), White Blood Cell (WBC), Red Blood Cell (RBC), Lymphocyte (Lym), Hematocrit (Hct), Platelet (PLT), Hemoglobin (Hb), and clinical stage variables had relative importance above 6%. Among the models developed on the important variables, the CART was the optimal model, with F1-score and Accuracy values of 0.887 and 0.801 and 0.897 and 0.822 for training data, respectively. The AUC value obtained for the CART was equal to 0.918. 
<bold>Conclusions.</bold> Late diagnosis of HIV infection is a substantial problem. An algorithm that can accurately and interpretably detect disease characteristics, such as the CART, could be essential for identifying characteristics that influence late HIV diagnosis and for clinical and therapeutic decisions.</p></abstract><trans-abstract xml:lang="ru"><p><bold>Введение.</bold> Раннее выявление ВИЧ-инфекции имеет важное значение для клинической диагностики, предотвращения трансмиссии и обеспечения безопасности продуктов крови. Лица с поздним диагностированием ВИЧ могут неосознанно передавать вирус, и после постановки диагноза у них могут возникнуть более неблагоприятные последствия для здоровья. Поэтому настоящее исследование направлено на выявление характеристик, связанных с поздней диагностикой ВИЧ-пациентов. <bold>Материалы и методы.</bold> В настоящем ретроспективном когортном исследовании была собрана информация о 236 пациентах с ВИЧ-инфекцией в Хамадане (запад Ирана) путем оценки количества CD4 Т клеток периферической крови в период с 2011 по 2022 гг. Поздняя диагностика ВИЧ считалась при уровне CD4 Т клеток ≤ 350/мм<sup>3</sup>. Первоначально алгоритмы Extreme Gradient Boosting (XGBoost) и Random Forest (RF) выявили основные переменные. Впоследствии были разработаны такие модели, как Logistic Model Tree (LMT), Classification and Regression Tree (CART), Deep Neural Network (DNN) и Support Vector Machine (SVM) с использованием 70/30 разделения набора данных для обучения/тестирования для клинических и демографических переменных. Наконец, оптимальная модель была выбрана на основе точности и F1-оценки с использованием программного обеспечения Python (версия 3.10). 
<bold>Результаты.</bold> Показано, что возраст, логарифм вирусной нагрузки (LVL), содержание лейкоцитов (WBC), эритроцитов (RBC), лимфоцитов (Lym), гематокрит (Hct), уровень тромбоцитов (PLT), гемоглобина (Hb) и параметры клинической стадии имели относительную важность выше уровня в 6%. Среди разработанных моделей для переменных важности CART со значениями F1-оценки и точности 0.887 и 0.801 и 0.897 и 0.822 для обучающих данных соответственно. Значение AUC, полученное для CART, было равно 0.918. <bold>Выводы.</bold> Поздняя диагностика ВИЧ-инфекции является существенной проблемой, особенно при разработке алгоритма, который может точно и интерпретируемо определять характеристики заболевания, такие как CART, что может быть важно для выявления характеристик, влияющих на позднюю диагностику ВИЧ и клинические и терапевтические решения.</p></trans-abstract><kwd-group xml:lang="en"><kwd>machine learning</kwd><kwd>deep learning</kwd><kwd>decision tree</kwd><kwd>HIV/AIDS</kwd><kwd>classification</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>машинное обучение</kwd><kwd>глубокое обучение</kwd><kwd>дерево решений</kwd><kwd>ВИЧ/СПИД</kwd><kwd>классификация</kwd></kwd-group><funding-group><award-group><funding-source><institution-wrap><institution xml:lang="en">Hamedan University of Medical Sciences in Iran</institution></institution-wrap><institution-wrap><institution xml:lang="ru">Хамаданский университет медицинских наук в Иране</institution></institution-wrap></funding-source><award-id>140105113518</award-id></award-group></funding-group></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Adler A., Mounier-Jack S., Coker R. Late diagnosis of HIV in Europe: definitional and public health challenges. AIDS Care, 2009, vol. 21, no. 3, pp. 284–293. doi: 10.1080/09540120802183537</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Bath R.E., Emmett L., Verlander N.Q., Reacher M. 
Risk factors for late HIV diagnosis in the East of England: evidence from national surveillance data and policy implications. Int. J. STD AIDS, 2019, vol. 30, no. 1, pp. 37–44. doi: 10.1177/0956462418793327</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Bendera A., Baryomuntebe D.M., Kevin N.U., Nanyingi M., Kinengyere P.B., Mujeeb S., Sulle E.J. Determinants of late HIV diagnosis and advanced HIV disease among people living with HIV in Tanzania. HIV AIDS-Res. Palliat. Care, 2024, vol. 26, no. 16, pp. 313–323. doi: 10.2147/HIV.S473291</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Bisaso K.R., Anguzu G.T., Karungi S.A., Kiragga A., Castelnuovo B. A survey of machine learning applications in HIV clinical research and care. Comput. Biol. Med., 2017, vol. 91, pp. 366–371. doi: 10.1016/j.compbiomed.2017.11.001</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Buetikofer S. Prevalence and risk factors of late presentation for HIV diagnosis and care in a tertiary referral center in Switzerland. Swiss Med. Wkly., 2014, vol. 144, pp. 1–8. doi: 10.4414/smw.2014.13913</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Camoni L., Raimondo M., Regine V., Salfa M.C., Suligoi B. Late presenters among persons with a new HIV diagnosis in Italy, 2010–2011. BMC Public Health, 2013, vol. 13, no. 1, pp. 1–6. doi: 10.1186/1471-2458-13-281</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Croxford S., Stengaard A.R., Brännström J., Combs L., Dedes N., Girardi E., Grabar S., Kirk O., Kuchukhidze G., Lazarus J.V., Noori T. Late diagnosis of HIV: an updated consensus definition. HIV Med., 2022, vol. 23, no. 11, pp. 1202–1208. doi: 10.1111/hiv.13425</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Gallo R.C. A reflection on HIV/AIDS research after 25 years. Retrovirology, 2006, vol. 3, no. 1, pp. 1–7. 
doi: 10.1186/1742-4690-3-72</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Gelaw Y.A., Senbete G.H., Adane A.A., Alene K.A. Determinants of late presentation to HIV/AIDS care in Southern Tigray Zone, Northern Ethiopia: an institution-based case-control study. AIDS Res. Ther., 2015, vol. 12, no. 1, pp. 1–8. doi: 10.1186/s12981-015-0074-4</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Gesesew H.A., Ward P., Woldemichael K., Mwanri L. Late presentation for HIV care in Southwest Ethiopia in 2003–2015: prevalence, trend, outcomes and risk factors. BMC Infect. Dis., 2018, vol. 18, pp. 1–11. doi: 10.1186/s12879-018-2987-7</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Holzinger A. Data mining with decision trees: theory and applications. Online Inf. Rev., 2015, vol. 39, no. 3, pp. 437–448</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Landwehr N., Hall M., Frank E. Logistic model trees. Mach. Learn., 2005, vol. 59, pp. 161–205. doi: 10.1007/s10994-005-0466-3</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Lee C.-Y., Lin Y.-P., Wang S.-F., Lu P.-L. Late CART initiation consistently driven by late HIV presentation: A multicenter retrospective cohort study in Taiwan from 2009 to 2019. Infect. Dis. Ther., 2022, vol. 11, no. 3, pp. 1033–1056. doi: 10.1007/s40121-022-00604-y</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Likatavicius G., Van de Laar M. HIV and AIDS in the European Union, 2011. Euro Surveill., 2012, vol. 17, no. 48, pp. 1–17.</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Madakkatel I., Zhou A., McDonnell M.D., Hyppönen E. Combining machine learning and conventional statistical approaches for risk factor discovery in a large cohort study. Sci. Rep., 2021, vol. 11, no. 1, pp. 22997:1-13. 
doi: 10.1038/s41598-021-02362-3</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Mi J.X., Li A.D., Zhou L.F. Review study of interpretation methods for future interpretable machine learning. IEEE Access, 2020, vol. 8, pp. 191969–191985. doi: 10.1109/ACCESS.2020.3032494.</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Mohammadi Y., Mirzaei M., Shirmohammadi-Khorram N., Farhadian M. Identifying risk factors for late HIV diagnosis and survival analysis of people living with HIV/AIDS in Iran (1987–2016). BMC Infect. Dis., 2021, vol. 21, no. 1, pp. 1–9. doi: 10.1186/s12879-021-06034-5</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Morales-Sánchez R., Montalvo S., Riaño A., Martínez R., Velasco M. Early diagnosis of HIV cases by means of text mining and machine learning models on clinical notes. Comput. Biol. Med., 2024, vol. 179, pp. 108830:1-9. doi: 10.1016/j.compbiomed.2024.108830</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Moslehi S., Rabiei N., Soltanian A.R., Mamani M. Application of machine learning models based on decision trees in classifying the factors affecting mortality of COVID-19 patients in Hamadan, Iran. BMC Med. Inform. Decis. Mak., 2022, vol. 22, no. 1, pp. 192:1-10. doi: 10.1186/s12911-022-01945-5</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Najafi-Vosough R., Faradmal J., Hosseini S.K., Moghimbeigi A., Mahjub H. Predicting hospital readmission in heart failure patients in Iran: a comparison of various machine learning methods. Healthc. Inform. Res., 2021, vol. 27, no. 4, pp. 307–314. doi: 10.4258/hir.2021.27.4.307</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Najafi-Vosough R., Faradmal J., Tapak L., Alafchi B., Najafi-Ghobadi K., Mohammadi T. Prediction the survival of patients with breast cancer using random survival forests for competing risks. J. Prev. Med. Hyg., 2022, vol. 63, no. 2, pp. 298–303. 
doi: 10.15167/2421-4248/jpmh2022.63.2.2089</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Nyika H., Mugurungi O., Shambira G., Gombe N.T., Bangure D., Mungati M., Tshimanga M. Factors associated with late presentation for HIV/AIDS care in Harare City, Zimbabwe, 2015. BMC Public Health., 2016, vol. 16, no. 369, pp. 1–7. doi: 10.1186/s12889-016-3070-8</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Osman A.I.A., Ahmed A.N., Chow M.F., Huang Y.F., El-Shafie A. Extreme gradient boosting (Xgboost) model to predict the groundwater levels in Selangor Malaysia. Ain Shams Eng. J., 2021, vol. 12, no. 2, pp. 1545–1556. doi: 10.1016/j.asej.2020.11.011</mixed-citation></ref><ref id="B24"><label>24.</label><mixed-citation>Reyad M., Sarhan A.M., Arafa M. A modified Adam algorithm for deep neural network optimization. Neural Comput. Appl., 2023, vol. 35, no. 23, pp. 17095–17112. doi: 10.1007/s00521-023-08568-z</mixed-citation></ref><ref id="B25"><label>25.</label><mixed-citation>Romero-Rodríguez D.P., Ramírez C., Imaz-Rosshandler I., Ormsby C.E., Peralta-Prado A., Olvera-García G., Cervantes F., Würsch-Molina D., Romero-Rodríguez J., Jiang W., Reyes-Terán G. Machine learning-selected variables associated with CD4 T cell recovery under antiretroviral therapy in very advanced HIV infection. Transl. Med. Commun., 2020, vol. 5, pp. 1–10. doi: 10.1186/s41231-020-00058-x</mixed-citation></ref><ref id="B26"><label>26.</label><mixed-citation>Rotily M., Bentz L., Pradier C., Obadia Y., Cavailler P. Factors related to delayed diagnosis of HIV infection in southeastern France. Int. J. STD AIDS, 2000, vol. 11, no. 8, pp. 531–535. doi: 10.1258/0956462001916193</mixed-citation></ref><ref id="B27"><label>27.</label><mixed-citation>Roustaei N. Application and interpretation of linear-regression analysis. Med. Hypothesis Discov. Innov. Ophthalmol., 2024, vol. 13, no. 3, pp. 151–159. 
doi: 10.51329/mehdiopt2024.309546</mixed-citation></ref><ref id="B28"><label>28.</label><mixed-citation>Valkenborg D., Rousseau A.J., Geubbelmans M., Burzykowski T. Support vector machines. Am. J. Orthod. Dentofacial Orthop., 2023, vol. 164, no. 5, pp. 754–757. doi: 10.1016/j.ajodo.2023.06.011</mixed-citation></ref><ref id="B29"><label>29.</label><mixed-citation>Wang D., Larder B., Revell A., Montaner J., Harrigan R., De Wolf F., Lange J., Wegner S., Ruiz L., Pérez-Elías M.J., Emery S. A comparison of three computational modelling methods for the prediction of virological response to combination HIV therapy. Artif. Intell. Med., 2009, vol. 47, no. 1, pp. 63–74. doi: 10.1016/j.artmed.2009.06.001</mixed-citation></ref><ref id="B30"><label>30.</label><mixed-citation>Weissman S., Yang X., Zhang J., Chen S., Olatosi B., Li X. Using a machine learning approach to explore predictors of health care visits as missed opportunities for HIV diagnosis. AIDS, 2021, vol. 35, no. 1, pp. S7-S18. doi: 10.1097/QAD.0000000000002724</mixed-citation></ref><ref id="B31"><label>31.</label><mixed-citation>World Health Statistics 2023: monitoring health for the SDGs, sustainable development goals. World Health Organization, 2023.</mixed-citation></ref><ref id="B32"><label>32.</label><mixed-citation>Xiang Y., Du J., Fujimoto K., Li F., Schneider J., Tao C. Application of artificial intelligence and machine learning for HIV prevention interventions. Lancet HIV., 2022, vol. 9, no. 1, pp. 54–62. doi: 10.1016/S2352-3018(21)00289-7</mixed-citation></ref><ref id="B33"><label>33.</label><mixed-citation>Zhao J., Gao M., Zhao D., Tian W. Prevalence of late HIV diagnosis and its impact on mortality: a comprehensive systematic review and meta-analysis. HIV Med., 2025, vol. 26, no. 4. doi: 10.1111/hiv.13530</mixed-citation></ref></ref-list></back></article>
