<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "https://jats.nlm.nih.gov/publishing/1.3/JATS-journalpublishing1-3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.3" article-type="research-article" xml:lang="en"><front><journal-meta><journal-id journal-id-type="issn">2656-2804</journal-id><journal-title-group><journal-title>Indonesian Journal on Learning and Advanced Education (IJOLAE)</journal-title><abbrev-journal-title>ijolae</abbrev-journal-title></journal-title-group><issn pub-type="epub">2656-2804</issn><issn pub-type="ppub">2655-920X</issn><publisher><publisher-name>Universitas Muhammadiyah Surakarta</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.23917/ijolae.v6i3.23254</article-id><article-categories/><title-group><article-title>ITEMAN-Based Evaluation of End-of-Semester Assessment Items: A Case Study of Language Test in Indonesian School Context</article-title></title-group><contrib-group><contrib contrib-type="author"><name><surname>Himawan</surname><given-names>Riswanda</given-names></name><address><country>Indonesia</country><email>riswandahimawan.242119@students.um.ac.id</email></address><xref ref-type="aff" rid="AFF-1"/><xref ref-type="corresp" rid="cor-0"/></contrib><contrib contrib-type="author"><name><surname>Hermanto</surname><given-names>Hermanto</given-names></name><address><country>Indonesia</country></address><xref ref-type="aff" rid="AFF-2"/></contrib><contrib contrib-type="author"><name><surname>Nurgiyantoro</surname><given-names>Burhan</given-names></name><address><country>Indonesia</country></address><xref ref-type="aff" rid="AFF-3"/></contrib><contrib contrib-type="author"><name><surname>Suyono</surname><given-names>Suyono</given-names></name><address><country>Indonesia</country></address><xref ref-type="aff" rid="AFF-1"/></contrib><contrib contrib-type="author"><name><surname>Widyartono</surname><given-names>Didin</given-names></name><address><country>Indonesia</country></address><xref ref-type="aff" rid="AFF-1"/></contrib><contrib 
contrib-type="author"><name><surname>Purwanti</surname><given-names>Agustina</given-names></name><address><country>Indonesia</country></address><xref ref-type="aff" rid="AFF-4"/></contrib><contrib contrib-type="author"><name><surname>Yujing</surname><given-names>Le</given-names></name><address><country>China</country></address><xref ref-type="aff" rid="AFF-5"/></contrib><contrib contrib-type="author"><name><surname>Pogadaev</surname><given-names>Victor A.</given-names></name><address><country>Indonesia</country></address><xref ref-type="aff" rid="AFF-6"/></contrib></contrib-group><aff id="AFF-1"><institution content-type="dept">Faculty of Letters</institution><institution-wrap><institution>Universitas Negeri Malang</institution><institution-id institution-id-type="ror">https://ror.org/00ypgyy34</institution-id></institution-wrap><country country="ID">Indonesia</country></aff><aff id="AFF-2"><institution content-type="dept">Faculty of Teacher Training and Education</institution><institution-wrap><institution>Universitas Ahmad Dahlan</institution><institution-id institution-id-type="ror">https://ror.org/03hn13397</institution-id></institution-wrap><country country="ID">Indonesia</country></aff><aff id="AFF-3"><institution content-type="dept">Faculty of Languages, Arts and Culture</institution><institution-wrap><institution>Universitas Negeri Yogyakarta</institution><institution-id institution-id-type="ror">https://ror.org/05fryw881</institution-id></institution-wrap><country country="ID">Indonesia</country></aff><aff id="AFF-4">Sekolah Menengah Pertama Negeri 8, Kota Yogyakarta</aff><aff id="AFF-5">Faculty of Education, University Hainan College of Foreign Studies</aff><aff id="AFF-6"><institution content-type="dept">Faculty of Languages and Linguistics</institution><institution-wrap><institution>Moscow State Institute of International Relations</institution><institution-id 
institution-id-type="ror">https://ror.org/04zn7jb34</institution-id></institution-wrap><country country="RU">Russia</country></aff><author-notes><corresp id="cor-0"><bold>Corresponding author: Riswanda Himawan</bold>, Faculty of Letters, Universitas Negeri Malang. Email: <email>riswandahimawan.242119@students.um.ac.id</email></corresp></author-notes><pub-date date-type="pub" iso-8601-date="2024-09-20" publication-format="electronic"><day>20</day><month>9</month><year>2024</year></pub-date><pub-date date-type="collection" iso-8601-date="2024-11-22" publication-format="electronic"><day>22</day><month>11</month><year>2024</year></pub-date><fpage>387</fpage><lpage>402</lpage><history><date date-type="received" iso-8601-date="2023-12-22"><day>22</day><month>12</month><year>2023</year></date><date date-type="rev-recd" iso-8601-date="2024-05-31"><day>31</day><month>5</month><year>2024</year></date><date date-type="accepted" iso-8601-date="2024-08-27"><day>27</day><month>8</month><year>2024</year></date></history><permissions><copyright-statement>Copyright (c) 2024 Riswanda Himawan, Hermanto Hermanto, Burhan Nurgiyantoro, Suyono Suyono, Didin Widyartono, Agustina Purwanti, Le Yujing, Victor A. Pogadaev</copyright-statement><copyright-year>2024</copyright-year><copyright-holder>Riswanda Himawan, Hermanto Hermanto, Burhan Nurgiyantoro, Suyono Suyono, Didin Widyartono, Agustina Purwanti, Le Yujing, Victor A. 
Pogadaev</copyright-holder><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This work is licensed under a Creative Commons Attribution 4.0 International License.</license-p></license></permissions><self-uri xlink:href="https://journals2.ums.ac.id/ijolae/article/view/9098" xlink:title="ITEMAN-Based Evaluation of End-of-Semester Assessment Items: A Case Study of Language Test in Indonesian School Context">ITEMAN-Based Evaluation of End-of-Semester Assessment Items: A Case Study of Language Test in Indonesian School Context</self-uri><abstract><p>Assessment is an inseparable part of the learning process. Assessment is a way to decide the choices that a teacher will make for students, so that what they do is truly measurable. In line with this description, this study aims to determine the quality of the end-of-semester assessment questions for Indonesian, grade VIII, at SMPN 8 Yogyakarta. The method used in this research is quantitative descriptive. The quantitative descriptive approach is used to analyze documents in the form of output analysis results from the ITEMAN application. The results of the review are presented in the form of validity, reliability, IDB, ITK and the functioning of the distractor. To obtain the results, the research steps carried out in this study were testing the validity of the question grids; testing the reliability, IDB, ITK, and distractors through the ITEMAN program; analyzing the results according to expert opinions and research relevant to this study; and concluding the results of the study. There were 50 questions that were analyzed and tested on 32 class VIII students of SMPN 8 Yogyakarta. Overall, the research results show that all questions are declared valid and Alpha is 0.926. 
This shows that the reliability of the items is very high. The IDB results show 38 items in the good, 5 in the moderate, 4 in the sufficient, and 3 in the failed categories; the results of the ITK analysis show 31 very easy questions, 10 easy questions, 5 moderate questions, 2 difficult questions, and 2 very difficult questions; and the results of the distractor function show that the distractors of 32 questions do not work, while the distractors of 18 questions work. Overall, the results of this analysis aim to provide a reference base related to the quality of questions through ITEMAN, especially in learning Indonesian, which so far has not been done much.</p></abstract><kwd-group><kwd>assessment item semester</kwd><kwd>indonesian school context</kwd><kwd>ITEMAN analysis</kwd><kwd>item discrimination index</kwd><kwd>learning objectives achievement</kwd></kwd-group><custom-meta-group><custom-meta><meta-name>File created by JATS Editor</meta-name><meta-value><ext-link ext-link-type="uri" xlink:href="https://jatseditor.com" xlink:title="JATS Editor">JATS Editor</ext-link></meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec><title>1. Introduction</title><p>Assessment is an inseparable part of the learning process <xref ref-type="bibr" rid="BIBR-7">(Asrial et al., 2023)</xref><xref ref-type="bibr" rid="BIBR-35">(Prastikawati et al., 2024)</xref>. Assessment needs to be designed in such a way that learning objectives can be achieved <xref ref-type="bibr" rid="BIBR-21">(Kusumaningtyas et al., 2024)</xref>.</p><p>It serves as a strategic tool for teachers to determine the most effective learning activities and ensure student progress is truly measurable <xref ref-type="bibr" rid="BIBR-47">(Susanto et al., 2015)</xref>. One of the assessment types at the middle level is the end-of-semester or final exam. This exam serves as a process for both teachers and students to evaluate learning outcomes achieved throughout the semester. 
It allows the teachers and students to reflect on the results, so that adjustments can be made to improve future learning experiences <xref ref-type="bibr" rid="BIBR-31">(Nurhalimah et al., 2022)</xref>.</p><p>At the end of the semester, students receive grades compiled with scores from other assessments <xref ref-type="bibr" rid="BIBR-42">(Ruay Garcés, 2018)</xref>. These grades are measured by certain achievement standards, serving as parameters for student progress and learning success <xref ref-type="bibr" rid="BIBR-51">(Wahyuni &amp; Kurniawan, 2018)</xref>. This is in line with <xref ref-type="bibr" rid="BIBR-20">(Kurniawan, 2015)</xref> who stated that the final assessment is included in a summative test, which functions to determine the extent to which students have achieved competence in certain subjects. The results from these tests are then compared to learning objectives or the minimum passing grade (KKM) <xref ref-type="bibr" rid="BIBR-25">(Muhith, 2018)</xref>.</p><p>Therefore, the results of the end-of-semester assessment serve a dual purpose: they function as a record of student learning progress and determine a student's eligibility for the next program. In this sense, the assessment falls under the category of learning evaluation <xref ref-type="bibr" rid="BIBR-4">(Anggraini &amp; Suyata, 2014)</xref><xref ref-type="bibr" rid="BIBR-34">(Parancika &amp; Suyata, 2020)</xref><xref ref-type="bibr" rid="BIBR-3">(Alnovgada &amp; Suyata, 2019)</xref>.</p><p>Assessment or evaluation of learning is very closely related to test instruments <xref ref-type="bibr" rid="BIBR-50">(Timor et al., 2022)</xref>. It is an important instrument used to measure learning achievement. A test can be defined as an assessment tool that uses questions or instructions for students to answer and complete 
(Mania et al., 2020).</p><p><xref ref-type="bibr" rid="BIBR-37">(Purniasari et al., 2021)</xref> stated that a valid instrument must be thoroughly tested to effectively evaluate student learning outcomes across all domains, from character development to critical thinking skills. A test is defined as an instrument or systematic procedure used to observe and measure one or more student characteristics. This measurement is typically done using a numerical scale or a classification scheme (Nitko &amp; Brookhart via Iskandar &amp; Rizal, 2018).</p><p>This description emphasizes the importance of developing tests following proper test guidelines. Before using the test, the teacher must test the validity of the test instrument to meet the requirements for both validity and reliability. As a part of learning, evaluation is one of the most significant components <xref ref-type="bibr" rid="BIBR-37">(Purniasari et al., 2021)</xref>. However, despite its undeniable importance, the role of evaluation in facilitating learning outcomes is not always fully recognized, especially by teachers as facilitators in learning <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref>.</p><p>This is further supported by the results of initial observations in several schools, which indicate that the education system, including learning, must be balanced with good assessment. Many teachers have difficulty processing student assessment scores. Teachers often assign grades directly to students without analyzing the question items, potentially overlooking the crucial principles of clear learning evaluation <xref ref-type="bibr" rid="BIBR-12">(Fitriani et al., 2020)</xref><xref ref-type="bibr" rid="BIBR-8">(Azizah &amp; Sumardi, 2021)</xref>.</p><p>Building on this point, this aligns with <xref ref-type="bibr" rid="BIBR-41">(Rotama et al., 2020)</xref> who identify several issues within Indonesia’s education evaluation system. 
They point out an overemphasis on students’ cognitive abilities. Additionally, the instruments used are very limited. Developed by teachers, they lack essential validation and reliability testing, as well as crucial item analysis processes like discrimination index, difficulty level, and distractor functioning analysis <xref ref-type="bibr" rid="BIBR-13">(Fridaram et al., 2021)</xref>. As <xref ref-type="bibr" rid="BIBR-15">(Himawan &amp; K., 2024)</xref> suggest, a valid assessment instrument for learning evaluation necessitates both high validity and reliability <xref ref-type="bibr" rid="BIBR-5">(Arifin &amp; Retnawati, 2017)</xref>.</p><p>Item analysis can be conducted using two primary theories: Classical Test Theory (CTT) and Item Response Theory (IRT). This study employed CTT to analyze the instrument’s results through the ITEMAN program. A widely used software program, ITEMAN is designed for classical item analysis <xref ref-type="bibr" rid="BIBR-2">(Alfarisa et al., 2019)</xref>. It is part of the MicroCATn software suite developed by the Assessment Systems Corporation in 1982 <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref>.</p><p>Subsequently, <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref> explain that there are several stages to carrying out item analysis, using the ITEMAN computer program. The data input process begins by creating a text file. Users can navigate from the Start menu and search for “Notepad”. This file serves as the input for the ITEMAN program and requires specific information: The first line should include the number of questions in the assessment, a code for omitted responses (typically “O” or “0”) for unanswered questions, the population code (denoted by “N”), and the type of participant identification number (usually a number). The second line requires the answer key which can be filled with answer options (e.g., A, B, C, D, E). 
The third line specifies the total number of answer options available for the questions <xref ref-type="bibr" rid="BIBR-27">(Nanda Pratiwiningtyas et al., 2017)</xref>. The fourth line allows users to request analysis for each item. “Y” indicates the item should be analyzed, “N” not to be analyzed. Finally, the fifth line requires the student answer alongside their corresponding identification information. Next, it is important to remember to save the data file within the same folder where the ITEMAN program is located (<xref ref-type="bibr" rid="BIBR-17">(Himawan &amp; Suyata, 2022)</xref>; <xref ref-type="bibr" rid="BIBR-6">(Arvianto, 2016)</xref>; <xref ref-type="bibr" rid="BIBR-45">(Shanta Monica, 2013)</xref>)</p><p><xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref> explains that the results of the ITEMAN analysis consist of Item Statistics and Alternative Statistics. The former (statistics for items) consists of Seq No. (sequential number) according to the order of data entry. The scale Item is the serial number of the item. Pop. Correct (proportion of correct answers/difficulty level) contains an index of the proportion of correct answers per item which shows the item difficulty index. Biser is the biserial correlation between the correct answer per item and the correct answer score. Point Biser is the point biserial correlation between the correct answers per item and the total score (<xref ref-type="bibr" rid="BIBR-52">(Wijaya et al., 2019)</xref>; <xref ref-type="bibr" rid="BIBR-1">(Al-faruq, 2023)</xref>; <xref ref-type="bibr" rid="BIBR-26">(Mustafidah et al., 2021)</xref>). This correlation coefficient is expressed as the discriminating power index (IDB) <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref>.</p><p>Validity refers to the degree to which a test instrument measures what it is intended to measure. 
A test instrument with high validity is appropriate to carry out measurements or data collection, and the results will be precise and accurate (<xref ref-type="bibr" rid="BIBR-14">(Hanifah, 2014)</xref>; <xref ref-type="bibr" rid="BIBR-44">(Setiawan et al., 2020)</xref>)</p><p>Reliability, on the other hand, refers to the consistency of instrument’s measurements. A reliable test will produce consistent results when administered multiple times under similar conditions (<xref ref-type="bibr" rid="BIBR-23">(Mardiana &amp; Suyata, 2017)</xref>; <xref ref-type="bibr" rid="BIBR-31">(Nurhalimah et al., 2022)</xref>)</p><p>Building on the importance of validity, <xref ref-type="bibr" rid="BIBR-30">(Nurgiyantoro et al., 2020)</xref> suggests that the instrument’s validity can be assessed through the review indicators. The review includes aspects of material, construction, and language. The material aspect contains matters relating to (a) the conformity of the items with indicators in the blueprint; (b) The suitability of the content of the material with science; (c) the Answer key; (d) the function of the distractor option in the test items. The construction aspect includes (a) the clarity of the formulation of the main problems; (b) the clarity of answer choices; (c) homogeneous answer choices; (d) certainty that there is no double negative form; (e) determining the length of the answer for each item; (f) there is no dependence between items; (g) the order of the choices in the form of numbers and time <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref>.</p><p>Meanwhile, the aspect of language includes (a) communicative language; (b) grammatical sentences; (c) no double meaning of sentences; (d) vocabulary selection/diction. 
In the discussion of item analysis, classically, through the ITEMAN program, the test items are regarded as feasible if the item difficulty index (ITK) falls within an acceptable range and the discriminating power index (IDB) meets the requirements <xref ref-type="bibr" rid="BIBR-47">(Susanto et al., 2015)</xref>. The item difficulty level index (ITK) shows how easy or how difficult an item is for the test takers, while the discriminating power index (IDB) is a statement about how far an item can differentiate the ability of participants in the high and low groups <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>.</p><p>An item with a difficulty index (ITK) between 0.20-0.80 is considered acceptable. This range can be further categorized: difficult (0.20-0.40), moderate (0.41-0.60), and easy (0.61-0.80). The discriminating power index (IDB) can be claimed eligible if the index is greater than or equal to 0.20 <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. Distractors function properly when, in a good item, they are chosen evenly by students. Conversely, if the items are not effective, students will tend to choose the answer choices unevenly <xref ref-type="bibr" rid="BIBR-39">(Putri &amp; Ofianto, 2019)</xref>.</p><p>Furthermore, Nurgiyantoro (2016) identified that there are several criteria for determining the effectiveness of the distractor, namely (1) all distractors (false-options) must be selected, (2) the number of false-option voters from the high group participants must be less than the low group, and (3) if there is only one false-option voter, he must be from the low group. Criteria (2) and (3) are often seen as burdensome, which in essence are similar to the logic of the IDB's demands above, so only Criterion (1) is used effectively. 
False options were ineffective because none of the test takers chose, as a consequence, the item had to be revised <xref ref-type="bibr" rid="BIBR-22">(Magdalena et al., 2021)</xref>.</p><p>Based on the elaboration, the research describe the results of the item analysis for the final exam in the Indonesian language subject at the State Junior High School 8 Yogyakarta. The analysis aimed to find out the result of item analysis for the end semester assessment for Class VIII in Indonesian Language subject, which has been tested in Class VIII at State Junior High School 8 Yogyakarta. In addition, it tried to provide a basis in the form of references to teachers who will carry out the analysis of the test items developed.</p><p>In the context of the item analysis using the ITEMAN program, there are several previous studies relevant to the research. The first research was conducted by the first, <xref ref-type="bibr" rid="BIBR-2">(Alfarisa et al., 2019)</xref> with their research entitled Item Analysis of Social Science Test Using ITEMAN Software for Class V Elementary School. This study has similarities in that both analyze the items using classical theories and the ITEMAN program. 
In contrast to <xref ref-type="bibr" rid="BIBR-33">(Pangesti et al., 2020)</xref> who examined social science test items for fifth graders, this study investigated items for the end-of-semester assessment in Indonesian language specifically for Class VIII at State Junior High School 8 Yogyakarta.</p><p>The research contribution made by <xref ref-type="bibr" rid="BIBR-2">(Alfarisa et al., 2019)</xref> to this study is to provide various theoretical foundations regarding item analysis and a starting point in the form of a method for analyzing test items using the ITEMAN program.</p><p>Second, relevant previous research was conducted by <xref ref-type="bibr" rid="BIBR-43">(Setiawan et al., 2022)</xref> with the research entitled "Item Analysis of End-of-Semester Test (PAT) for Indonesian Language Subject in Class XI State Senior High School 1 Polanharjo, Klaten". This study shares similarities with the current study as it analyzes test items in the Indonesian language subject using the ITEMAN program.</p><p>The difference is that the study <xref ref-type="bibr" rid="BIBR-43">(Setiawan et al., 2022)</xref> analyzed the items for the senior high school student level. However, this study analyzed the Indonesian language test items for the junior high school level. Concerning the contributions made, the research has produced various findings on item analysis, including validity, reliability, IDB, ITK, and distractor analysis. This study can provide theoretical contributions related to theories on item analysis using the ITEMAN program.</p><p>Another relevant study is by <xref ref-type="bibr" rid="BIBR-37">(Purniasari et al., 2021)</xref>, entitled "Item Analysis for the Chemistry Subject School Exam at State Senior High School 1 Kutowinangun for the 2019/2020 Academic Year Using the ITEMAN and Rasch Models".</p><p>In common, all research focused on analyzing test items in one subject. 
Research <xref ref-type="bibr" rid="BIBR-37">(Purniasari et al., 2021)</xref> analyzes the test items and school exam at the high school level. Meanwhile, this study analyzes the end-ofsemester test for the Indonesian language subject at the junior high school level. Research <xref ref-type="bibr" rid="BIBR-37">(Purniasari et al., 2021)</xref> produced several findings regarding the validity and reliability of the test so that it can contribute to providing a theoretical basis for this study.</p><p>Several research related to item analysis using the ITEMAN program have been conducted. This study is a continuation of those research <xref ref-type="bibr" rid="BIBR-4">(Anggraini &amp; Suyata, 2014)</xref>. The aspects that have not been addressed in those research will be the focus of this study, thereby complementing the related research on item analysis with the ITEMAN program and contributing to the evaluation of learning in schools.</p><p>Based on the description, this research aims to find out how the final assessment items for the Indonesian language subject at SMPN 8 Yogyakarta are analyzed. It encompasses the validity of the grid as a guideline in creating appropriate and useful grids, determining test reliability which reflects the consistency of an assessment instrument, and understanding the IDB, ITK, and distractors in the evaluation questions developed by the teachers at SMPN 8 Yogyakarta. 
These components can be used as references for the teachers in developing questions before administering them to students, allowing for an assessment of question quality that can promote critical thinking processes and achieve learning objectives.</p><p>Based on these research objectives, the research questions formulated for this research are as follows; (1) what are the results of the validity of the question created by the teacher and applied to students in PAS at SMPN 8 Yogyakarta; (2) What is the reliability of the questions created by the teacher and applied to students in PAS at SMPN 8 Yogyakarta; (3) What are the results of the IDB, ITK and distractor question assessments created by the teacher and applied to students in PAS at SMPN 8 Yogyakarta.</p><p>Overall, the research aims to provide a reference base for improving the quality of education, especially at the junior high school level. The novelty demonstrated in this research includes several steps regarding item analysis using the ITEMAN program, which currently needs to be performed. The aspects described in this research will serve as a contribution and reference for conducting item analysis before the questions are administered to students. This approach aims to produce high-quality questions that effectively promote the achievement of learning objectives.</p><p>This research, through its focus on item analysis in learning evaluation, aligns with the goals of <xref ref-type="bibr" rid="BIBR-38">(Putri Pangestu &amp; Rohinah, 2019)</xref> to enhance the quality of teacher-developed assessment questions. Ultimately, this improvement contributes to achieving the broader objective of raising educational quality.</p></sec><sec><title>2. Method</title><p>The research on the item analysis of the final exam for the Indonesian language subject in Class VIII State Junior High School 8 Yogyakarta is included in the document analysis research using a quantitative descriptive approach. 
Quantitative descriptive analysis was used to describe the results of the score obtained in the ITEMAN program. Quantitative data analysis was chosen as the source of data analysis because it was used to describe data in the form of numbers resulting from the translation of question item analysis, which was viewed through the ITEMAN program.</p><p>The quantitative approach in this research was implemented to describe the results of ITEMAN output, which was combined with theory, expert judgement, and several studies relevant to this research. This approach aimed to find data to be discussed regarding item analysis as a reference for teachers in developing evaluation questions in schools, which will undoubtedly have an impact on the advancement of education.</p><p>The sources of data information used in this study were the blueprint, test items, and answer sheets for students' answers or responses. Based on this description, the data collection in this study was carried out through documentation, namely documenting the students' answers, related to the test items which were developed. There were 50 questions analyzed and the number of answers analyzed using ITEMAN was taken from 32 students, with various student conditions. There are students with high, moderate, and low abilities. The try-out was conducted at the State Junior High School 8 Yogyakarta in November 2022.</p><p>In this study, the analysis of end-of-semester test items for Indonesian Language, Class VIII State Junior High School 8 Yogyakarta was carried out by analyzing the blueprint, questions, and answer sheets containing student responses. The items were reviewed in terms of validity, reliability, level of difficulty, discriminating power, and the effectiveness of distractors with classical theory using the ITEMAN computer program. 
In addition, this study will also analyze and provide examples of descriptions of test items that are appropriate and not appropriate for use.</p><p>The data analysis process included several stages, namely; (1) documenting PAS class VIII questions at SMPN 8 Yogyakarta; (2) measuring the validity of the items by analyzing the blueprint developed according to several indicators from experts including the quality of question writing, writing of words, clauses and sentences to the distribution of cognitive levels contained in the questions; (3) carrying out reliability tests and checking the question difficulty index, discriminating power index, and distractor function using the ITEMAN program, by recording the results of students' answers and then processing them using the ITEMAN program; (4) analyze the results according to expert opinion, relevant theory and research; (5) draw conclusions and classify the quality of the questions and; (6) analyzing items that meet the requirements and do not meet the requirements.</p><p>The validity of the test instrument according to <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref> was shown by several indicators. The item validity was measured by the researchers' colleagues using the indicators proposed by the experts. The indicators include some aspects, namely material, construction, and language. First, the material aspect consists of (a) the conformity of the items with indicators in the blueprint, (b) the conformity of the items with science, (c) the answer key, and (d) the distractor function. Second, the construction aspect includes: (a) the clarity of item formulation, (b) the clarity of the options, (c) the homogeneous answer choices, (d) no items with double negative statements, (e) the length of each item, (f) dependency of each item, and (g) the order of items including numbers and time. 
At last, the language aspect focuses on (a) the communicative language used in the items, (b) grammar, (c) sentences with a double meaning, and (d) the use of vocabulary. These components were used as guidelines for assessing the validity of the grids, ensuring that the questions to be administered to students were appropriate in terms of format, content, number of items, and alignment with indicators <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>.</p><p>The reliability of the items was measured using ITEMAN by checking the statistics of the Alpha section of the test items. Then, analyzed several items using ITEMAN program. The discrimination index of the items was shown by the point-biserial correlation coefficient. The biserial point was calculated using ITEMAN. The item difficulty level was shown by proportional correct answers calculated using ITEMAN, and the distractor function is shown by proportional endorsing value in the ITEMAN program <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref></p><p>After obtaining the ITEMAN output, the results were interpreted by expert judgment, including the Point Biserial, which is the point biserial correlation between the correct answers per item and the total score. This correlation coefficient was expressed as the Item Discrimination Index (IDB). The Item Difficulty Index indicates how easy or difficult an item is for the tested participants. The functionality of distractors can be seen through the proportional endorsing value in the ITEMAN program <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. Overall, this research not only involved data presentation and analysis but also drew conclusions from the findings.</p></sec><sec><title>3. Result and Discussion</title><p>The results of the item analysis, particularly in terms of validity, are presented below:</p><sec><title>1. 
Validity</title><p>Validity refers to the degree to which a test measures what it is intended to measure. Data or information can be considered valid if it accurately reflects the actual situation.</p><p>Item validity is the appropriateness of test items in relation to the indicators that refer to the definitions or rules of the item before being administered to students <xref ref-type="bibr" rid="BIBR-22">(Magdalena et al., 2021)</xref>; <xref ref-type="bibr" rid="BIBR-40">(Retnawati, 2015)</xref>; <xref ref-type="bibr" rid="BIBR-9">(Dewi &amp; Sudaryanto, 2020)</xref>; <xref ref-type="bibr" rid="BIBR-22">(Magdalena et al., 2021)</xref>. Based on these statements, it is evident that item validity must be assessed to determine the fundamental quality of the items. This includes verifying the validity of the item writing, punctuation errors, and the distribution of cognitive levels before the items are presented to the students <xref ref-type="bibr" rid="BIBR-19">(Iskandar &amp; Rizal, 2018)</xref>; <xref ref-type="bibr" rid="BIBR-24">(Martin, 2020)</xref>; <xref ref-type="bibr" rid="BIBR-53">(Yadi, 2017)</xref>.</p><p>The validity of the test instrument according to <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref> is shown by several indicators. The item validity was measured by the colleagues using the indicators proposed by the experts. The indicators included some aspects, namely material, construction, and language. First, the material aspect consists of (a) the conformity of the items with indicators in the blueprint, (b) the conformity of the items with science, (c) the answer key, and (d) the distractor function. 
Second, the construction aspect includes: (a) the clarity of item formulation, (b) the clarity of the options, (c) the homogeneous answer choices, (d) no items with double negative statements, (e) the length of each item, (f) dependency of each item, and (g) the order of items including numbers and time.</p><p>Finally, the language aspect focuses on (a) the communicative language used in the items, (b) grammar, (c) sentences with a double meaning, and (d) the use of vocabulary. These indicators were used as references to assess the validity of the grids against the questions developed by teachers, ensuring alignment between the items planned in the grids and those written in the evaluation questions.</p><p>Based on the explanation above, the item validity analyzed in this research can be further explained as follows.</p><table-wrap id="table-1" ignoredToc=""><label>Table 1</label><caption><p>Question Validity Study Results</p></caption><table frame="box" rules="all"><thead><tr><th colspan="1" rowspan="1" style="" align="left" valign="top">No</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Category</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Item</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Amount</th></tr></thead><tbody><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">1</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Valid</td><td colspan="1" rowspan="1" style="" align="left" valign="top">1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50</td><td colspan="1" rowspan="1" style="" align="left" valign="top">50</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">2</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Invalid</td><td colspan="1" rowspan="1" style="" align="left"
valign="top">-</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0</td></tr></tbody></table></table-wrap><p>Validity is the ability of an instrument to measure accurately according to the circumstances to be measured. A test instrument that has a high level of validity is appropriate for assessing and collecting data because the results will be precise and accurate <xref ref-type="bibr" rid="BIBR-48">(Syaifudin, 2020)</xref>. A good test instrument is considered to have high reliability if it can produce data that are relatively consistent <xref ref-type="bibr" rid="BIBR-23">(Mardiana &amp; Suyata, 2017)</xref>. <xref ref-type="bibr" rid="BIBR-40">(Retnawati, 2015)</xref> stated that validity and reliability are often discussed in measurement results. Validity is related to the quality of the interpretation of the test scores, while reliability is related to the consistency of test scores. Both are interrelated because reliability will affect the validity of the measurement, but not everything reliable is valid <xref ref-type="bibr" rid="BIBR-31">(Nurhalimah et al., 2022)</xref>.</p><p>The validity of the 50 items developed by the teachers in this study was measured by the researchers' colleagues. They were Indonesian language teachers at SMPN 8 Yogyakarta. The validity of the test instrument according to <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref> is shown by several indicators. The item validity was measured by the colleagues using the indicators proposed by the experts. The indicators include some aspects, namely material, construction, and language. First, the material aspect consists of (a) the conformity of the items with indicators in the blueprint, (b) the conformity of the items with science, (c) the answer key, and (d) the distractor function. 
Second, the construction aspect includes: (a) the clarity of item formulation, (b) the clarity of the options, (c) the homogeneous answer choices, (d) no items with double negative statements, (e) the length of each item, (f) dependency of each item, and (g) the order of items including numbers and time. Finally, the language aspect focuses on (a) the communicative language used in the items, (b) grammar, (c) sentences with a double meaning, and (d) the use of vocabulary <xref ref-type="bibr" rid="BIBR-30">(Nurgiyantoro et al., 2020)</xref>.</p><p>After the 50 items were examined, it was concluded that all items were categorized as valid items because all of them were considered appropriate. Based on the table above, there were fifty items categorized as valid items (100%). They were item numbers 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, and 50.</p><p>Based on the result above, it was found that there was no invalid item (0%), and all items were considered valid (100%). Therefore, it can be stated that all items in the final exam have a high level of validity. <xref ref-type="bibr" rid="BIBR-16">Himawan &amp; Nurgiyantoro (2022)</xref> pointed out that in testing content validity, teachers may compare the developed items with the blueprint or curriculum that has been taught. This process can be done by the teachers and their peers.</p></sec><sec><title>2. Reliability</title><p>A test instrument is considered reliable if it generates the same results when used to assess anyone at any time. In terms of reliability, the researchers used ITEMAN 3.0 to find the Alpha value.
The result of the reliability analysis is presented in the ITEMAN output below.</p><fig id="figure-1" ignoredToc=""><label>Figure 1</label><caption><p>Reliability Test Results Through the ITEMAN Program</p></caption><graphic xlink:href="https://journals2.ums.ac.id/ijolae/article/download/9098/4392/51385" mimetype="image" mime-subtype="png"><alt-text>Reliability test results from the ITEMAN program</alt-text></graphic></fig><p>A test instrument is considered reliable if it generates the same results when it is used to assess anyone at any time. In terms of reliability, the researchers used ITEMAN 3.0 to get the Alpha value. <xref ref-type="bibr" rid="BIBR-39">Putri &amp; Ofianto (2019)</xref> define reliability as a measure that shows the level of consistency of a test item. In this research, the reliability value obtained was 0.926, indicating that the developed multiple-choice questions have a very high level of reliability. The results are in line with a study conducted by <xref ref-type="bibr" rid="BIBR-31">(Nurhalimah et al., 2022)</xref>. It is shown that a coefficient that is higher than or equal to 0.91 indicates a very high level of reliability <xref ref-type="bibr" rid="BIBR-28">(Nayla Amalia &amp; Widayati, 2012)</xref>; <xref ref-type="bibr" rid="BIBR-46">(Suharti, 2017)</xref>; <xref ref-type="bibr" rid="BIBR-18">(Ida &amp; Musyarofah, 2021)</xref>.</p><p>The results of the reliability test are in accordance with the research of <xref ref-type="bibr" rid="BIBR-11">(Fernanda &amp; Hidayah, 2020)</xref>, which stated that the reliability of test instruments under a classical test theory approach can be seen from the alpha score obtained. In a test, it is important to observe the consistency and certainty reflected in the test results obtained, which make the test trustworthy or dependable <xref ref-type="bibr" rid="BIBR-32">(Nuryanti et al., 2018)</xref>.
The Alpha score in the ITEMAN output can be used as a guide in item analysis, as Alpha is closely related to the reliability of a test instrument.</p><p>The alpha score can be used to determine the quality of the questions according to the following reliability classification: 0.00-0.20 (very low); 0.21-0.40 (low); 0.41-0.70 (medium); 0.71-0.90 (high); 0.91-1.00 (very high). Test reliability refers to whether a test can consistently measure what it is intended to measure over time. Measurement results can be trusted only if relatively similar results are obtained several times on the same group of subjects, as long as the aspect being measured in the subject has not changed. The reliability of a measuring instrument and the reliability of measurement results are usually considered to mean the same thing <xref ref-type="bibr" rid="BIBR-10">(Erfan et al., 2020)</xref>; <xref ref-type="bibr" rid="BIBR-14">(Hanifah, 2014)</xref>; <xref ref-type="bibr" rid="BIBR-20">(Kurniawan, 2015)</xref>.</p></sec><sec><title>3. Item Discrimination Index (IDB)</title><p>Item discrimination index shows whether an item can distinguish the abilities of the test takers in the high and low groups <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. The Item Discrimination Index (IDB) indicates the extent to which an item can differentiate between the abilities of participants in high and low groups <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. A good test item should have a high and positive coefficient on the correct answer. The IDB can be seen from the ITEMAN output in the Item Discrimination section. It is stated that an IDB ranging from 0.00 to 0.20 is considered poor, an IDB from 0.21 to 0.40 is deemed adequate, an IDB from 0.41 to 0.70 is considered good, and an IDB from 0.71 to 1.00 is considered very good.
Negative results indicate poor discrimination.</p><p>The discrimination index of the final exam for the Indonesian language subject for the class VIII students of SMPN 8 Yogyakarta is presented in the table below.</p><table-wrap id="table-2" ignoredToc=""><label>Table 2</label><caption><p>IDB Study Results</p></caption><table frame="box" rules="all"><thead><tr><th colspan="1" rowspan="1" style="" align="left" valign="top">No</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Category</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Number of Items</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Amount</th></tr></thead><tbody><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">1</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Failed</td><td colspan="1" rowspan="1" style="" align="left" valign="top">10, 21, 22</td><td colspan="1" rowspan="1" style="" align="left" valign="top">3</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">2</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Enough</td><td colspan="1" rowspan="1" style="" align="left" valign="top">24, 29, 38,42</td><td colspan="1" rowspan="1" style="" align="left" valign="top">4</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">3</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Medium</td><td colspan="1" rowspan="1" style="" align="left" valign="top">11,14, 17, 19, 20</td><td colspan="1" rowspan="1" style="" align="left" valign="top">5</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">4</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Good</td><td colspan="1" rowspan="1" style="" align="left" valign="top">1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 15, 16, 18, 23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50</td><td colspan="1" rowspan="1" style="" align="left" 
valign="top">38</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">5</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Very Good</td><td colspan="1" rowspan="1" style="" align="left" valign="top">-</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0</td></tr></tbody></table></table-wrap><p>Item discrimination index shows the ability of an item to distinguish the abilities of the test takers in the high and low groups <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. Test items with a high item discrimination index should have answers with a positive and high coefficient. For the purpose of learning in the classroom, a more moderate strategy can be implemented by considering a discrimination index higher or equal to 0.20 as a qualified item <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref></p><p>Based on the results of the item discrimination analysis, good test items have discrimination indices between 0.40 and 1.00. In this study, there are 38 items with Very Good discriminating indices (item numbers 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 15, 16, 18, 23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50). Items with good discrimination indices are numbers 11, 14, 17, 19, and 20. Those items have indices between 0.30 and 0.39. Then, items that belong to the Fair category are numbers 24, 29, 38, and 42. The discrimination indices of those items are between 0.20 s.d. 0.29. At last, items having Poor indices (below 0.20) are numbers 10, 21, and 22. 
Those items were then deleted.</p><p>The categorization proposed by <xref ref-type="bibr" rid="BIBR-4">(Anggraini &amp; Suyata, 2014)</xref> shows that indices between 0.00 and 0.20 belong to the Poor category, indices from 0.21 to 0.40 belong to the Fair category, indices ranging from 0.41 to 0.70 belong to the Good category, the indices between 0.71 and 1.00 belong to the Very Good category, and negative indices indicate Very Poor discriminators <xref ref-type="bibr" rid="BIBR-16">(Himawan &amp; Nurgiyantoro, 2022)</xref>.</p></sec><sec><title>4. Item Difficulty Index</title><p>The level of difficulty can be seen from the ITEMAN analysis in the item difficulty section. ITK is an index that indicates how easy or difficult a test item is for the tested participants <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. The range of this difficulty index is from 0.00 to 1.00. If the difficulty index shows a value of 1.00, then the item is not difficult to answer. Conversely, if the difficulty index is 0.00, then the item is very difficult to answer. 
ITK significantly determines the difficulty of the questions being tested, making it closely related to critical thinking skills.</p><table-wrap id="table-3" ignoredToc=""><label>Table 3</label><caption><p>ITK Study Results</p></caption><table frame="box" rules="all"><thead><tr><th colspan="1" rowspan="1" style="" align="left" valign="top">No</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Category</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Range</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Item Number</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Amount</th></tr></thead><tbody><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">1</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Very Easy</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0.81-1.00</td><td colspan="1" rowspan="1" style="" align="left" valign="top">1, 2, 3, 5, 6, 7, 8, 9, 13, 14, 15, 16, 18, 23, 25, 26, 28, 31, 32, 33, 34, 35, 37, 39, 41, 43, 44, 45, 46, 49, 50</td><td colspan="1" rowspan="1" style="" align="left" valign="top">31</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">2</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Easy</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0.61-0.80</td><td colspan="1" rowspan="1" style="" align="left" valign="top">4, 11, 12, 22, 27, 30, 36, 40, 42, 47</td><td colspan="1" rowspan="1" style="" align="left" valign="top">10</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">3</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Moderate</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0.41-0.60</td><td colspan="1" rowspan="1" style="" align="left" valign="top">19, 20, 29, 38, 48</td><td colspan="1" rowspan="1" style="" align="left" valign="top">5</td></tr><tr><td colspan="1" rowspan="1" style="" align="left"
valign="top">4</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Difficult</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0.21-040</td><td colspan="1" rowspan="1" style="" align="left" valign="top">17, 21</td><td colspan="1" rowspan="1" style="" align="left" valign="top">2</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">5</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Very Difficult</td><td colspan="1" rowspan="1" style="" align="left" valign="top">0.00-0.20</td><td colspan="1" rowspan="1" style="" align="left" valign="top">10, 24</td><td colspan="1" rowspan="1" style="" align="left" valign="top">2</td></tr></tbody></table></table-wrap><p>Item difficulty is an index that represents how easy or difficult the item is according to test takers <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. Item difficulty index ranges from 0.00 to. 1.00. If the index is 1.00, the item is not difficult to answer. Conversely, if the difficulty index is 0.00, the question is very difficult to answer. The item difficulty index ranging from 0.20-0.80 is tolerable <xref ref-type="bibr" rid="BIBR-29">(Nurgiyantoro, 2016)</xref>. There are some categories of difficulty index, namely Very Difficult (0.00-0.199) Difficult (0.20-0.40), Moderately Difficult (0.41-0.60), and Easy (0.61-0.80).</p><p>Based on the results of this present study, five items belong to the Moderately Difficult category (item numbers 19, 20, 29, 38, and 48). Ten items are in the Easy category (numbers 4, 11, 12, 22, 27, 30, 36, 40, 42, and 47). There are two Very Difficult questions (numbers 10 and 24). 
Then, there are 31 Very Easy items (numbers 1, 2, 3, 5, 6, 7, 8, 9, 13, 14, 15, 16, 18, 23, 25, 26, 28, 31, 32, 33, 34, 35, 37, 39, 41, 43, 44, 45, 46, 49, and 50).</p><p>The categories of good and poor discrimination significantly impact the quality of classroom learning and, consequently, determine the quality of students in solving problems. Questions can be considered good if their cognitive level distribution is even, thereby it can teach students to solve various conceptual problems through evaluation questions developed during classroom learning <xref ref-type="bibr" rid="BIBR-36">(Pujiastuti &amp; Kulup, 2021)</xref>; <xref ref-type="bibr" rid="BIBR-8">(Azizah &amp; Sumardi, 2021)</xref>.</p></sec><sec><title>5. Distractor Function</title><p>Distractors are used to identify test takers with high ability. The distractor is said to function effectively if there are more test-takers with low ability selecting it. Meanwhile, if there are more test takers with high ability selecting the distractor, the distractor does not function well <xref ref-type="bibr" rid="BIBR-19">(Iskandar &amp; Rizal, 2018)</xref>. Distractors are effective when they are selected by test takers; however, if any distractors receive no selections, they are considered ineffective <xref ref-type="bibr" rid="BIBR-6">(Arvianto, 2016)</xref>. The function of the distractor in this study is presented as follows. The effectiveness of distractors can be assessed by examining the proportional endorsing values provided in the ITEMAN output. Distractors are considered effective if they are selected by at least 5% of test participants. A question is considered suitable if it includes effective distractors that surpass the 5% threshold. 
Distractors are derived from analyzing test takers' responses to incorrect answer options <xref ref-type="bibr" rid="BIBR-49">(Syarifah et al., 2020)</xref>.</p><table-wrap id="table-4" ignoredToc=""><label>Table 4</label><caption><p>Results of Study of Distractor Items</p></caption><table frame="box" rules="all"><thead><tr><th colspan="1" rowspan="1" style="" align="left" valign="top">No</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Category</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Item</th><th colspan="1" rowspan="1" style="" align="left" valign="top">Amount</th></tr></thead><tbody><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">1</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Works</td><td colspan="1" rowspan="1" style="" align="left" valign="top">2, 3, 10, 11, 12, 18, 19, 22, 27, 30, 33, 37, 38, 40, 42, 47, 48, 49</td><td colspan="1" rowspan="1" style="" align="left" valign="top">18</td></tr><tr><td colspan="1" rowspan="1" style="" align="left" valign="top">2</td><td colspan="1" rowspan="1" style="" align="left" valign="top">Not Working</td><td colspan="1" rowspan="1" style="" align="left" valign="top">1, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 20, 21, 23, 24, 25, 26, 28, 29, 31, 32, 34, 35, 36, 39, 41, 43, 44, 45, 46, 50</td><td colspan="1" rowspan="1" style="" align="left" valign="top">32</td></tr></tbody></table></table-wrap><p><xref ref-type="bibr" rid="BIBR-29">Nurgiyantoro (2016)</xref> states that there are several criteria for determining the effectiveness of a distractor, namely: (1) all distractors (wrong options) must be selected, (2) test takers from the high group should select fewer wrong options than test takers from the low group, and (3) if only one test taker chooses one of the wrong options, that test taker should belong to the low group. Criteria 2 and 3 are often seen as burdensome since they are similar to the concepts of Distractor Function.
Thus, criterion 1 is more effectively used. Wrong options are not effective if no test taker chooses them. This type of option should be deleted or revised <xref ref-type="bibr" rid="BIBR-23">(Mardiana &amp; Suyata, 2017)</xref>.</p><p>Based on the results of distractor function analysis, it was found that there are effective and ineffective distractors. A set of distractors is considered effective when all or three of the options work well, and a distractor is considered ineffective if one, two, or three distractors do not work. There are 18 items with effective distractors (numbers 2, 3, 10, 11, 12, 18, 19, 22, 27, 30, 33, 37, 38, 40, 42, 47, 48, and 49). Meanwhile, the other 32 items have ineffective distractors (numbers 1, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 20, 21, 23, 24, 25, 26, 28, 29, 31, 32, 34, 35, 36, 39, 41, 43, 44, 45, 46, and 50). According to <xref ref-type="bibr" rid="BIBR-39">Putri &amp; Ofianto (2019)</xref>, effective distractors are those chosen evenly by test takers. Meanwhile, ineffective distractors are chosen unequally by test takers.</p></sec></sec><sec><title>4. Conclusion</title><p>The results of the analysis of the quality of the end-of-semester assessment questions for the Indonesian language subject for class VIII at SMPN 8 Yogyakarta can be explained as follows.</p><p>Validity was assessed by colleagues, who analyzed the 50 questions using a validity questionnaire containing the indicators of item validity proposed by the expert. Based on the results of the test reliability analysis, an Alpha result of 0.926 was obtained. This shows that the reliability of the multiple-choice questions in the odd-semester end-of-semester assessment (PAS) for Indonesian language in class VIII-B at SMP Negeri 8 Yogyakarta is categorized as very high.</p><p>Based on the results of the IDB analysis, items with good discriminating power have indices from 0.40 to 1.00. In this study, it was found that there were 38 items with good discriminating power. Items with moderate discriminating power have indices from 0.30 to 0.39.
There are 5 items with moderate discriminating power. Items with sufficient discriminating power have indices from 0.20 to 0.29; there are only 4 such items. Items with poor (failed) discriminating power have indices below 0.20; there are 3 such items. In terms of difficulty, there were 5 items in the moderate category, 10 items in the easy category, 2 items in the difficult category, 2 items in the very difficult category, and 31 items in the very easy category <xref ref-type="bibr" rid="BIBR-54">(Yusmilda et al., 2023)</xref>.</p><p>Based on the results of the analysis of distractor functioning for the multiple-choice items in the odd-semester end-of-semester assessment (PAS) for Indonesian language in class VIII at SMP Negeri 8 Yogyakarta, it is known that the 50 items include both items with functioning distractors and items with non-functioning distractors. The distractors of an item are considered to work if all or 3 of the distractors work. They are considered not to work if 1, 2, or 3 of the distractors do not work. The results of the analysis of the distractors in the developed questions show that there are 18 items with good distractor function and 32 items with distractors that do not work properly. Overall, the results of this analysis are intended to provide a reference point for teachers in dealing with item analysis, which is currently still rarely done, especially in Indonesian language subjects.</p><p>In summary, the implications of the research findings can be used as a reference for teachers in presenting evaluation instruments, including grids, assessment guidelines, and questions, thus determining the quality of the items tested on students. In relation to this, the process of higher-order thinking skills (HOTS) can be easily applied by teachers to students.
Aspects of reading skills in learning, especially in Indonesian language-based text learning, will assist teachers and students in implementing reading literacy in teaching.</p><p>This research enables teachers to conduct evaluations through steps such as: (1) drafting question grids, (2) conducting question validity tests with experts or peers, (3) revising and analyzing evaluation questions using the ITEMAN program, and ( 4) examining the ITEMAN output to construct genuinely effective evaluation instruments, considering aspects like reliability, IDB, ITK, and the functionality of distractors.</p></sec></body><back><ref-list><title>References</title><ref id="BIBR-1"><element-citation publication-type="article-journal"><article-title>Peran Penggunaan Desain Evaluasi Untuk Meningkatkan Kualitas Pembelajaran</article-title><source>Ilma Jurnal Pendidikan Islam</source><volume>1</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Al-faruq</surname><given-names>Z.</given-names></name></person-group><year>2023</year><fpage>158</fpage><lpage>171</lpage><page-range>158-171</page-range><pub-id pub-id-type="doi">10.58569/ilma.v1i2.587</pub-id></element-citation></ref><ref id="BIBR-2"><element-citation publication-type="article-journal"><article-title>Analisis Butir Soal IPS Kelas V Sekolah Dasar Menggunakan Software ITEMAN</article-title><source>EduBasic Journal: Jurnal Pendidikan Dasar</source><volume>1</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Alfarisa</surname><given-names>F.</given-names></name><name><surname>Chudari</surname><given-names>I.N.</given-names></name><name><surname>Robiansyah</surname><given-names>F.</given-names></name></person-group><year>2019</year><fpage>100</fpage><lpage>106</lpage><page-range>100-106</page-range><pub-id pub-id-type="doi">10.17509/ebj.v1i2.26474</pub-id></element-citation></ref><ref id="BIBR-3"><element-citation publication-type=""><article-title>The Effectiveness of 
Picture and picture Cooperative Learning Models of Writing Instructions Skills in Class VIII Students of SMP Negeri 2 Sui Ambawang</article-title><volume>297</volume><issue>Icille 2018</issue><person-group person-group-type="author"><name><surname>Alnovgada</surname><given-names>V.R.S.</given-names></name><name><surname>Suyata</surname><given-names>P.</given-names></name></person-group><year>2019</year><fpage>432</fpage><lpage>437</lpage><page-range>432-437</page-range><pub-id pub-id-type="doi">10.2991/icille-18.2019.90</pub-id></element-citation></ref><ref id="BIBR-4"><element-citation publication-type="article-journal"><article-title>Karakteristik Soal Uasbn Mata Pelajaran Bahasa Indonesia Di Daerah Istimewa Yogyakarta Pada Tahun Pelajaran 2008/2009</article-title><source>Jurnal Prima Edukasia</source><volume>2</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Anggraini</surname><given-names>D.</given-names></name><name><surname>Suyata</surname><given-names>P.</given-names></name></person-group><year>2014</year><page-range>57</page-range><pub-id pub-id-type="doi">10.21831/jpe.v2i1.2644</pub-id></element-citation></ref><ref id="BIBR-5"><element-citation publication-type="article-journal"><article-title>Pengembangan instrumen pengukur higher order thinking skills matematika siswa SMA kelas X</article-title><source>PYTHAGORAS: Jurnal Pendidikan Matematika</source><volume>12</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Arifin</surname><given-names>Z.</given-names></name><name><surname>Retnawati</surname><given-names>H.</given-names></name></person-group><year>2017</year><page-range>98</page-range><pub-id pub-id-type="doi">10.21831/pg.v12i1.14058</pub-id></element-citation></ref><ref id="BIBR-6"><element-citation publication-type="article-journal"><article-title>Pemanfaatan program Iteman 3.0 untuk analisis butir soal lomba cerdas cermat teknologi informasi dan komunikasi tingkat SMA 
sederajat</article-title><source>Jurnal Teknologi Informasi</source><volume>XI</volume><issue>33</issue><person-group person-group-type="author"><name><surname>Arvianto</surname><given-names>I.R.</given-names></name></person-group><year>2016</year><fpage>1</fpage><lpage>13</lpage><page-range>1-13</page-range></element-citation></ref><ref id="BIBR-7"><element-citation publication-type="article-journal"><article-title>Quantitative Analysis Of Elementary School Students’ Curiosity and Web-Based Assessment Responses</article-title><source>Indonesian Journal on Learning and Advanced Education (IJOLAE)</source><volume>5</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Asrial</surname><given-names>A.</given-names></name><name><surname>Syahrial</surname><given-names>S.</given-names></name><name><surname>Sabil</surname><given-names>H.</given-names></name><name><surname>Kurniawan</surname><given-names>D.A.</given-names></name><name><surname>Perdana</surname><given-names>R.</given-names></name><name><surname>Nawahdani</surname><given-names>A.M.</given-names></name><name><surname>Widodi</surname><given-names>B.</given-names></name><name><surname>Rahmi</surname><given-names>R.</given-names></name><name><surname>Nyirahabimana</surname><given-names>P.</given-names></name></person-group><year>2023</year><fpage>107</fpage><lpage>119</lpage><page-range>107-119</page-range><pub-id pub-id-type="doi">10.23917/ijolae.v5i2.21646</pub-id></element-citation></ref><ref id="BIBR-8"><element-citation publication-type="article-journal"><article-title>Analisis Kualitas Dan Tingkat Kognitif Soal Matematika Penilaian Akhir Semester (Pas) Ganjil Kelas Ix Di Smp N 10 Kota Bengkulu Tahun 2020/2021</article-title><source>Journal Mathematics Education Sigma [JMES]</source><volume>2</volume><issue>2</issue><person-group
person-group-type="author"><name><surname>Azizah</surname><given-names>N.</given-names></name><name><surname>Sumardi</surname><given-names>H.</given-names></name></person-group><year>2021</year><pub-id pub-id-type="doi">10.30596/jmes.v2i2.7936</pub-id></element-citation></ref><ref id="BIBR-9"><element-citation publication-type="chapter"><article-title>Validitas dan Reliabilitas Kuesioner Pengetahuan, Sikap dan Perilaku Pencegahan Demam Berdarah</article-title><source>Seminar Nasional Keperawatan Universitas Muhammadiyah Surakarta (SEMNASKEP)</source><person-group person-group-type="author"><name><surname>Dewi</surname><given-names>S.K.</given-names></name><name><surname>Sudaryanto</surname><given-names>A.</given-names></name></person-group><year>2020</year><fpage>73</fpage><lpage>79</lpage><page-range>73-79</page-range></element-citation></ref><ref id="BIBR-10"><element-citation publication-type="article-journal"><article-title>Tes Klasik Dan Model Rasch</article-title><source>Indonesian Journal of Educational Research and Review</source><volume>3</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Erfan</surname><given-names>M.</given-names></name><name><surname>Maulyda</surname><given-names>M.A.</given-names></name><name><surname>Hidayati</surname><given-names>V.R.</given-names></name><name><surname>Astria</surname><given-names>F.P.</given-names></name><name><surname>Ratu</surname><given-names>T.</given-names></name></person-group><year>2020</year><fpage>11</fpage><lpage>19</lpage><page-range>11-19</page-range></element-citation></ref><ref id="BIBR-11"><element-citation publication-type="article-journal"><article-title>Analisis Kualitas Soal Ujian Statistika Menggunakan Classical Test Theory dan Rasch Model</article-title><source>Square : Journal of Mathematics and Mathematics Education</source><volume>2</volume><issue>1</issue><person-group 
person-group-type="author"><name><surname>Fernanda</surname><given-names>J.W.</given-names></name><name><surname>Hidayah</surname><given-names>N.</given-names></name></person-group><year>2020</year><page-range>49</page-range><pub-id pub-id-type="doi">10.21580/square.2020.2.1.5363</pub-id></element-citation></ref><ref id="BIBR-12"><element-citation publication-type="article-journal"><article-title>Analisis Soal Ujian Akhir Semester Pada Mata Pelajaran Ipa Berdasarkan Dimensi Proses Kognitif Taksonomi Anderson Dan Kemampuan Berpikir Kritis Di Smp Negeri 1 Nunukan Selatan</article-title><source>Biopedagogia</source><volume>2</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Fitriani</surname><given-names>F.</given-names></name><name><surname>Ibrahim</surname><given-names>I.</given-names></name><name><surname>Nugroho</surname><given-names>E.D.</given-names></name></person-group><year>2020</year><fpage>37</fpage><lpage>43</lpage><page-range>37-43</page-range><pub-id pub-id-type="doi">10.35334/biopedagogia.v2i1.1716</pub-id></element-citation></ref><ref id="BIBR-13"><element-citation publication-type="article-journal"><article-title>Meningkatkan Konsentrasi Belajar Peserta Didik dengan Bimbingan Klasikal Metode Cooperative Learning Tipe Jigsaw</article-title><source>Magistrorum et Scholarium: Jurnal Pengabdian Masyarakat</source><volume>1</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Fridaram</surname><given-names>O.</given-names></name><name><surname>Istharini</surname><given-names>E.</given-names></name><name><surname>Cicilia</surname><given-names>P.G.C.</given-names></name><name><surname>Nuryani</surname><given-names>A.</given-names></name><name><surname>Wibowo</surname><given-names>D.H.</given-names></name></person-group><year>2021</year><fpage>161</fpage><lpage>170</lpage><page-range>161-170</page-range><pub-id pub-id-type="doi">10.24246/jms.v1i22020p161-170</pub-id></element-citation></ref><ref 
id="BIBR-14"><element-citation publication-type="article-journal"><article-title>Perbandingan Tingkat Kesukaran, Daya Pembeda Butir Soal Dan Reliabilitas Tes Bentuk Pilihan Ganda Biasa Dan Pilihan Ganda Asosiasi Mata Pelajaran Ekonomi</article-title><source>SOSIO E-KONS</source><volume>6</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Hanifah</surname><given-names>N.</given-names></name></person-group><year>2014</year><fpage>41</fpage><lpage>55</lpage><page-range>41-55</page-range></element-citation></ref><ref id="BIBR-15"><element-citation publication-type="article-journal"><article-title>Developing Project-Based Learning-Based eBook “Critical and Creative Reading” to Improve Students’ Critical Thinking Skills Riswanda</article-title><source>Jurnal Kependidikan: Jurnal Hasil Penelitian Dan Kajian Kepustakaan Di Bidang Pendidikan, Pengajaran Dan Pembelajaran</source><volume>10</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Himawan</surname><given-names>Suyata</given-names></name><name name-style="given-only"><given-names>K.</given-names></name></person-group><year>2024</year><fpage>392</fpage><lpage>404</lpage><page-range>392-404</page-range></element-citation></ref><ref id="BIBR-16"><element-citation publication-type="article-journal"><article-title>Analisis butir soal latihan penilaian akhir semester ganjil mata pelajaran bahasa Indonesia kelas VIII SMPN 1 Bambanglipuro Bantul menggunakan program ITEMAN ( Analysis of exercise items for odd semester end of semester Indonesian language subjects class</article-title><source>Kembara: Jurnal Keilmuan Bahasa, Sastra, Dan Pengajarannya</source><volume>8</volume><issue>1</issue><person-group 
person-group-type="author"><name><surname>Himawan</surname><given-names>R.</given-names></name><name><surname>Nurgiyantoro</surname><given-names>B.</given-names></name></person-group><year>2022</year><fpage>160</fpage><lpage>180</lpage><page-range>160-180</page-range></element-citation></ref><ref id="BIBR-17"><element-citation publication-type="article-journal"><article-title>DEVELOPING HOTS QUESTIONS : EVALUATING PERSUASIVE SPEECH TEXT LEARNING IN GRADE IX OF JUNIOR HIGH SCHOOL</article-title><source>Jurnal Gramatika: Jurnal Penelitian Pendidikan Bahasa Dan Sastra Indonesia</source><volume>8</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Himawan</surname><given-names>R.</given-names></name><name><surname>Suyata</surname><given-names>P.</given-names></name></person-group><year>2022</year><fpage>50</fpage><lpage>64</lpage><page-range>50-64</page-range></element-citation></ref><ref id="BIBR-18"><element-citation publication-type="article-journal"><article-title>Validitas dan Reliabilitas dalam Analisis Butir Soal</article-title><source>Al-Mu’Arrib: Journal of Arabic Education</source><volume>1</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Ida</surname><given-names>F.F.</given-names></name><name><surname>Musyarofah</surname><given-names>A.</given-names></name></person-group><year>2021</year><fpage>34</fpage><lpage>44</lpage><page-range>34-44</page-range><pub-id pub-id-type="doi">10.32923/al-muarrib.v1i1.2100</pub-id></element-citation></ref><ref id="BIBR-19"><element-citation publication-type="article-journal"><article-title>Analisis kualitas soal di perguruan tinggi berbasis aplikasi TAP</article-title><source>Jurnal Penelitian Dan Evaluasi Pendidikan</source><volume>22</volume><issue>1</issue><person-group 
person-group-type="author"><name><surname>Iskandar</surname><given-names>A.</given-names></name><name><surname>Rizal</surname><given-names>M.</given-names></name></person-group><year>2018</year><fpage>12</fpage><lpage>23</lpage><page-range>12-23</page-range><pub-id pub-id-type="doi">10.21831/pep.v22i1.15609</pub-id></element-citation></ref><ref id="BIBR-20"><element-citation publication-type="article-journal"><article-title>Analisis Butir Soal Ulangan Akhir Semester Gasal Mata Pelajaran IPS Sekolah Dasar (Analysis of Odd Semester Final Test Items in Elementary School of Social Studies Subjects)</article-title><source>Journal of Elementary Education</source><volume>4</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Kurniawan</surname><given-names>T.</given-names></name></person-group><year>2015</year><fpage>1</fpage><lpage>6</lpage><page-range>1-6</page-range></element-citation></ref><ref id="BIBR-21"><element-citation publication-type="article-journal"><article-title>Enhancing Learning Outcomes: A Study on the Development of Higher Order Thinking Skills based Evaluation Instruments for Work and Energy in High School Physics</article-title><source>Indonesian Journal on Learning and Advanced Education (IJOLAE)</source><volume>6</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Kusumaningtyas</surname><given-names>D.A.</given-names></name><name><surname>Manyunu</surname><given-names>M.</given-names></name><name><surname>Kurniasari</surname><given-names>E.</given-names></name><name><surname>Awalin</surname><given-names>A.N.</given-names></name><name><surname>Rahmaniati</surname><given-names>R.</given-names></name><name><surname>Febriyanti</surname><given-names>A.</given-names></name></person-group><year>2024</year><fpage>14</fpage><lpage>31</lpage><page-range>14-31</page-range><pub-id pub-id-type="doi">10.23917/ijolae.v6i1.23125</pub-id></element-citation></ref><ref id="BIBR-22"><element-citation 
publication-type="article-journal"><article-title>Analisis Validitas, Reliabilitas, Tingkat Kesulitan Dan Daya Beda Butir Soal Ujian Akhir Semester Tema 7 Kelas III SDN Karet 1 Sepatan</article-title><source>BINTANG : Jurnal Pendidikan Dan Sains</source><volume>3</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Magdalena</surname><given-names>I.</given-names></name><name><surname>Fauziah</surname><given-names>S.N.</given-names></name><name><surname>Faziah</surname><given-names>S.N.</given-names></name><name><surname>Nupus</surname><given-names>F.S.</given-names></name></person-group><year>2021</year><fpage>198</fpage><lpage>214</lpage><page-range>198-214</page-range></element-citation></ref><ref id="BIBR-23"><element-citation publication-type="article-journal"><article-title>Evaluating the philosophical foundation of 2013 Curriculum</article-title><source>Jurnal Penelitian Dan Evaluasi Pendidikan</source><volume>21</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Mardiana</surname><given-names>M.</given-names></name><name><surname>Suyata</surname><given-names>P.</given-names></name></person-group><year>2017</year><fpage>175</fpage><lpage>188</lpage><page-range>175-188</page-range><pub-id pub-id-type="doi">10.21831/pep.v21i2.13336</pub-id></element-citation></ref><ref id="BIBR-24"><element-citation publication-type="article-journal"><article-title>Pengembangan Instrumen Soal HOTS (High Order Thinking Skill) Pada Mata Kuliah Fisika Dasar 1</article-title><source>Jurnal Pendidikan Fisika</source><volume>8</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Martin</surname><given-names>T.I.H.</given-names></name></person-group><year>2020</year><fpage>18</fpage><lpage>21</lpage><page-range>18-21</page-range></element-citation></ref><ref id="BIBR-25"><element-citation publication-type="article-journal"><article-title>Problematika Pembelajaran Tematik Terpadu di Min III 
Bondowoso</article-title><source>Indonesian Journal of Islamic Teaching</source><volume>1</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Muhith</surname><given-names>A.</given-names></name></person-group><year>2018</year><fpage>45</fpage><lpage>61</lpage><page-range>45-61</page-range></element-citation></ref><ref id="BIBR-26"><element-citation publication-type="article-journal"><article-title>Intelligent computational model to determine the order of thinking skills of test items</article-title><source>ICIC Express Letters</source><volume>15</volume><issue>9</issue><person-group person-group-type="author"><name><surname>Mustafidah</surname><given-names>H.</given-names></name><name><surname>Hartati</surname><given-names>S.</given-names></name><name><surname>Wardoyo</surname><given-names>R.</given-names></name><name><surname>Suyata</surname><given-names>P.</given-names></name></person-group><year>2021</year><fpage>999</fpage><lpage>1006</lpage><page-range>999-1006</page-range><pub-id pub-id-type="doi">10.24507/icicel.15.09.999</pub-id></element-citation></ref><ref id="BIBR-27"><element-citation publication-type="article-journal"><article-title>Pengembangan Instrumen Penilaian Kognitif untuk Mengukur Literasi Membaca Bahasa Indonesia Berbasis Model Pirls pada Siswa Kelas IV SD</article-title><source>Journal of Educational Research and Evaluation Sejarah Artikel</source><volume>6</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Nanda Pratiwiningtyas</surname><given-names>B.</given-names></name><name><surname>Susilaningsih</surname><given-names>E.</given-names></name><name><surname>Made Sudana</surname><given-names>I.</given-names></name></person-group><year>2017</year><fpage>1</fpage><lpage>9</lpage><page-range>1-9</page-range></element-citation></ref><ref id="BIBR-28"><element-citation publication-type="article-journal"><article-title>Analisis Butir Soal Tes Kendali Mutu Kelas XII Sma Mata Pelajaran 
Ekonomi Akuntansi Di Kota Yogyakarta</article-title><source>Jurnal Pendidikan Akuntansi Indonesia</source><volume>X</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Nayla Amalia</surname><given-names>A.</given-names></name><name><surname>Widayati</surname><given-names>A.</given-names></name></person-group><year>2012</year><fpage>1</fpage><lpage>26</lpage><page-range>1-26</page-range></element-citation></ref><ref id="BIBR-29"><element-citation publication-type="book"><article-title>Penilaian Pembelajaran Bahasa Berbasis Kompetensi</article-title><person-group person-group-type="author"><name><surname>Nurgiyantoro</surname><given-names>B.</given-names></name></person-group><year>2016</year><publisher-name>BPFE-Yogyakarta</publisher-name></element-citation></ref><ref id="BIBR-30"><element-citation publication-type="article-journal"><article-title>Konstruk Asesmen Literasi Fungsional Untuk Siswa Sekolah Menengah Pertama</article-title><source>Litera</source><volume>19</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Nurgiyantoro</surname><given-names>B.</given-names></name><name><surname>Lestyarini</surname><given-names>B.</given-names></name><name><surname>Rahayu</surname><given-names>D.H.</given-names></name></person-group><year>2020</year><fpage>194</fpage><lpage>211</lpage><page-range>194-211</page-range><pub-id pub-id-type="doi">10.21831/ltr.v19i2.32977</pub-id></element-citation></ref><ref id="BIBR-31"><element-citation publication-type="article-journal"><article-title>Hubungan Antara Validitas Item Dengan Daya Pembeda Dan Tingkat Kesukaran Soal Pilihan Ganda Pas</article-title><source>Natural Science Education Research</source><volume>4</volume><issue>3</issue><person-group 
person-group-type="author"><name><surname>Nurhalimah</surname><given-names>S.</given-names></name><name><surname>Hidayati</surname><given-names>Y.</given-names></name><name><surname>Rosidi</surname><given-names>I.</given-names></name><name><surname>Hadi</surname><given-names>W.P.</given-names></name></person-group><year>2022</year><fpage>249</fpage><lpage>257</lpage><page-range>249-257</page-range><pub-id pub-id-type="doi">10.21107/nser.v4i3.8682</pub-id></element-citation></ref><ref id="BIBR-32"><element-citation publication-type="article-journal"><article-title>Analisis Iteman dan Model Rasch pada Pengembangan Instrumen Kemampuan Berpikir Kritis Peserta Didik Sekolah Menengah Kejuruan</article-title><source>Jurnal Inovasi Pendidikan IPA</source><volume>4</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Nuryanti</surname><given-names>S.</given-names></name><name><surname>Masykuri</surname><given-names>M.</given-names></name><name><surname>Susilowati</surname><given-names>E.</given-names></name></person-group><year>2018</year><fpage>224</fpage><lpage>233</lpage><page-range>224-233</page-range></element-citation></ref><ref id="BIBR-33"><element-citation publication-type="article-journal"><article-title>Kualitas butir soal try out uji pengetahuan dalam memprediksi tingkat kelulusan mahasiswa PPG</article-title><source>Jurnal Pendidikan Profesi</source><volume>1</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Pangesti</surname><given-names>F.</given-names></name><name><surname>Fauzan</surname><given-names>F.</given-names></name><name><surname>Risnawati</surname><given-names>R.</given-names></name></person-group><year>2020</year><fpage>91</fpage><lpage>98</lpage><page-range>91-98</page-range></element-citation></ref><ref id="BIBR-34"><element-citation publication-type="article-journal"><article-title>Implementasi Pembelajaran Menulis Teks Eksplanasi Kompleks Pada Siswa Kelas XI SMAN 10 Yogyakarta Dengan 
Menggunakan Strategi Writing a Story Based on a Picture / Photograph</article-title><source>Rumpun Jurnal Persatuan Melayu</source><volume>8</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Parancika</surname><given-names>R.B.</given-names></name><name><surname>Suyata</surname><given-names>P.</given-names></name></person-group><year>2020</year><fpage>13</fpage><lpage>25</lpage><page-range>13-25</page-range></element-citation></ref><ref id="BIBR-35"><element-citation publication-type="article-journal"><article-title>Fostering Effective Teaching Practices: Integrating Formative Assessment and Mentorship in Indonesian Preservice Teacher Education</article-title><source>Indonesian Journal on Learning and Advanced Education</source><volume>6</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Prastikawati</surname><given-names>E.F.</given-names></name><name><surname>Adeoye</surname><given-names>M.A.</given-names></name><name><surname>Ryan</surname><given-names>J.C.</given-names></name></person-group><year>2024</year><fpage>230</fpage><lpage>253</lpage><page-range>230-253</page-range><pub-id pub-id-type="doi">10.23917/ijolae.v6i2.23431</pub-id></element-citation></ref><ref id="BIBR-36"><element-citation publication-type="article-journal"><article-title>Penyusunan Instrumen Penilaian Kognitif Berbasis HOTS Melalui Problem Based Learning dan Peer Assessment</article-title><source>Indonesian Language Education and Literature</source><volume>7</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Pujiastuti</surname><given-names>R.</given-names></name><name><surname>Kulup</surname><given-names>L.I.</given-names></name></person-group><year>2021</year><page-range>88</page-range><pub-id pub-id-type="doi">10.24235/ileal.v7i1.9058</pub-id></element-citation></ref><ref id="BIBR-37"><element-citation publication-type="article-journal"><article-title>Analisis Butir Soal Ujian Sekolah Mata Pelajaran 
Kimia SMA N 1 Kutowinangun Tahun Pelajaran 2019/2022 Menggunakan Model Iteman dan Rasch</article-title><source>Jurnal Pendidikan Kimia</source><volume>10</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Purniasari</surname><given-names>L.</given-names></name><name><surname>Masykuri</surname><given-names>M.</given-names></name><name><surname>Ariani</surname><given-names>S.R.D.</given-names></name></person-group><year>2021</year><fpage>205</fpage><lpage>214</lpage><page-range>205-214</page-range></element-citation></ref><ref id="BIBR-38"><element-citation publication-type="article-journal"><article-title>Pengaruh Kesiapan Belajar Terhadap Keaktifan Peserta Didik dalam Proses Pembelajaran AUD</article-title><source>Golden Age: Jurnal Ilmiah Tumbuh Kembang Anak Usia Dini</source><volume>3</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Putri Pangestu</surname><given-names>D.</given-names></name><name><surname>Rohinah</surname><given-names>R.</given-names></name></person-group><year>2019</year><fpage>81</fpage><lpage>90</lpage><page-range>81-90</page-range><pub-id pub-id-type="doi">10.14421/jga.2018.32-02</pub-id></element-citation></ref><ref id="BIBR-39"><element-citation publication-type="article-journal"><article-title>Efektivitas Analisis Butir Menggunakan Anajohn, Anates dan Iteman Studi Soal USBN Pelajaran Sejarah Kota Padang</article-title><source>Jurnal Mahasiswa Ilmu Sejarah Dan Pendidikan</source><volume>1</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Putri</surname><given-names>R.H.</given-names></name><name name-style="given-only"><given-names>Ofianto</given-names></name></person-group><year>2019</year><fpage>1</fpage><lpage>11</lpage><page-range>1-11</page-range></element-citation></ref><ref id="BIBR-40"><element-citation publication-type="book"><article-title>Validitas, Realibilitas &amp; Karakteristik Butir</article-title><person-group 
person-group-type="author"><name><surname>Retnawati</surname><given-names>H.</given-names></name></person-group><year>2015</year><publisher-name>Parama Publishing</publisher-name></element-citation></ref><ref id="BIBR-41"><element-citation publication-type="article-journal"><article-title>ANALISIS BUTIR SOAL PENILAIAN TENGAH SEMESTER MATA PELAJARAN PPKn KELAS VII DI SMP MUHAMMADIYAH 7 YOGYAKARTA</article-title><source>Academy of Education Journal</source><volume>11</volume><issue>01</issue><person-group person-group-type="author"><name><surname>Rotama</surname><given-names>A.D.</given-names></name><name><surname>Budiutomo</surname><given-names>T.W.</given-names></name><name><surname>Bowo</surname><given-names>A.N.A.</given-names></name></person-group><year>2020</year><fpage>24</fpage><lpage>35</lpage><page-range>24-35</page-range><pub-id pub-id-type="doi">10.47200/aoej.v11i01.314</pub-id></element-citation></ref><ref id="BIBR-42"><element-citation publication-type="article-journal"><article-title>La Evaluación: Una estrategia para desarrollar Aprendizajes Profundos en el estudiante</article-title><source>Boletín Redipe</source><volume>7</volume><person-group person-group-type="author"><name><surname>Ruay Garcés</surname><given-names>R.</given-names></name></person-group><year>2018</year><fpage>47</fpage><lpage>62</lpage><page-range>47-62</page-range></element-citation></ref><ref id="BIBR-43"><element-citation publication-type="article-journal"><article-title>ANALISIS BUTIR SOAL PENILAIAN AKHIR TAHUN (PAT) BAHASA INDONESIA KELAS XI SMA NEGERI 1 POLANHARJO KLATEN</article-title><source>Lingua Rima</source><volume>11</volume><issue>2</issue><person-group 
person-group-type="author"><name><surname>Setiawan</surname><given-names>K.E.P.</given-names></name><name><surname>Yudha</surname><given-names>R.K.</given-names></name><name><surname>Arwansyah</surname><given-names>Y.B.</given-names></name></person-group><year>2022</year><fpage>25</fpage><lpage>33</lpage><page-range>25-33</page-range></element-citation></ref><ref id="BIBR-44"><element-citation publication-type="article-journal"><article-title>Pendeteksian DIF pada Perangkat Tes Objektif Penilaian Akhir Semester IPA dengan Menggunakan Permodelan Rasch</article-title><source>Pancasakti Science Education Journal</source><volume>5</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Setiawan</surname><given-names>M.A.</given-names></name><name><surname>Susongko</surname><given-names>P.</given-names></name><name><surname>Hayati</surname><given-names>M.N.</given-names></name></person-group><year>2020</year><fpage>23</fpage><lpage>29</lpage><page-range>23-29</page-range></element-citation></ref><ref id="BIBR-45"><element-citation publication-type="article-journal"><article-title>Analisis Butir Soal Ujian Tengah Semester Ganjil Seni Budaya Kelas VII Di SMPN 29 Sijunjung</article-title><source>Journal of Chemical Information and Modeling</source><volume>53</volume><issue>9</issue><person-group person-group-type="author"><name><surname>Shanta Monica</surname><given-names>Y.sudarman</given-names></name></person-group><year>2013</year><fpage>1689</fpage><lpage>1699</lpage><page-range>1689-1699</page-range></element-citation></ref><ref id="BIBR-46"><element-citation publication-type="article-journal"><article-title>Kualitas Tes Bahasa Arab dan Prestasi Peserta Didik Madrasah Tsanawiyah Kabupaten Bantul (Analisis Butir Soal UAMBN)</article-title><source>Jurnal Pendidikan Madrasah</source><volume>2</volume><issue>1</issue><person-group 
person-group-type="author"><name><surname>Suharti</surname><given-names>S.</given-names></name></person-group><year>2017</year><fpage>185</fpage><lpage>196</lpage><page-range>185-196</page-range></element-citation></ref><ref id="BIBR-47"><element-citation publication-type="article-journal"><article-title>Analisis Validitas Reabilitas Tingkat Kesukaran dan Daya Beda pada Butir Soal Ujian Akhir Semester Ganjil Mata Pelajaran Matematika</article-title><source>Al-Jabar: Jurnal Pendidikan Matematika</source><volume>6</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Susanto</surname><given-names>H.</given-names></name><name><surname>Rinaldi</surname><given-names>A.</given-names></name><name name-style="given-only"><given-names>Novalia</given-names></name></person-group><year>2015</year><page-range>343</page-range></element-citation></ref><ref id="BIBR-48"><element-citation publication-type="article-journal"><article-title>Validitas dan Reliabilitas Instrumen Penilaian Pada Mata Pelajaran Bahasa Arab</article-title><source>Jurnal Kajian Perbatasan Antarnegara</source><volume>3</volume><issue>2</issue><person-group person-group-type="author"><name name-style="given-only"><given-names>Syaifudin</given-names></name></person-group><year>2020</year><fpage>106</fpage><lpage>118</lpage><page-range>106-118</page-range></element-citation></ref><ref id="BIBR-49"><element-citation publication-type="article-journal"><article-title>Analisis Soal-Soal Pada Buku Ajar Matematika Siswa Kelas XI Ditinjau Dari Aspek Kognitif</article-title><source>Jurnal Cendekia : Jurnal Pendidikan Matematika</source><volume>4</volume><issue>2</issue><person-group 
person-group-type="author"><name><surname>Syarifah</surname><given-names>L.L.</given-names></name><name><surname>Yenni</surname><given-names>Y.</given-names></name><name><surname>Dewi</surname><given-names>W.K.</given-names></name></person-group><year>2020</year><fpage>1259</fpage><lpage>1272</lpage><page-range>1259-1272</page-range><pub-id pub-id-type="doi">10.31004/cendekia.v4i2.335</pub-id></element-citation></ref><ref id="BIBR-50"><element-citation publication-type="chapter"><source>PENGEMBANGAN SOAL LITERASI MEMBACA MODEL PISA DEVELOPMENT OF PISA MODEL READING LITERACY QUESTIONS BASED ON</source><volume>7</volume><person-group person-group-type="author"><name><surname>Timor</surname><given-names>U.</given-names></name><name><surname>Bahasa</surname><given-names>P.</given-names></name><name><surname>Km</surname><given-names>J.</given-names></name><name><surname>Sasi</surname><given-names>K.</given-names></name></person-group><year>2022</year><fpage>42</fpage><lpage>50</lpage><page-range>42-50</page-range></element-citation></ref><ref id="BIBR-51"><element-citation publication-type="article-journal"><article-title>Hubungan Kemampuan Berpikir Kreatif Terhadap Hasil Belajar Mahasiswa</article-title><source>Matematika</source><volume>17</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Wahyuni</surname><given-names>A.</given-names></name><name><surname>Kurniawan</surname><given-names>P.</given-names></name></person-group><year>2018</year><fpage>1</fpage><lpage>8</lpage><page-range>1-8</page-range><pub-id pub-id-type="doi">10.29313/jmtm.v17i2.4114</pub-id></element-citation></ref><ref id="BIBR-52"><element-citation publication-type="article-journal"><article-title>Analisis Butir Soal Persiapan Ujian Nasional Ipa Smp/Mts Tahun 2018 Sampai Dengan 2019 Berdasarkan Taksonomi Bloom</article-title><source>LENSA (Lentera Sains): Jurnal Pendidikan IPA</source><volume>9</volume><issue>2</issue><person-group 
person-group-type="author"><name><surname>Wijaya</surname><given-names>A.</given-names></name><name><surname>Eresti</surname><given-names>A.</given-names></name><name><surname>Despa</surname><given-names>D.</given-names></name><name><surname>Walid</surname><given-names>A.</given-names></name></person-group><year>2019</year><fpage>57</fpage><lpage>63</lpage><page-range>57-63</page-range><pub-id pub-id-type="doi">10.24929/lensa.v9i2.78</pub-id></element-citation></ref><ref id="BIBR-53"><element-citation publication-type="article-journal"><article-title>Validitas isi: tahap awal pengembangan kuesioner</article-title><source>Jurnal Riset Manajemen Dan Bisnis (JRMB) Fakultas Ekonomi UNIAT</source><volume>2</volume><issue>2</issue><person-group person-group-type="author"><name><surname>Yadi</surname><given-names>H.</given-names></name></person-group><year>2017</year><fpage>169</fpage><lpage>178</lpage><page-range>169-178</page-range></element-citation></ref><ref id="BIBR-54"><element-citation publication-type="article-journal"><article-title>Pengembangan Instrumen Penilaian Tes Berbasis HOTS Pada Jenjang Pendidikan Dasar Di Era Society 5.0</article-title><source>Al-Madrasah: Jurnal Pendidikan Madrasah Ibtidaiyah</source><volume>7</volume><issue>1</issue><person-group person-group-type="author"><name><surname>Yusmilda</surname><given-names>Y.</given-names></name><name><surname>Budi</surname><given-names>I.S.</given-names></name><name><surname>Zuhad</surname><given-names>H.</given-names></name></person-group><year>2023</year><page-range>429</page-range><pub-id pub-id-type="doi">10.35931/am.v7i1.1885</pub-id></element-citation></ref></ref-list></back></article>
