diff --git a/bibliography.bib b/bibliography.bib index d224e5d..9acac32 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -6,7 +6,6 @@ journal = {{International Journal of Information}}, volume = {Volume: 2, Number: 2}, number = {Volume: 2, Number: 2}, - url = {https://iugspace.iugaza.edu.ps/handle/20.500.12358/25066}, urldate = {2021-09-20}, abstract = {Educational data mining concerns with developing methods for discovering knowledge from data that come from educational domain. In this paper we used educational data mining to improve graduate students' performance, and overcome the problem of low grades of graduate students. In our case study we try to extract useful knowledge from graduate students data collected from the college of Science and Technology--Khanyounis. The data include fifteen years period [1993-2007]. After preprocessing the data, we applied data mining techniques to discover association, classification, clustering and outlier detection rules. In each of these four tasks, we present the extracted knowledge and describe its importance in educational domain.}, copyright = {Creative Commons (CC-BY)}, @@ -26,7 +25,6 @@ pages = {40}, issn = {2365-9440}, doi = {10.1186/s41239-019-0172-z}, - url = {https://doi.org/10.1186/s41239-019-0172-z}, urldate = {2024-02-14}, abstract = {In the current study interaction data of students in an online learning setting was used to research whether the academic performance of students at the end of term could be predicted in the earlier weeks. The study was carried out with 76\,second-year university students registered in a Computer Hardware course. The study aimed to answer two principle questions: which algorithms and features best predict the end of term academic performance of students by comparing different classification algorithms and pre-processing techniques and whether or not academic performance can be predicted in the earlier weeks using these features and the selected algorithm. The results of the study indicated that the kNN algorithm accurately predicted unsuccessful students at the end of term with a rate of 89\%. When findings were examined regarding the analysis of data obtained in weeks 3, 6, 9, 12,~and 14~to predict whether the end-of-term academic performance of students could be predicted in the earlier weeks, it was observed that students who were unsuccessful at the end of term could be predicted with a rate of 74\% in as short as 3\,weeks' time. The findings obtained from this study are important for the determination of features for early warning systems that can be developed for online learning systems and as indicators of student success. At the same time, it will aid researchers in the selection of algorithms and pre-processing techniques in the analysis of educational data.}, langid = {english}, @@ -45,7 +43,6 @@ pages = {334--345}, issn = {0360-1315}, doi = {10.1016/j.compedu.2018.07.021}, - url = {https://www.sciencedirect.com/science/article/pii/S0360131518302045}, urldate = {2022-08-16}, abstract = {This study presents a large-scale systematic review of the literature on the flipped classroom, with the goals of examining its reported advantages and challenges for both students and instructors, and to note potentially useful areas of future research on the flipped model's in and out-of-class activities. The full range of Social Sciences Citation Indexed journals was surveyed through the Web of Science site, and a total of 71 research articles were selected for the review. 
The findings reveal that the most frequently reported advantage of the flipped classroom is the improvement of student learning performance. We also found a number of challenges in this model. The majority of these are related to out-of-class activities, such as much reported inadequate student preparation prior to class. Several other challenges and the numerous advantages of the flipped classroom are discussed in detail. We then offer suggestions for future research on flipped model activities.}, langid = {english}, @@ -65,7 +62,6 @@ publisher = {Routledge}, issn = {0899-3408}, doi = {10.1080/08993400500150747}, - url = {https://doi.org/10.1080/08993400500150747}, urldate = {2022-08-16}, abstract = {Practical programming is one of the basic skills pursued in computer science education. On programming courses, the coursework consists of programming assignments that need to be assessed from different points of view. Since the submitted assignments are executable programs with a formal structure, some features can be assessed automatically. The basic requirement for automated assessment is the numerical measurability of assessment targets, but semiautomatic approaches can overcome this restriction. Recognizing automatically assessable features can help teachers to create educational models, where automatic tools let teachers concentrate their work on the learning issues that need student-teacher interaction the most. Several automatic tools for both static and dynamic assessment of computer programs have been reported in the literature. This article promotes these issues by surveying several automatic approaches for assessing programming assignments. Not all the existing tools will be covered, simply because of the vast number of them. The article concentrates on bringing forward different assessment techniques and approaches to give an interested reader starting points for finding further information in the area. Automatic assessment tools can be used to help teachers in grading tasks as well as to support students' working process with automatic feedback. Common advantages of automation are the speed, availability, consistency and objectivity of assessment. However, automatic tools emphasize the need for careful pedagogical design of the assignment and assessment settings. To effectively share the knowledge and good assessment solutions already developed, better interoperability and portability of the tools is needed.} } @@ -78,7 +74,6 @@ month = dec, pages = {631--637}, doi = {10.1109/CSCI46756.2018.00126}, - url = {https://ieeexplore.ieee.org/abstract/document/8947659}, urldate = {2023-10-02}, abstract = {With the advancement of today's ubiquitous technology, and due to the increasing number of technologies supported by the Internet, a variety of Online Learning Platforms have rapidly grown as modern learning methods. This fast-emerging learning option interests researchers to study and investigate the main features and functionality of the most popular Online Learning Platforms. This paper surveys the state-of-the-art Online Learning Platforms that aim to teach computer programming, in terms of principles, design, and implementations. 
In addition, the paper investigates the feasibility of incorporating human-oriented Situation-Awareness as the driving factor to facilitate the delivery of improved user learning experiences.}, file = {/home/charlotte/sync/Zotero/storage/T767PWYN/Albashaireh and Ming - 2018 - A Survey of Online Learning Platforms with Initial.pdf;/home/charlotte/sync/Zotero/storage/P34UIQEK/8947659.html} @@ -109,7 +104,6 @@ pages = {1--13}, issn = {1468-2389}, doi = {10.1111/j.1468-2389.2008.00404.x}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1468-2389.2008.00404.x}, urldate = {2021-04-30}, abstract = {This paper reports reactions to employee selection methods in the Netherlands and compares these findings internationally against six other previously published samples covering the United States, France, Spain, Portugal, and Singapore. A sample of 167 participants rated 10 popular assessment techniques using a translated version of Steiner and Gilliland's measure. In common with other country samples, we found that the most popular methods among applicants were interviews, work sample tests, and resumes. Least popular methods were graphology, personal contacts, and honesty and integrity tests. Generally, method favorability was found to be highly similar to the US and other published studies internationally. Across the six countries mean process favorability correlated at .87 and mean cross-national procedural justice correlated .68. Process dimension ratings correlated at between .79 and .97 between the United States and the Netherlands. Only medium effect size differences (Cohen's d) were found between Dutch and US reactions to resumes and personality tests, the former being more favorably rated in the United States (d=.62) and the latter being more positively rated in the Netherlands (d=-.76). Implications for the design of selection procedures are discussed, especially implications for likely similarities and differences in applicant reactions internationally.}, langid = {english}, @@ -128,7 +122,6 @@ pages = {2302--2315}, issn = {2620-5440}, doi = {10.11591/ijere.v12i4.25859}, - url = {https://ijere.iaescore.com/index.php/IJERE/article/view/25859}, urldate = {2023-12-04}, abstract = {The university learning style worldwide was analyzed to obtain a model adapted to Peru, that was complemented in the initial part with the study of the biliometric analysis. In the first steps that were developed, the information search was done in a general way with Scopus. Then specifically adding the Dimensions database, obtaining 59 items from the selection. The Prism statement was used, which allowed it to be developed in the methodology sequentially until the selected articles were obtained. The objective was to carry out a study of the systematic review of the literature (RSL) that allowed analysis by categories such as academic performance, teaching strategy and competencies related to the learning style. Where the data obtained was with the use of VOSviewer and Rstudio. The result obtained was an innovative model that relates the categories with the most relevant models that studied the learning style. 
As a conclusion, the different learning styles can be adapted to the different study programs and their different courses to plan it from the macrocurricular to the microcurricular, taking into account the strategy and the didactics of teaching, the contribution for the university sector.}, copyright = {Copyright (c) 2023 Institute of Advanced Engineering and Science}, @@ -148,7 +141,6 @@ pages = {2754--2763}, publisher = {{The Institute of Electronics, Information and Communication Engineers}}, issn = {, 0916-8532}, - url = {https://search.ieice.org/bin/summary.php?id=e87-d_12_2754&category=D&year=2004&lang=E&abst=}, urldate = {2022-07-06}, abstract = {In this paper, we consider a data mining problem for semi-structured data. Modeling semi-structured data as labeled ordered trees, we present an efficient algorithm for discovering frequent substructures from a large collection of semi-structured data. By extending the enumeration technique developed by Bayardo (SIGMOD'98) for discovering long itemsets, our algorithm scales almost linearly in the total size of maximal tree patterns contained in an input collection depending mildly on the size of the longest pattern. We also developed several pruning techniques that significantly speed-up the search. Experiments on Web data show that our algorithm runs efficiently on real-life datasets combined with proposed pruning techniques in the wide range of parameters.}, file = {/home/charlotte/sync/Zotero/storage/I46PGVCC/Asai et al. - 2004 - Efficient Substructure Discovery from Large Semi-S.pdf;/home/charlotte/sync/Zotero/storage/PNH8LAUL/summary.html} @@ -164,7 +156,6 @@ pages = {177--194}, issn = {0360-1315}, doi = {10.1016/j.compedu.2017.05.007}, - url = {https://www.sciencedirect.com/science/article/pii/S0360131517301124}, urldate = {2021-02-19}, abstract = {The tremendous growth in electronic data of universities creates the need to have some meaningful information extracted from these large volumes of data. The advancement in the data mining field makes it possible to mine educational data in order to improve the quality of the educational processes. This study, thus, uses data mining methods to study the performance of undergraduate students. Two aspects of students' performance have been focused upon. First, predicting students' academic achievement at the end of a four-year study programme. Second, studying typical progressions and combining them with prediction results. Two important groups of students have been identified: the low and high achieving students. The results indicate that by focusing on a small number of courses that are indicators of particularly good or poor performance, it is possible to provide timely warning and support to low achieving students, and advice and opportunities to high performing students.}, langid = {english}, @@ -177,7 +168,6 @@ author = {Avery, Jonathan Wardell}, year = {2015}, publisher = {University of Canterbury}, - url = {https://ir.canterbury.ac.nz/handle/10092/14446}, urldate = {2022-07-06}, abstract = {Detection of similar programs is a highly studied problem. Detecting similar code is an important strategy for detecting badly modularized code, finding vulnerabilities due to error prone copy-paste programming methodologies, and detecting academic dishonesty in online code assignment submissions following the copy-paste-adapt-it pattern. The latter is the impetus for this work. 
A novel system is presented that is specifically adapted to programs that may be small, and similar by virtue of being written to solve the same problem. The system is also adapted toward specific expected behaviors of plagiarists, making use of algorithms custom built to both recognize these behaviors while satisfying hierarchical properties. A defining and novel property of the proposed method is the categorical information it provides. A hierarchy of categories with an implication relationship are leveraged in the production of descriptive, rank-able results.}, copyright = {All Right Reserved}, @@ -194,7 +184,6 @@ pages = {379--396}, publisher = {John Wiley \& Sons, Ltd}, doi = {10.1002/9781118956588.ch16}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/9781118956588.ch16}, urldate = {2024-05-08}, abstract = {In recent years, there has been increasing interest in using the methods of educational data mining (EDM) and learning analytics (LA) to study and measure learner cognition. In this chapter, we discuss how these types of methods can be used to measure complex cognition and meta-cognition in types of environments where inference can be challenging: exploratory and inquiry learning environments, complex games, and project-based learning. We give examples from a range of projects for the types of constructs that can be inferred using EDM/LA methods and how these measures compare to what can be obtained from more traditional methods. We conclude with a discussion of future discussion and potentials for these kinds of methods.}, chapter = {16}, @@ -217,7 +206,6 @@ pages = {3--17}, issn = {2157-2100}, doi = {10.5281/zenodo.3554657}, - url = {https://jedm.educationaldatamining.org}, urldate = {2021-04-30}, copyright = {Copyright (c) 2014 JEDM - Journal of Educational Data Mining}, langid = {english}, @@ -238,7 +226,6 @@ publisher = {Routledge}, issn = {1050-8406}, doi = {10.1207/s15327809jls1301_1}, - url = {https://doi.org/10.1207/s15327809jls1301_1}, urldate = {2021-09-15}, file = {/home/charlotte/sync/Zotero/storage/I5L3SPUC/Barab and Squire - 2004 - Design-Based Research Putting a Stake in the Grou.pdf;/home/charlotte/sync/Zotero/storage/AMUNRJ5E/s15327809jls1301_1.html} } @@ -254,7 +241,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1734263.1734319}, - url = {https://doi.org/10.1145/1734263.1734319}, urldate = {2022-02-24}, abstract = {At Carnegie Mellon's Silicon Valley campus we employ a learn-by-doing educational approach in which nearly all student learning, and thus instruction, is in the context of realistic, team-based projects. Consequently, we have adopted coaching as our predominant teaching model. In this paper we reflect on our experience with the nature of teaching by coaching using a framework derived from Cognitive Apprenticeship, and explain how we employ the techniques it suggests in our teaching. We also discuss a range of instructional tensions that arise in teaching by coaching and present a survey of student attitudes regarding the effectiveness of our approach.}, isbn = {978-1-4503-0006-3}, @@ -292,7 +278,6 @@ pages = {89--111}, issn = {1571-0661}, doi = {10.1016/j.entcs.2005.12.014}, - url = {https://www.sciencedirect.com/science/article/pii/S1571066106000442}, urldate = {2022-03-03}, abstract = {The development of large software systems is a complex and error prone process. 
Faults might occur at any development stage and they must be identified and removed as early as possible to stop their propagation and reduce verification costs. Quality engineers must be involved in the development process since the very early phases to identify required qualities and estimate their impact on the development process. Their tasks span over the whole development cycle and go beyond the product deployment through maintenance and post mortem analysis. Developing and enacting an effective quality process is not a simple task, but it requires that we integrate many quality-related activities with product characteristics, process organization, available resources and skills, and budget constraints. This paper discusses the main characteristics of a good quality process, then surveys the key testing phases and presents modern functional and model-based testing approaches.}, langid = {english}, @@ -312,7 +297,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3344429.3372508}, - url = {https://doi.org/10.1145/3344429.3372508}, urldate = {2022-02-21}, abstract = {Diagnostic messages generated by compilers and interpreters such as syntax error messages have been researched for over half of a century. Unfortunately, these messages which include error, warning, and run-time messages, present substantial difficulty and could be more effective, particularly for novices. Recent years have seen an increased number of papers in the area including studies on the effectiveness of these messages, improving or enhancing them, and their usefulness as a part of programming process data that can be used to predict student performance, track student progress, and tailor learning plans. Despite this increased interest, the long history of literature is quite scattered and has not been brought together in any digestible form. In order to help the computing education community (and related communities) to further advance work on programming error messages, we present a comprehensive, historical and state-of-the-art report on research in the area. In addition, we synthesise and present the existing evidence for these messages including the difficulties they present and their effectiveness. We finally present a set of guidelines, curated from the literature, classified on the type of evidence supporting each one (historical, anecdotal, and empirical). This work can serve as a starting point for those who wish to conduct research on compiler error messages, runtime errors, and warnings. We also make the bibtex file of our 300+ reference corpus publicly available. Collectively this report and the bibliography will be useful to those who wish to design better messages or those that aim to measure their effectiveness, more effectively.}, isbn = {978-1-4503-7567-2}, @@ -332,7 +316,6 @@ publisher = {Athabasca University Press (AU Press)}, issn = {1492-3831}, doi = {10.19173/irrodl.v12i3.902}, - url = {https://www.erudit.org/en/journals/irrodl/2011-v12-n3-irrodl05132/1067617ar/}, urldate = {2022-08-16}, abstract = {The sociotechnical context for learning and education is dynamic and makes great demands on those trying to seize the opportunities presented by emerging technologies. The goal of this paper is to explore certain theories for our plans and actions in technology-enabled learning. 
Although presented as a successor to previous learning theories, connectivism alone is insufficient to inform learning and its support by technology in an internetworked world. However, because of its presence in massive open online courses (MOOCs), connectivism is influential in the practice of those who take these courses and who wish to apply it in teaching and learning. Thus connectivism is perceived as relevant by its practitioners but as lacking in rigour by its critics. Five scenarios of change are presented with frameworks of different theories to explore the variety of approaches educators can take in the contexts for change and their associated research/evaluation. I argue that the choice of which theories to use depends on the scope and purposes of the intervention, the funding available to resource the research/evaluation, and the experience and philosophical stances of the researchers/practitioners.}, langid = {english}, @@ -350,7 +333,6 @@ pages = {45--73}, publisher = {Association for the Advancement of Computing in Education (AACE)}, issn = {0731-9258}, - url = {https://www.learntechlib.org/primary/p/8505/}, urldate = {2022-02-24}, abstract = {Constructivism is a theory of learning, which claims that students construct knowledge rather than merely receive and store knowledge transmitted by the teacher. Constructivism has been extremely influential in science and mathematics education, but much less so in computer science education (CSE). This paper surveys constructivism in the context of CSE, and shows how the theory can supply a theoretical basis for debating issues and evaluating proposals. An analysis of constructivism in computer science education leads to two claims: (a) students do not have an effective model of...}, langid = {english}, @@ -368,7 +350,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2899415.2899443}, - url = {https://dl.acm.org/doi/10.1145/2899415.2899443}, urldate = {2023-10-05}, abstract = {We present a framework for automatic grading of programming exercises using property-based testing, a form of model-based black-box testing. Models are developed to assess both the functional behaviour of programs and their algorithmic complexity. From the functional correctness model a large number of test cases are derived automatically. Executing them on the body of exercises gives rise to a (partial) ranking of programs, so that a program A is ranked higher than program B if it fails a strict subset of the test cases failed by B. The model for algorithmic complexity is used to compute worst-case complexity bounds. The framework moreover considers code structural metrics, such as McCabe's cyclomatic complexity, giving rise to a composite program grade that includes both functional, non-functional, and code structural aspects. The framework is evaluated in a course teaching algorithms and data structures using Java.}, isbn = {978-1-4503-4231-5}, @@ -387,7 +368,6 @@ pages = {32--36}, issn = {0097-8418}, doi = {10.1145/1272848.1272879}, - url = {https://doi.org/10.1145/1272848.1272879}, urldate = {2021-02-19}, abstract = {It is a common conception that CS1 is a very difficult course and that failure rates are high. However, until now there has only been anecdotal evidence for this claim. This article reports on a survey among institutions around the world regarding failure rates in introductory programming courses. The article describes the design of the survey and the results.
The number of institutions answering the call for data was unfortunately rather low, so it is difficult to make firm conclusions. It is our hope that this article can be the starting point for a systematic collection of data in order to find solid proof of the actual failure and pass rates of CS1.}, keywords = {CS1,failure rate,introductory programming,pass rate}, @@ -406,7 +386,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/971300.971461}, - url = {https://doi.org/10.1145/971300.971461}, urldate = {2022-02-24}, abstract = {The recommendations of the Joint Task Force on Computing Curricula 2001 encompass suggestions for an object-first introductory programming course. We have identified conceptual modeling as a lacking perspective in the suggestions for CS1. Conceptual modeling is the defining characteristic of object-orientation and provides a unifying perspective and a pedagogical approach focusing upon the modelling aspects of object-orientation. Reinforcing conceptual modelling as a basis for CS1 provides an appealing course structure based on core elements from a conceptual framework for object-orientation as well as a systematic approach to programming; both of these are a big help to newcomers. The approach has a very positive impact on the number of students passing the course.}, isbn = {978-1-58113-798-9}, @@ -427,7 +406,6 @@ publisher = {MCB UP Ltd}, issn = {1066-2243}, doi = {10.1108/eb047254}, - url = {https://doi.org/10.1108/eb047254}, urldate = {2024-02-08}, abstract = {The World-Wide Web (W3) initiative is a practical project designed to bring a global information universe into existence using available technology. This article describes the aims, data model, and protocols needed to implement the ``web'' and compares them with various contemporary systems.}, file = {/home/charlotte/sync/Zotero/storage/D4DFLNVS/10.1108@eb047254.pdf.pdf;/home/charlotte/sync/Zotero/storage/6X7DIDMX/html.html} @@ -443,7 +421,6 @@ pages = {100081}, issn = {2666-920X}, doi = {10.1016/j.caeai.2022.100081}, - url = {https://www.sciencedirect.com/science/article/pii/S2666920X22000364}, urldate = {2024-01-10}, abstract = {Many engineering disciplines require problem-solving skills, which cannot be learned by memorization alone. Open-ended textual exercises allow students to acquire these skills. Students can learn from their mistakes when instructors provide individual feedback. However, grading these exercises is often a manual, repetitive, and time-consuming activity. The number of computer science students graduating per year has steadily increased over the last decade. This rise has led to large courses that cause a heavy workload for instructors, especially if they provide individual feedback to students. This article presents CoFee, a framework to generate and suggest computer-aided feedback for textual exercises based on machine learning. CoFee utilizes a segment-based grading concept, which links feedback to text segments. CoFee automates grading based on topic modeling and an assessment knowledge repository acquired during previous assessments. A language model builds an intermediate representation of the text segments. Hierarchical clustering identifies groups of similar text segments to reduce the grading overhead. We first demonstrated the CoFee framework in a small laboratory experiment in 2019, which showed that the grading overhead could be reduced by 85\%. 
This experiment confirmed the feasibility of automating the grading process for problem-solving exercises. We then evaluated CoFee in a large course at the Technical University of Munich from 2019 to 2021, with up to 2,200 enrolled students per course. We collected data from 34 exercises offered in each of these courses. On average, CoFee suggested feedback for 45\% of the submissions. 92\% (Positive Predictive Value) of these suggestions were precise and, therefore, accepted by the instructors.}, keywords = {Assessment support system,Automatic assessment,Education,Feedback,Grading,Interactive learning,Learning,Software engineering}, @@ -461,7 +438,6 @@ pages = {252--256}, issn = {0010-4620}, doi = {10.1093/comjnl/9.3.252}, - url = {https://doi.org/10.1093/comjnl/9.3.252}, urldate = {2024-02-07}, abstract = {This paper examines the possibility of using automatic grading programs for checking some of the practical work of students on a Numerical Analysis course. Two existing programs for checking root-finding techniques were tested to gain experience in using grader programs. A program to check solutions to a system of n first order differential equations was written.}, file = {/home/charlotte/sync/Zotero/storage/M66WNCES/berry1966.pdf.pdf;/home/charlotte/sync/Zotero/storage/WEVW8H9W/Berry - 1966 - Grader Programs.pdf;/home/charlotte/sync/Zotero/storage/34CCGYRS/406256.html} @@ -492,7 +468,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1324302.1324324}, - url = {https://dl.acm.org/doi/10.1145/1324302.1324324}, urldate = {2024-01-10}, abstract = {Originally there was one computing curriculum, computer science, which provided a "one-size-fits-all" education in programming and computing in general. Today, computing education has diverged into an array of sub-discipline areas as educators try to meet the changing computing needs of business and industry. Information technology, software engineering, computer engineering, and information systems have emerged from computer science as distinct computing disciplines. Plus, additional "micro-disciplines" are quickly emerging: games and networking from information technology, for example. The foundation skill for all computing disciplines is programming. However as computing technologies advance, discipline-specific differences increase. Each computing sub-discipline needs to approach programming from a slightly different viewpoint to meet student expectations of being highly marketable and employer expectations of quick productivity. How can colleges and universities economically meet the competing demands for a focused computing education while maintaining a strong foundation in programming fundamentals. This paper discusses how an introductory programming sequence can be designed with a common base to support multiple computing sub-disciplines as well as differentiated to address the specific, focused needs of a given sub-discipline. We identify both commonalities that support economy of scale and important differences that distinguish sub-discipline curricula.}, isbn = {978-1-59593-920-3}, @@ -512,7 +487,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3173574.3173951}, - url = {https://doi.org/10.1145/3173574.3173951}, urldate = {2021-04-30}, abstract = {Data-driven decision-making consequential to individuals raises important questions of accountability and justice.
Indeed, European law provides individuals limited rights to 'meaningful information about the logic' behind significant, autonomous decisions such as loan approvals, insurance quotes, and CV filtering. We undertake three experimental studies examining people's perceptions of justice in algorithmic decision-making under different scenarios and explanation styles. Dimensions of justice previously observed in response to human decision-making appear similarly engaged in response to algorithmic decisions. Qualitative analysis identified several concerns and heuristics involved in justice perceptions including arbitrariness, generalisation, and (in)dignity. Quantitative analysis indicates that explanation styles primarily matter to justice perceptions only when subjects are exposed to multiple different styles---under repeated exposure of one style, scenario effects obscure any explanation effects. Our results suggests there may be no 'best' approach to explaining algorithmic decisions, and that reflection on their automated nature both implicates and mitigates justice dimensions.}, isbn = {978-1-4503-5620-6}, @@ -529,7 +503,6 @@ month = jun, pages = {23.1200.1-23.1200.18}, issn = {2153-5965}, - url = {https://peer.asee.org/the-flipped-classroom-a-survey-of-the-research}, urldate = {2022-08-16}, file = {/home/charlotte/sync/Zotero/storage/NK937ZL5/Bishop and Verleger - 2013 - The Flipped Classroom A Survey of the Research.pdf;/home/charlotte/sync/Zotero/storage/849MXNUK/the-flipped-classroom-a-survey-of-the-research.html} } @@ -546,7 +519,6 @@ publisher = {Routledge}, issn = {0969-594X}, doi = {10.1080/0969595980050102}, - url = {https://doi.org/10.1080/0969595980050102}, urldate = {2021-08-10}, abstract = {This article is a review of the literature on classroom formative assessment. Several studies show firm evidence that innovations designed to strengthen the frequent feedback that students receive about their learning yield substantial learning gains. The perceptions of students and their role in self-assessment are considered alongside analysis of the strategies used by teachers and the formative strategies incorporated in such systemic approaches as mastery learning. There follows a more detailed and theoretical analysis of the nature of feedback, which provides a basis for a discussion of the development of theoretical models for formative assessment and of the prospects for the improvement of practice.} } @@ -564,7 +536,6 @@ publisher = {Routledge}, issn = {1050-8406}, doi = {10.1080/10508406.2014.954750}, - url = {https://doi.org/10.1080/10508406.2014.954750}, urldate = {2023-10-18}, abstract = {New high-frequency, automated data collection and analysis algorithms could offer new insights into complex learning processes, especially for tasks in which students have opportunities to generate unique open-ended artifacts such as computer programs. These approaches should be particularly useful because the need for scalable project-based and student-centered learning is growing considerably. In this article, we present studies focused on how students learn computer programming, based on data drawn from 154,000 code snapshots of computer programs under development by approximately 370 students enrolled in an introductory undergraduate programming course. We use methods from machine learning to discover patterns in the data and try to predict final exam grades. 
We begin with a set of exploratory experiments that use fully automated techniques to investigate how much students change their programming behavior throughout all assignments in the course. The results show that students' change in programming patterns is only weakly predictive of course performance. We subsequently hone in on 1 single assignment, trying to map students' learning process and trajectories and automatically identify productive and unproductive (sink) states within these trajectories. Results show that our process-based metric has better predictive power for final exams than the midterm grades. We conclude with recommendations about the use of such methods for assessment, real-time feedback, and course improvement.}, file = {/home/charlotte/sync/Zotero/storage/XVUQVM6A/Blikstein et al. - 2014 - Programming Pluralism Using Learning Analytics to.pdf} @@ -590,7 +561,6 @@ pages = {96--100}, issn = {1471-5953}, doi = {10.1016/j.nepr.2012.07.010}, - url = {https://www.sciencedirect.com/science/article/pii/S1471595312001424}, urldate = {2021-09-30}, abstract = {The majority of nursing students are kinesthetic learners, preferring a hands-on, active approach to education. Research shows that active-learning strategies can increase student learning and satisfaction. This study looks at the use of one active-learning strategy, a Jeopardy-style game, `Nursopardy', to reinforce Fundamentals of Nursing material, aiding in students' preparation for a standardized final exam. The game was created keeping students varied learning styles and the NCLEX blueprint in mind. The blueprint was used to create 5 categories, with 26 total questions. Student survey results, using a five-point Likert scale showed that they did find this learning method enjoyable and beneficial to learning. More research is recommended regarding learning outcomes, when using active-learning strategies, such as games.}, langid = {english}, @@ -610,7 +580,6 @@ publisher = {Routledge}, issn = {1050-8406}, doi = {10.1080/10508406.2020.1759605}, - url = {https://doi.org/10.1080/10508406.2020.1759605}, urldate = {2021-09-15}, abstract = {Background This case reports on a teacher education course that aimed to support adult learners with a vocational education background to accomplish open-ended tasks. Conjecture mapping was used to identify the most salient design features, and to test if, how, and why these course features supported learners. Methods: Inspired by ethnographic approaches, sustained engagement and multiple data sources were used to explain the effects of the course design on participants' behavior and perceptions: student and teacher interviews, observations, and artifacts. Findings: The results reveal that almost all of the proposed design features stimulated the participants toward the intended enactment processes, which in turn yielded the intended learning outcomes. For instance, worked examples (i.e., design feature) not only engendered the production of artifacts that meet high standards (i.e., enactment process) because they clarify the task requirements, but also fostered a safe structure (i.e., enactment process) by providing an overall picture of the task. 
Contribution: The conjecture map resulting from this study provides a theoretical frame to describe, explain, and predict how specific course design features support vocationally educated adult learners (VEAL) in open-ended tasks, and assists those who aim to implement open-ended tasks in similar contexts.}, file = {/home/charlotte/sync/Zotero/storage/QQ3Z4SAU/Boelens et al. - 2020 - Conjecture mapping to support vocationally educate.pdf;/home/charlotte/sync/Zotero/storage/TSZVRFQN/10508406.2020.html} @@ -636,7 +605,6 @@ publisher = {Routledge}, issn = {1356-2517}, doi = {10.1080/13562517.2020.1839746}, - url = {https://doi.org/10.1080/13562517.2020.1839746}, urldate = {2022-03-03}, abstract = {This article reviews research that consistently, across borders and over time, reveals inertia in Higher Education institutions related to innovation in academic teaching. Despite frequent calls for more student-active learning, studies find that teaching remains predominantly traditional and teacher-centred. While research is recognised as continuously developing, border-crossing, investigative and innovative collaborative activities that needs an infrastructure to succeed, the need for collaborative development and a supporting infrastructure is rarely mentioned in academic teaching, often described as individual and traditional in the research. To better understand this paradox, and to identify barriers to student active learning, we reanalysed articles from two systematic reviews, one on campus development and one on learning and teaching with technology. The article identified the following prerequisites for student active learning to succeed: (1) better alignment between research and teaching practices, (2) a supporting infrastructure for research and teaching, (3) staff professional development and learning designs.}, keywords = {barriers,infrastructure,literature review,scholarly approach,Student active learning}, @@ -657,7 +625,6 @@ year = {2024}, month = may, publisher = {IEEE}, - url = {https://orbi.uliege.be/handle/2268/313506}, urldate = {2024-02-26}, abstract = {Shaping first-year students' minds to solve problems at different levels of abstraction is both important and challenging. Although abstraction is a crucial skill in problem-solving, especially in STEM subjects, students often struggle with abstract thinking. They tend to focus their efforts on concrete aspects of the problem, where they feel more comfortable and closer to the final solution. Unfortunately, this approach can cause them to overlook critical details related to the problem or its solution. To address this issue in our Introduction to Programming (CS1) course, we introduced a programming methodology that requires students to create a graphical representation of their solution and then derive the code from it. To enable them to practice this diagrammatic reasoning approach on a regular basis, we developed a learning tool called CAF{\'E} 2.0. It facilitates a semester-long activity in which students solve problems by submitting both a graphical representation of their solution and its implementation. Further to checking the final implementation, CAF{\'E} 2.0 also provides personalized feedback on how students have graphically modeled their solution and how consistent it is with their code. This paper presents an overview of the features of CAF{\'E} 2.0 and the methodology it currently supports in the context of our CS1 course.
Then, using a survey and learning analytics, this paper evaluates students' interactions with CAF{\'E} 2.0. Finally, the potential for extending CAF{\'E} 2.0 to other STEM disciplines is discussed.}, langid = {Anglais}, @@ -678,7 +645,6 @@ year = {2024}, month = jan, doi = {10.5281/ZENODO.4619183}, - url = {https://zenodo.org/doi/10.5281/zenodo.4619183}, urldate = {2024-02-05}, abstract = {An incremental parsing system for programming tools}, copyright = {Creative Commons Attribution 4.0 International}, @@ -697,7 +663,6 @@ pages = {6--es}, issn = {1531-4278}, doi = {10.1145/1163405.1163411}, - url = {https://dl.acm.org/doi/10.1145/1163405.1163411}, urldate = {2024-02-09}, abstract = {Individualized exercises are a promising feature in promoting modern e-learning. The focus of this article is on the QuizPACK system, which is able to generate parameterized exercises for the C language and automatically evaluate the correctness of student answers. We introduce QuizPACK and present the results of its comprehensive classroom evaluation during four consecutive semesters. Our studies demonstrate that when QuizPACK is used for out-of-class self-assessment, it is an exceptional learning tool. The students' work with QuizPACK significantly improved their knowledge of semantics and positively affected higher-level knowledge and skills. The students themselves praised the system highly as a learning tool. We also demonstrated that the use of the system in self-assessment mode can be significantly increased by basing later classroom paper-and-pencil quizzes on QuizPACK questions, motivating students to practice them more.}, keywords = {assessment,classroom study,code execution,E-learning,individualized exercises,introductory programming,parameterized questions}, @@ -713,7 +678,6 @@ pages = {5691--5700}, publisher = {E.T.S.I. Telecomunicaci{\'o}n (UPM)}, address = {Valencia, Spain}, - url = {https://oa.upm.es/25765/}, urldate = {2022-08-16}, abstract = {Automatic grading of programming assignments is an important topic in academic research. It aims at improving the level of feedback given to students and optimizing the professor time. Several researches have reported the development of software tools to support this process. Then, it is helpful to get a quickly and good sight about their key features. This paper reviews an ample set of tools for automatic grading of programming assignments. They are divided in those most important mature tools, which have remarkable features; and those built recently, with new features. The review includes the definition and description of key features e.g. supported languages, used technology, infrastructure, etc. The two kinds of tools allow making a temporal comparative analysis. This analysis shows good improvements in this research field, these include security, more language support, plagiarism detection, etc. On the other hand, the lack of a grading model for assignments is identified as an important gap in the reviewed tools. Thus, a characterization of evaluation metrics to grade programming assignments is provided as first step to get a model.
Finally new paths in this research field are proposed.}, copyright = {https://creativecommons.org/licenses/by-nc-nd/3.0/es/}, @@ -732,7 +696,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3338906.3340458}, - url = {https://dl.acm.org/doi/10.1145/3338906.3340458}, urldate = {2023-11-23}, abstract = {There have been multiple recent proposals on using deep neural networks for code search using natural language. Common across these proposals is the idea of embedding code and natural language queries into real vectors and then using vector distance to approximate semantic correlation between code and the query. Multiple approaches exist for learning these embeddings, including unsupervised techniques, which rely only on a corpus of code examples, and supervised techniques, which use an aligned corpus of paired code and natural language descriptions. The goal of this supervision is to produce embeddings that are more similar for a query and the corresponding desired code snippet. Clearly, there are choices in whether to use supervised techniques at all, and if one does, what sort of network and training to use for supervision. This paper is the first to evaluate these choices systematically. To this end, we assembled implementations of state-of-the-art techniques to run on a common platform, training and evaluation corpora. To explore the design space in network complexity, we also introduced a new design point that is a minimal supervision extension to an existing unsupervised technique. Our evaluation shows that: 1. adding supervision to an existing unsupervised technique can improve performance, though not necessarily by much; 2. simple networks for supervision can be more effective that more sophisticated sequence-based networks for code search; 3. while it is common to use docstrings to carry out supervision, there is a sizeable gap between the effectiveness of docstrings and a more query-appropriate supervision corpus.}, isbn = {978-1-4503-5572-8}, @@ -752,7 +715,6 @@ publisher = {Athabasca University Press (AU Press)}, issn = {1492-3831}, doi = {10.19173/irrodl.v9i1.469}, - url = {https://www.erudit.org/en/journals/irrodl/2008-v9-n1-irrodl05535/1071813ar/}, urldate = {2022-10-03}, abstract = {The role of distance education is shifting. Traditionally distance education was limited in the number of people served because of production, reproduction, and distribution costs. Today, while it still costs the university time and money to produce a course, technology has made it such that reproduction costs are almost non-existent. This shift has significant implications, and allows distance educators to play an important role in the fulfillment of the promise of the right to universal education. At little or no cost, universities can make their content available to millions. This content has the potential to substantially improve the quality of life of learners around the world. New distance education technologies, such as OpenCourseWares, act as enablers to achieving the universal right to education. 
These technologies, and the associated changes in the cost of providing access to education, change distance education's role from one of classroom alternative to one of social transformer.}, langid = {english}, @@ -768,7 +730,6 @@ journal = {{International Journal of Artificial Intelligence in Education}}, issn = {1560-4306}, doi = {10.1007/s40593-023-00375-w}, - url = {https://doi.org/10.1007/s40593-023-00375-w}, urldate = {2024-01-10}, abstract = {Educational feedback is a crucial factor in the student's learning journey, as through it, students are able to identify their areas of deficiencies and improve self-regulation. However, the literature shows that this is an area of great dissatisfaction, especially in higher education. Providing effective feedback becomes an increasingly challenging task as the number of students increases. Therefore, this article explores the use of automated content analysis to examine instructor feedback based on reputable models from the literature that provide best practices and classify feedback at different levels. For this, this article proposes using the transformer model BERT to classify feedback messages. The proposed method outperforms previous works by up to 35.71\% in terms of Cohen's kappa. Finally, this study adopted an explainable artificial intelligence to provide insights into the most predictive features for each classifier analyzed.}, langid = {english}, @@ -798,7 +759,6 @@ publisher = {Inderscience Publishers}, issn = {1753-5255}, doi = {10.1504/IJTEL.2012.051815}, - url = {https://www.inderscienceonline.com/doi/10.1504/IJTEL.2012.051815}, urldate = {2024-02-13}, abstract = {Recently, there is an increasing interest in learning analytics in Technology-Enhanced Learning (TEL). Generally, learning analytics deals with the development of methods that harness educational datasets to support the learning process. Learning analytics (LA) is a multi-disciplinary field involving machine learning, artificial intelligence, information retrieval, statistics and visualisation. LA is also a field in which several related areas of research in TEL converge. These include academic analytics, action analytics and educational data mining. In this paper, we investigate the connections between LA and these related fields. We describe a reference model for LA based on four dimensions, namely data and environments (what?), stakeholders (who?), objectives (why?) and methods (how?). We then review recent publications on LA and its related fields and map them to the four dimensions of the reference model. Furthermore, we identify various challenges and research opportunities in the area of LA in relation to each dimension.}, keywords = {academic analytics,action research,educational data mining,learning analytics,literature review,reference model}, @@ -816,7 +776,6 @@ pages = {121--131}, issn = {0360-1315}, doi = {10.1016/S0360-1315(03)00030-7}, - url = {https://www.sciencedirect.com/science/article/pii/S0360131503000307}, urldate = {2021-10-01}, abstract = {Practise is one of the most important steps in learning the art of computer programming. Unfortunately, human grading of programming assignments is a tedious and error-prone task, a problem compounded by the large enrolments of many programming courses. As a result, students in such courses tend to be given fewer programming assignments than should be ideally given. 
One solution to this problem is to automate the grading process such that students can electronically submit their programming assignments and receive instant feedback. This paper studies the implementation of one such automated grading system, called the Online Judge, in the School of Computing of the National University of Singapore for a compulsory first-year course that teaches basic programming techniques with over 700 students, describing the student reactions and behavior as well as the difficulties encountered. The Online Judge was also successfully employed for an advanced undergraduate course and an introductory high school course.}, langid = {english}, @@ -834,7 +793,6 @@ pages = {167341--167354}, issn = {2169-3536}, doi = {10.1109/ACCESS.2020.3024102}, - url = {https://ieeexplore.ieee.org/document/9195825}, urldate = {2023-10-18}, abstract = {Automated programming assessment systems are useful tools to track the learning progress of students automatically and thereby reduce the workload of educators. They can also be used to gain insights into how students learn, making it easier to formulate strategies aimed at enhancing learning performance. Rather than functional code which is always inspected, code quality remains an essential aspect to which not many educators consider when designing an automated programming assessment system. In this study, we applied data mining techniques to analyze the results of an automated assessment system to reveal unexpressed patterns in code quality improvement that are predictive of final achievements in the course. Cluster analysis is first utilized to categorize students according to their learning behavior and outcomes. Cluster profile analysis is then leveraged to highlight actionable factors that could affect their final grades. Finally, the same factors are employed to construct a classification model by which to make early predictions of the students' final results. Our empirical results demonstrate the efficacy of the proposed scheme in providing valuable insights into the learning behaviors of students in novice programming courses, especially in code quality assurance, which could be used to enhance programming performance at the university level.}, file = {/home/charlotte/sync/Zotero/storage/C8KXJ7TR/Chen et al. - 2020 - Analysis of Learning Behavior in an Automated Prog.pdf;/home/charlotte/sync/Zotero/storage/V3F96LAQ/9195825.html} @@ -846,7 +804,6 @@ year = {1987}, month = mar, journal = {{AAHE Bulletin}}, - url = {https://eric.ed.gov/?id=ed282491}, urldate = {2022-09-09}, abstract = {Seven principles that can help to improve undergraduate education are identified. Based on research on college teaching and learning, good practice in undergraduate education: (1) encourages contacts between students and faculty; (2) develops reciprocity and cooperation among students; (3) uses active learning techniques; (4) gives prompt feedback; (5) emphasizes time on task; (6) communicates high expectations; and (7) respects diverse talents and ways of learning. Examples of approaches that have been used in different kinds of college in the last few years are described. In addition, the implications of these principles for the way states fund and govern higher education and for the way institutions are run are briefly discussed. 
Examples of good approaches include: freshman seminars on important topics taught by senior faculty; learning groups of five to seven students who meet regularly during class to solve problems set by the instructor; active learning using structured exercises, discussions, team projects, and peer critiques, as well as internships and independent study; and mastery learning, contract learning, and computer-assisted instruction approaches, which required adequate time on learning. (SW)}, langid = {english}, @@ -865,7 +822,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3099023.3099065}, - url = {https://doi.org/10.1145/3099023.3099065}, urldate = {2022-08-02}, abstract = {Formative feedback is essential for learning computer programming but is also a challenge to automate because of the many solutions a programming exercise can have. Whilst programming tutoring systems can easily generate automated feedback on how correct a program is, they less often provide some personalised guidance on how to improve or fix the code. In this paper, we present an approach for generating hints using previous student data. Utilising a range of techniques such as filtering, clustering and pattern mining, four different types of data-driven hints are generated: input suggestion, code-based, concept and pre-emptive hints. We evaluated our approach with data from 5529 students using the Grok Learning platform for teaching programming in Python. The results show that we can generate various types of hints for over 90\% of students with data from only 10 students, and hence, reduce the cold-start problem.}, isbn = {978-1-4503-5067-9}, @@ -884,7 +840,6 @@ pages = {101079}, issn = {2352-7110}, doi = {10.1016/j.softx.2022.101079}, - url = {https://www.sciencedirect.com/science/article/pii/S2352711022000577}, urldate = {2022-11-23}, abstract = {Automated assessment tools (AATs) are software systems used in teaching environments to automate the evaluation of computer programs implemented by students. These tools can be used to stimulate the interest of computer science students in programming courses by providing quick feedback on their work and highlighting their mistakes. Despite the abundance of such tools, most of them are developed for a specific course and are not production-ready. Others lack advanced features that are required for certain pedagogical goals (e.g. Git integration) and/or are not flexible enough to be used with students having different computer literacy levels, such as first year and second year students. In this paper we present Drop Project (DP), an automated assessment tool built on top of the Maven build automation software. We have been using DP in our teaching activity since 2018, having received more than fifty thousand submissions between projects, classroom exercises, tests and homework assignments. The tool's automated feedback has allowed us to raise the difficulty level of the course's projects, while the grading process has become more efficient and consistent between different teachers. DP is an extensively tested, production-ready tool. 
The software's code and documentation are available in GitHub under an open-source software license.}, langid = {english}, @@ -894,7 +849,6 @@ @book{CodeCloneAnalysis, title = {Code {{Clone Analysis}}}, - url = {https://link.springer.com/book/10.1007/978-981-16-1927-4}, urldate = {2022-07-05}, abstract = {This book selects past research results that are important to the progress of code clone analysis and updates them with new results and future directions.}, langid = {english}, @@ -913,7 +867,6 @@ publisher = {Routledge}, issn = {0260-2938}, doi = {10.1080/713611435}, - url = {https://doi.org/10.1080/713611435}, urldate = {2022-08-16}, abstract = {This paper presents the development, through action research, of formative elements in assessment in a level 3 compulsory module of the BSc Health Studies and BSc Nursing programmes at the University of Sunderland. The paper reviews three cycles of planning, implementing and evaluating change in assessment strategy and is written in the first person to emphasise the connections between the writer and the material. From a consideration of the format and characteristics of the assessment within the module, the action research is reported through the implementation of actions taken to facilitate more effective use of formative feedback. The evaluation of these actions through my own reflections, student performance, dialogue with team colleagues and student feedback through the production of short narrative accounts of their learning experience is outlined. The paper demonstrates that through explicitly using the learning potential within assessment, learning can be facilitated through challenging students to move from 'doing' assignments, to reflexive thinking about their writing.} } @@ -934,7 +887,6 @@ year = {2008}, month = apr, publisher = {EUROSIS-ETI}, - url = {http://repositorium.sdum.uminho.pt/}, urldate = {2021-09-16}, abstract = {Although the educational level of the Portuguese population has improved in the last decades, the statistics keep Portugal at Europe's tail end due to its high student failure rates. In particular, lack of success in the core classes of Mathematics and the Portuguese language is extremely serious. On the other hand, the fields of Business Intelligence (BI)/Data Mining (DM), which aim at extracting high-level knowledge from raw data, offer interesting automated tools that can aid the education domain. The present work intends to approach student achievement in secondary education using BI/DM techniques. Recent real-world data (e.g. student grades, demographic, social and school related features) was collected by using school reports and questionnaires. The two core classes (i.e. Mathematics and Portuguese) were modeled under binary/five-level classification and regression tasks. Also, four DM models (i.e. Decision Trees, Random Forest, Neural Networks and Support Vector Machines) and three input selections (e.g. with and without previous grades) were tested. The results show that a good predictive accuracy can be achieved, provided that the first and/or second school period grades are available. Although student achievement is highly influenced by past evaluations, an explanatory analysis has shown that there are also other relevant features (e.g. number of absences, parent's job and education, alcohol consumption). 
As a direct outcome of this research, more efficient student prediction tools can be developed, improving the quality of education and enhancing school resource management.}, copyright = {openAccess}, @@ -954,7 +906,6 @@ pages = {247--256}, issn = {0747-5632}, doi = {10.1016/j.chb.2017.01.047}, - url = {https://www.sciencedirect.com/science/article/pii/S0747563217300596}, urldate = {2023-10-18}, abstract = {The data about high students' failure rates in introductory programming courses have been alarming many educators, raising a number of important questions regarding prediction aspects. In this paper, we present a comparative study on the effectiveness of educational data mining techniques to early predict students likely to fail in introductory programming courses. Although several works have analyzed these techniques to identify students' academic failures, our study differs from existing ones as follows: (i) we investigate the effectiveness of such techniques to identify students likely to fail at early enough stage for action to be taken to reduce the failure rate; (ii) we analyse the impact of data preprocessing and algorithms fine-tuning tasks, on the effectiveness of the mentioned techniques. In our study we evaluated the effectiveness of four prediction techniques on two different and independent data sources on introductory programming courses available from a Brazilian Public University: one comes from distance education and the other from on-campus. The results showed that the techniques analyzed in our study are able to early identify students likely to fail, the effectiveness of some of these techniques is improved after applying the data preprocessing and/or algorithms fine-tuning, and the support vector machine technique outperforms the other ones in a statistically significant way.}, keywords = {Artificial intelligence in education,Automatic instructional planner,Automatic prediction,Educational data mining,Interactive learning environment,Learner modeling}, @@ -983,7 +934,6 @@ pages = {383--387}, issn = {0097-8418}, doi = {10.1145/1047124.1047473}, - url = {https://dl.acm.org/doi/10.1145/1047124.1047473}, urldate = {2024-02-09}, abstract = {We used a new technique to analyse how students plagiarise programs in an introductory programming course. This involved placing a watermark on a student's program and monitoring programs for the watermark during assignment submission. We obtained and analysed extensive and objective data on student plagiarising behaviour. In contrast to the standard plagiarism detection approaches based on pair comparison, the watermark based approach allows us to distinguish between the supplier and the recipient of the code. This gives us additional insight into student behaviour. We found that the dishonest students did not perform significantly worse than the honest students in the exams. However, when dishonest students are further classified into supplier and recipient, it emerged that the recipient students performed significantly worse than the suppliers.}, keywords = {automatic evaluation,introductory computer programming,plagiarism,watermarks}, @@ -995,7 +945,6 @@ author = {Danielson, R. L. 
and others}, year = {1976}, month = jun, - url = {https://eric.ed.gov/?id=ED125599}, urldate = {2024-02-07}, abstract = {At the University of Illinois at Urbana, a computer based curriculum called Automated Computer Science Education System (ACSES) has been developed to supplement instruction in introductory computer science courses or to assist individuals interested in acquiring a foundation in computer science through independent study. The system, which uses PLATO terminals, is presently in routine use in several courses at the University of Illinois, and it has been used at Wright Community College in Chicago. Recent changes in programing and technical innovations have increased its instructional effectiveness. The first section of this report describes the goals and design of ACSES. Later sections provide yearly reviews of progress made for the duration of a grant from the National Science Foundation. (EMH)}, langid = {english}, @@ -1015,7 +964,6 @@ publisher = {International World Wide Web Conferences Steering Committee}, address = {Republic and Canton of Geneva, CHE}, doi = {10.1145/3041021.3054164}, - url = {https://dl.acm.org/doi/10.1145/3041021.3054164}, urldate = {2024-02-14}, abstract = {Educational Data Mining (EDM) and Learning Analytics (LA) research have emerged as interesting areas of research, which are unfolding useful knowledge from educational databases for many purposes such as predicting students' success. The ability to predict a student's performance can be beneficial for actions in modern educational systems. Existing methods have used features which are mostly related to academic performance, family income and family assets; while features belonging to family expenditures and students' personal information are usually ignored. In this paper, an effort is made to investigate aforementioned feature sets by collecting the scholarship holding students' data from different universities of Pakistan. Learning analytics, discriminative and generative classification models are applied to predict whether a student will be able to complete his degree or not. Experimental results show that proposed method significantly outperforms existing methods due to exploitation of family expenditures and students' personal information feature sets. Outcomes of this EDM/LA research can serve as policy improvement method in higher education.}, isbn = {978-1-4503-4914-7}, @@ -1036,7 +984,6 @@ publisher = {Routledge}, issn = {0260-2938}, doi = {10.1080/02602938.2015.1111294}, - url = {https://doi.org/10.1080/02602938.2015.1111294}, urldate = {2022-08-16}, abstract = {`Rubric' is a term with a variety of meanings. As the use of rubrics has increased both in research and practice, the term has come to represent divergent practices. These range from secret scoring sheets held by teachers to holistic student-developed articulations of quality. Rubrics are evaluated, mandated, embraced and resisted based on often imprecise and inconsistent understandings of the term. This paper provides a synthesis of the diversity of rubrics, and a framework for researchers and practitioners to be clearer about what they mean when they say `rubric'. Fourteen design elements or decision points are identified that make one rubric different from another. 
This framework subsumes previous attempts to categorise rubrics, and should provide more precision to rubric discussions and debate, as well as supporting more replicable research and practice.}, keywords = {assessment design,replicable research,research synthesis,rubric design,rubrics} @@ -1060,7 +1007,6 @@ number = {4}, issn = {1449-5554}, doi = {10.14742/ajet.1198}, - url = {https://ajet.org.au/index.php/AJET/article/view/1198}, urldate = {2024-02-05}, abstract = {Assessment of student learning is a core function of educators. Ideally students should be provided with timely, constructive feedback to facilitate learning. However, provision of high quality feedback becomes more complex as class sizes increase, modes of study expand and academic workloads increase. ICT solutions are being developed to facilitate quality feedback, whilst not impacting adversely upon staff workloads. Hence the research question of this study is 'How do academic staff perceive the usefulness of an automated feedback system in terms of impact on workloads and quality of feedback?' This study used an automated feedback generator (AFG) across multiple tutors and assessment items within an MBA course delivered in a variety of modes. All academics marking in the course completed a survey based on an adaptation of the unified theory of acceptance and use of technology (UTAUT) model. Results indicated that while the workload impact was generally positive with savings in both cost and time, improvements and modifications to the system could further reduce workloads. Furthermore, results indicated that AFG improves quality in terms of timeliness, greater consistency between markers and an increase in the amount of feedback provided.}, copyright = {Copyright (c)}, @@ -1079,7 +1025,6 @@ pages = {21:1--21:21}, issn = {1544-3566}, doi = {10.1145/2086696.2086700}, - url = {https://dl.acm.org/doi/10.1145/2086696.2086700}, urldate = {2023-11-23}, abstract = {An important aspect of system optimization research is the discovery of program traits or behaviors. In this paper, we present an automated method of program characterization which is able to examine and cluster program graphs, i.e., dynamic data graphs or control flow graphs. Our novel approximate graph clustering technology allows users to find groups of program fragments which contain similar code idioms or patterns in data reuse, control flow, and context. Patterns of this nature have several potential applications including development of new static or dynamic optimizations to be implemented in software or in hardware. For the SPEC CPU 2006 suite of benchmarks, our results show that approximate graph clustering is effective at grouping behaviorally similar functions. Graph based clustering also produces clusters that are more homogeneous than previously proposed non-graph based clustering methods. Further qualitative analysis of the clustered functions shows that our approach is also able to identify some frequent unexploited program behaviors. 
These results suggest that our approximate graph clustering methods could be very useful for program characterization.}, file = {/home/charlotte/sync/Zotero/storage/6WUFLLW5/Demme and Sethumadhavan - 2012 - Approximate graph clustering for program character.pdf} @@ -1089,7 +1034,6 @@ title = {{Papyros: schrijven, uitvoeren en testen van Python-code in de browser}}, author = {De Ridder, Winnie and Van Petegem, Charlotte and Dawyndt, Peter and Mesuere, Bart}, year = {2022}, - url = {http://lib.ugent.be/catalog/rug01:003059976}, langid = {dutch}, school = {Ghent University}, file = {/home/charlotte/sync/Zotero/storage/HG3PDTXX/De Ridder et al. - 2022 - Papyros schrijven, uitvoeren en testen van Python.pdf} @@ -1106,7 +1050,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3027385.3027441}, - url = {https://doi.org/10.1145/3027385.3027441}, urldate = {2022-11-25}, abstract = {Many introductory programming environments generate a large amount of log data, but making insights from these data accessible to instructors remains a challenge. This research demonstrates that student outcomes can be accurately predicted from student program states at various time points throughout the course, and integrates the resulting predictive models into an instructor dashboard. The effectiveness of the dashboard is evaluated by measuring how well the dashboard analytics correctly suggest that the instructor help students classified as most in need. Finally, we describe a method of matching low-performing students with high-performing peer tutors, and show that the inclusion of peer tutors not only increases the amount of help given, but the consistency of help availability as well.}, isbn = {978-1-4503-4870-6}, @@ -1135,7 +1078,6 @@ number = {3}, pages = {16:1--16:22}, doi = {10.1145/3264507}, - url = {https://doi.org/10.1145/3264507}, urldate = {2021-04-30}, abstract = {This study analyzes the impact of adding a review exercises module to an online tool used in a software engineering degree program. The objective of the module is to promote students' self-learning effort to improve their performance. We also intend to determine if this new feature has any effect on the amount of code copies detected in lab sessions when using the same online tool. Two groups of students were compared quantitatively: the first group used the tool exclusively during lab sessions, whereas the second group had the option of employing the tool's new module to enhance their study. The tool allows us to collect interesting data related to the focus of this research: supplementary work completed voluntarily by students and the percentage of students copying others' code during compulsory lab sessions. The results show that the students in the second group achieved better academic results and copied less in lab sessions. In the second group, the students who invested more effort in doing revision exercises and copied less in lab sessions obtained better results; and, interestingly, the effort invested in completing review exercises did not seem to compensate for the learning effort avoided by copying others' exercises during lab sessions. The results show the advantages of a tool used with a dual orientation: compulsory and voluntary. 
Mandatory usage in lab sessions establishes some milestones that, eventually, act as an incentive fostering learning, while voluntary use reinforces students' perception of the tool's usefulness in terms of learning.}, keywords = {academic performance,code copying,non-compulsory exercises,Online learning tool}, @@ -1149,7 +1091,6 @@ publisher = {Apress}, address = {Berkeley, CA}, doi = {10.1007/978-1-4302-3802-7}, - url = {http://link.springer.com/10.1007/978-1-4302-3802-7}, urldate = {2022-08-16}, isbn = {978-1-4302-3801-0 978-1-4302-3802-7}, langid = {english} @@ -1167,7 +1108,6 @@ pages = {4--es}, issn = {1531-4278}, doi = {10.1145/1163405.1163409}, - url = {https://doi.org/10.1145/1163405.1163409}, urldate = {2022-08-16}, abstract = {Systems that automatically assess student programming assignments have been designed and used for over forty years. Systems that objectively test and mark student programming work were developed simultaneously with programming assessment in the computer science curriculum. This article reviews a number of influential automatic assessment systems, including descriptions of the earliest systems, and presents some of the most recent developments. The final sections explore a number of directions automated assessment systems may take, presenting current developments alongside a number of important emerging e-learning specifications.}, keywords = {computer-based training,Education,learning,programming assessment}, @@ -1185,7 +1125,6 @@ pages = {29--44}, publisher = {Informing Science Institute}, issn = {1552-2237}, - url = {https://www.learntechlib.org/p/44796/}, urldate = {2022-10-03}, abstract = {This paper depicts the sustainability of Open Educational Resources (OERs) in terms of the three models: funding, technical, and content. Discussion and recommendations are focused on the sustainability of OERs and the requirement that we think of OERs as only part of a larger picture -- one that includes volunteers and incentives, community and partnerships, co-production and sharing, distributed management and control.}, langid = {english}, @@ -1253,7 +1192,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/971300.971312}, - url = {https://doi.org/10.1145/971300.971312}, urldate = {2022-03-03}, abstract = {Introductory computer science students rely on a trial and error approach to fixing errors and debugging for too long. Moving to a reflection in action strategy can help students become more successful. Traditional programming assignments are usually assessed in a way that ignores the skills needed for reflection in action, but software testing promotes the hypothesis-forming and experimental validation that are central to this mode of learning. By changing the way assignments are assessed--where students are responsible for demonstrating correctness through testing, and then assessed on how well they achieve this goal--it is possible to reinforce desired skills. Automated feedback can also play a valuable role in encouraging students while also showing them where they can improve.}, isbn = {978-1-58113-798-9}, @@ -1273,7 +1211,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1384271.1384371}, - url = {https://dl.acm.org/doi/10.1145/1384271.1384371}, urldate = {2023-08-21}, abstract = {This demonstration introduces participants to using Web-CAT, an open-source automated grading system. 
Web-CAT is customizable and extensible, allowing it to support a wide variety of programming languages and assessment strategies. Web-CAT is most well-known as the system that "grades students on how well they test their own code," with experimental evidence that it offers greater learning benefits than more traditional output-comparison grading. Participants will learn how to set up courses, prepare reference tests, set up assignments, and allow graders to manually grade for design.}, isbn = {978-1-60558-078-4}, @@ -1286,7 +1223,6 @@ shorttitle = {Web-{{CAT}}}, author = {Edwards, S.}, year = {2006}, - url = {https://www.semanticscholar.org/paper/Web-CAT-%3A-the-Web-based-Center-for-Automated-Edwards/9bad816ad294dfdf13599a7e3ac11e72d77af7fc}, urldate = {2024-02-20}, abstract = {The Web-CAT software system for evaluating student programming assignments has had substantial impact both within Virginia Tech and in other universities. Web-CAT, the Web-based Center for Automated Testing, is a tool that provides rapid, directed comments on student work, encourages students to write software tests for their own work, and empowers students with the responsibility of demonstrating the correctness and validity of their own programs. Web-CAT has allowed Dr. Edwards to transform the way programming assignments are given and assessed in our freshman and sophomore CS programming courses. While students have always focused on ``writing code,'' Web-CAT has given instructors a tool that encourages students to step back and reflect on their own work and what they are trying to achieve. Web-CAT does not grade student programs for correctness---instead, the student is responsible for demonstrating correctness by writing and running test cases. Each test case is a minihypothesis about how the student believes his or her program should work, and students continually write, refine, and experimentally validate these hypotheses as they develop solutions. Web-CAT then grades students on how well they test their own programs, that is, how rigorous and convincing is their own demonstration of the correctness of their own work. As a result, students learn more and produce higher-quality code. Students who use Web-CAT produce an average of 28\% fewer program bugs, are more likely to turn their work in on time, and receive higher scores. Further, students see clear benefits to using Web-CAT, since it increases their confidence in the correctness of their own work, helps them incrementally develop solutions, and reduces some of the most frustrating factors that cause students to fail to complete working solutions. The lessons learned from these development efforts have been disseminated through journal articles, conference papers, demonstrations, poster presentations, workshop papers, and four tutorials at national conferences.} } @@ -1328,7 +1264,6 @@ publisher = {SAGE Publications Inc}, issn = {0033-2941}, doi = {10.2466/pr0.2001.88.3.889}, - url = {https://doi.org/10.2466/pr0.2001.88.3.889}, urldate = {2022-02-24}, abstract = {Performance on two multiple-choice testing procedures was examined during unit tests and a final examination. The Immediate Feedback Assessment Technique provided immediate response feedback in an answer-until-correct style of responding. The testing format which served as a point of comparison was the Scantron form. One format was completed by students in introductory psychology courses during unit tests whereas all students used the Scantron form on the final examination. 
Students tested with Immediate Feedback forms on the unit tests correctly answered more of the final examination questions which were repeated from earlier unit tests than did students tested with Scantron forms. Also, students tested with Immediate Feedback forms correctly answered more final examination questions previously answered incorrectly on the unit tests than did students tested previously with Scantron forms.}, langid = {english}, @@ -1357,7 +1292,6 @@ year = {2002}, month = nov, journal = {{Organization for the Advancement of Structured Information Standards (OASIS) Standard}}, - url = {http://www.oasis-open.org/committees/download.php/1371/oasis-sstc-saml-core-1.0.pdf}, file = {/home/charlotte/sync/Zotero/storage/US6RPQBH/Farrell et al. - 2002 - Assertions and Protocol for the OASIS Security Ass.pdf} } @@ -1373,7 +1307,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3358711.3361626}, - url = {https://dl.acm.org/doi/10.1145/3358711.3361626}, urldate = {2024-01-22}, abstract = {Students learning to program often need help completing assignments and understanding why their code does not work as they expect it to. One common place where they seek such help is at teaching assistant office hours. We found that teaching assistants in introductory programming (CS1) courses frequently answer some variant of the question ``Am I on the Right Track?''. The goal of this work is to develop an automated tool that provides similar feedback for students in real-time from within an IDE as they are writing their program. Existing automated tools lack the generality that we seek, often assuming a single approach to a problem, using hand-coded error models, or applying sample fixes from other students. In this paper, we explore the use of program synthesis to provide less constrained automated answers to ``Am I on the Right Track'' (AIORT) questions. We describe an observational study of TA-student interactions that supports targeting AIORT questions, as well as the development of and design considerations behind a prototype integrated development environment (IDE). The IDE uses an existing program synthesis engine to determine if a student is on the right track and we present pilot user studies of its use.}, isbn = {978-1-4503-6989-3}, @@ -1393,7 +1326,6 @@ publisher = {Taylor \& Francis}, issn = {0162-1459}, doi = {10.1080/01621459.1982.10477894}, - url = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1982.10477894}, urldate = {2021-02-19}, abstract = {An example is given of a family of distributions on [--- 1, 1] with a continuous one-dimensional parameterization that joins the triangular distribution (when {$\Theta$} = 0) to the uniform (when {$\Theta$} = 1), for which the maximum likelihood estimates exist and converge strongly to {$\Theta$} = 1 as the sample size tends to infinity, whatever be the true value of the parameter. A modification that satisfies Cram{\'e}r's conditions is also given.}, keywords = {Asymptotic efficiency,Inconsistency,Maximum likelihood estimates,Mixtures}, @@ -1410,7 +1342,6 @@ number = {5/6}, pages = {304--317}, issn = {1753-5263}, - url = {http://www.inderscience.com/info/ingeneral/forthcoming.php?jcode=ijtel}, urldate = {2022-08-16}, abstract = {Learning analytics is a significant area of technology-enhanced learning that has emerged during the last decade. 
This review of the field begins with an examination of the technological, educational and political factors that have driven the development of analytics in educational settings. It goes on to chart the emergence of learning analytics, including their origins in the 20th century, the development of data-driven analytics, the rise of learning-focused perspectives and the influence of national economic concerns. It next focuses on the relationships between learning analytics, educational data mining and academic analytics. Finally, it examines developing areas of learning analytics research, and identifies a series of future challenges.}, langid = {english}, @@ -1441,7 +1372,6 @@ pages = {1--3}, issn = {2472-7687}, doi = {10.23919/EAEEIE55804.2023.10181316}, - url = {https://ieeexplore.ieee.org/abstract/document/10181316}, urldate = {2023-10-02}, abstract = {The teaching of diverse programming topics and languages is a fundamental component of electrical engineering education. However, it is a complex task facing many challenges such as the need to accommodate students with very different programming backgrounds and with very different levels of motivation for the programming field -- these are classical difficulties with electrical engineering candidates.A web-based collaborative tool and a methodology to support student interaction and assistance in classroom teaching of programming languages in electrical engineering courses were presented in [1]. The main technological choices and functionalities, as well as examples of implemented courses, were described. The tool was developed aiming to be flexible, scalable and with high evolution potential.In this paper, we present the evolution of the basis tool through technology improvements, a set of new functionalities and added programming languages. The main technology upgrades include the full integration of the Visual Studio Code for the Web editor, a new web interface we called iWeb-TD and the expansion of the multi-user capabilities. In terms of functionalities, in addition to PHP and Octave, it is now possible to teach C/C++, Python, Java, and ipynb. A new major component is the integration of active debugging with step execution for all supported languages. This is a fundamental aspect that allows students with weak programming skills to evolve in a structured form. The tool can also allow the teacher to develop and test pedagogical elements in Python or Octave and publish them for student access in read-only mode using the Jupyter-Notebook technology.The paper includes example pedagogical elements for two electrical engineering courses taught using the enhanced platform, including the compilation of associated student feedback.}, file = {/home/charlotte/sync/Zotero/storage/WPW7AYUK/Fonseca et al. - 2023 - A web-based platform and a methodology to teach pr.pdf;/home/charlotte/sync/Zotero/storage/SIFC9TWB/10181316.html} @@ -1468,7 +1398,6 @@ publisher = {{Vilnius University Institute of Data Science and Digital Technologies}}, issn = {1648-5831, 2335-8971}, doi = {10.15388/infedu.2006.05}, - url = {https://www.infedu.vu.lt/journal/INFEDU/article/570}, urldate = {2022-08-16}, abstract = {For many programming tasks we would be glad to have some kind of automatic evaluation process. As an example, most of the programming contests use an automatic evaluation of the contestants' submissions. While this approach is clearly highly efficient, it also has some drawbacks. 
Often it is the case that the test inputs are not able to ``break'' all flawed submissions. In this article we show that the situation is not pleasant at all - for some programming tasks it is impossible to design good test inputs. Moreover, we discuss some ways how to recognize such tasks, and discuss other possibilities for doing the evaluation. The discussion is focused on programming contests, but the results can be applied for any programming tasks, e.g., assignments in school.}, langid = {english}, @@ -1487,7 +1416,6 @@ publisher = {SAGE Publications Inc}, issn = {0091-0260}, doi = {10.1177/009102600903800103}, - url = {https://doi.org/10.1177/009102600903800103}, urldate = {2021-04-30}, abstract = {Applicants for the jobs of engineering aide and plumber with a large public employer were asked to provide their assessments of the perceived fairness of two different HR selection devices---a background information form and a written job knowledge test. Significant differences were found in the applicants' perceptions of the fairness of the two selection devices. In addition, the differences found depended on the classification of the job for which individuals were applying. Specifically, engineering aide applicants saw the background information inventory as more just, while plumber applicants preferred the written exam. Implications of the results for HR selection are discussed.}, langid = {english}, @@ -1505,7 +1433,6 @@ pages = {275--278}, issn = {0001-0782}, doi = {10.1145/364914.364937}, - url = {https://dl.acm.org/doi/10.1145/364914.364937}, urldate = {2024-02-06}, file = {/home/charlotte/sync/Zotero/storage/4WT3CXI4/Forsythe and Wirth - 1965 - Automatic grading programs.pdf;/home/charlotte/sync/Zotero/storage/6E4CRJY3/forsythe1965.pdf.pdf} } @@ -1519,7 +1446,6 @@ pages = {3--31}, publisher = {University of Gloucestershire}, issn = {1742-240X}, - url = {https://eprints.glos.ac.uk/3609/}, urldate = {2022-02-21}, abstract = {Much evaluation of teaching focuses on what teachers do in class. This article focuses on the evaluation of assessment arrangements and the way they affect student learning out of class. It is assumed that assessment has an overwhelming influence on what, how and how much students study. The article proposes a set of `conditions under which assessment supports learning' and justifies these with reference to theory, empirical evidence and practical experience. These conditions are offered as a framework for teachers to review the effectiveness of their own assessment practice.}, langid = {english}, @@ -1538,7 +1464,6 @@ pages = {304--315}, issn = {0959-4752}, doi = {10.1016/j.learninstruc.2009.08.007}, - url = {https://www.sciencedirect.com/science/article/pii/S0959475209000759}, urldate = {2021-09-15}, abstract = {The present study examined the effectiveness of (a) peer feedback for learning, more specifically of certain characteristics of the content and style of the provided feedback, and (b) a particular instructional intervention to support the use of the feedback. A quasi-experimental repeated measures design was adopted. Writing assignments of 43 students of Grade 7 in secondary education showed that receiving `justified' comments in feedback improves performance, but this effect diminishes for students with better pretest performance. Justification was superior to the accuracy of comments. 
The instructional intervention of asking assessees to reflect upon feedback after peer assessment did not increase learning gains significantly.}, langid = {english}, @@ -1558,7 +1483,6 @@ pages = {585--594}, issn = {1877-0428}, doi = {10.1016/j.sbspro.2012.11.450}, - url = {https://www.sciencedirect.com/science/article/pii/S1877042812054365}, urldate = {2021-09-15}, abstract = {The present study examines the added value of peer assessment in a computer-supported collaborative learning environment (CSCL) in higher education by focusing on (1) the learning effect, (2) wiki product improvement and (3) students' perception of peer feedback in a CSCL-environment. The present study involved two conditions: structured peer feedback (S-PFB) and non-structured (control). The results do not indicate a significant learning effect between pretest and posttest or between the conditions. However, for both conditions the peer feedback process improved significantly the quality of the wiki product from draft to final version, although no significant differences between the control and the experimental group (S-PFB) were found. Furthermore, the S-PFB group adopted a more critical attitude when providing and receiving peer feedback. The S-PFB group also perceived the received peer feedback as being more profound and detailed.}, langid = {english}, @@ -1597,7 +1521,6 @@ publisher = {Routledge}, issn = {0144-3410}, doi = {10.1080/01443410.2020.1802645}, - url = {https://doi.org/10.1080/01443410.2020.1802645}, urldate = {2022-08-16}, abstract = {Performance on homework questions was compared with performance on related exam questions querying the same fact or principle, was used to assess the effect of answering online homework questions on subsequent exam performance. A distinctive pattern of performance was found for some students in which superior performance on online homework questions resulted in poorer exam performance. When assessed over an eleven-year period, for 2433 students in 12 different college lecture courses, the percent of students who did not benefit from correctly answering homework questions increased from 14\% in 2008 to 55\% in 2017. During the most recent two years of the study, when students were asked how they did their homework, students who benefitted from homework reported generating their own answers and students who reported copying the answers from another source did not benefit from homework.}, keywords = {Generation effect,long-term memory,testing effect} @@ -1614,7 +1537,6 @@ pages = {39--43}, issn = {0097-8418}, doi = {10.1145/355354.355369}, - url = {https://doi.org/10.1145/355354.355369}, urldate = {2022-02-17}, abstract = {Performances are analysed over successive semesters for a cohort of first-year students doing computer programming. Attainment is related to performance in other studies. However, many factors have roles. Learning style and problem-solving skills are important in information technology in Semester I. Gender and secondary school outcomes matter in introductory programming, also in Semester I. Dislike of programming influences outcomes in introductory programming and in Data Structures and Algorithms in Semester II. 
For a number of indicators, influence fluctuates over time and across area of study.}, file = {/home/charlotte/sync/Zotero/storage/C5V99SA8/Goold and Rimmer - 2000 - Factors affecting performance in first-year comput.pdf} @@ -1631,7 +1553,6 @@ pages = {103--109}, publisher = {{International Society for Exploring Teaching and Learning}}, issn = {1812-9129}, - url = {https://eric.ed.gov/?id=EJ1016536}, urldate = {2022-10-03}, abstract = {In this paper, we provide an in-depth view of the Undergraduate Teaching Assistant (UTA) program at Virginia Commonwealth University as a potential model for other large research universities who might wish to implement similar learner-centered initiatives in their first-year experience courses. Unlike graduate teaching assistants, whose primary objective in the classroom is to assist the professor, the UTAs assist the students by facilitating student engagement, offering peer-to-peer assistance, and modeling successful academic practices. The UTA program, founded in 2008, is integrated through all levels of VCU's University College. This paper explores the benefits offered to all stakeholders: faculty, students, and undergraduate teaching assistants.}, langid = {english}, @@ -1674,7 +1595,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2445196.2445368}, - url = {https://doi.org/10.1145/2445196.2445368}, urldate = {2022-08-16}, abstract = {This paper presents Online Python Tutor, a web-based program visualization tool for Python, which is becoming a popular language for teaching introductory CS courses. Using this tool, teachers and students can write Python programs directly in the web browser (without installing any plugins), step forwards and backwards through execution to view the run-time state of data structures, and share their program visualizations on the web. In the past three years, over 200,000 people have used Online Python Tutor to visualize their programs. In addition, instructors in a dozen universities such as UC Berkeley, MIT, the University of Washington, and the University of Waterloo have used it in their CS1 courses. Finally, Online Python Tutor visualizations have been embedded within three web-based digital Python textbook projects, which collectively attract around 16,000 viewers per month and are being used in at least 25 universities. Online Python Tutor is free and open source software, available at pythontutor.com.}, isbn = {978-1-4503-1868-6}, @@ -1713,7 +1633,6 @@ publisher = {Routledge}, issn = {0899-3408}, doi = {10.1080/08993408.2011.579808}, - url = {https://doi.org/10.1080/08993408.2011.579808}, urldate = {2022-08-16}, abstract = {This article provides a review of educational research literature focused on pair programming in the undergraduate computer science curriculum. Research suggests that the benefits of pair programming include increased success rates in introductory courses, increased retention in the major, higher quality software, higher student confidence in solutions, and improvement in learning outcomes. Moreover, there is some evidence that women, in particular, benefit from pair programming. The literature also provides evidence that the transition from paired to solo programming is easy for students. The greatest challenges for paired students appear to concern scheduling and partner compatibility. 
This review also considers practical issues such as assigning partners, teaching students to work in pairs, and assessing individual contributions, and concludes with a discussion of open research questions.}, keywords = {collaborative learning,pair programming} @@ -1730,7 +1649,6 @@ pages = {2795--2828}, issn = {1573-7616}, doi = {10.1007/s10664-017-9579-0}, - url = {https://doi.org/10.1007/s10664-017-9579-0}, urldate = {2023-11-16}, abstract = {Program comprehension is an important skill for programmers -- extending and debugging existing source code is part of the daily routine. Syntax highlighting is one of the most common tools used to support developers in understanding algorithms. However, most research in this area originates from a time when programmers used a completely different tool chain. We examined the influence of syntax highlighting on novices' ability to comprehend source code. Additional analyses cover the influence of task type and programming experience on the code comprehension ability itself and its relation to syntax highlighting. We conducted a controlled experiment with 390 undergraduate students in an introductory Java programming course. We measured the correctness with which they solved small coding tasks. Each test subject received some tasks with syntax highlighting and some without. The data provided no evidence that syntax highlighting improves novices' ability to comprehend source code. There are very few similar experiments and it is unclear as of yet which factors impact the effectiveness of syntax highlighting. One major limitation may be the types of tasks chosen for this experiment. The results suggest that syntax highlighting squanders a feedback channel from the IDE to the programmer that can be used more effectively.}, langid = {english}, @@ -1747,7 +1665,6 @@ number = {RFC 6749}, institution = {Internet Engineering Task Force}, doi = {10.17487/RFC6749}, - url = {https://datatracker.ietf.org/doc/rfc6749}, urldate = {2022-08-16}, abstract = {The OAuth 2.0 authorization framework enables a third-party application to obtain limited access to an HTTP service, either on behalf of a resource owner by orchestrating an approval interaction between the resource owner and the HTTP service, or by allowing the third-party application to obtain access on its own behalf. This specification replaces and obsoletes the OAuth 1.0 protocol described in RFC 5849. [STANDARDS-TRACK]}, file = {/home/charlotte/sync/Zotero/storage/WAFXIJ6P/Hardt - 2012 - The OAuth 2.0 Authorization Framework.pdf} @@ -1765,7 +1682,6 @@ publisher = {American Educational Research Association}, issn = {0034-6543}, doi = {10.3102/003465430298487}, - url = {https://doi.org/10.3102/003465430298487}, urldate = {2022-02-21}, abstract = {Feedback is one of the most powerful influences on learning and achievement, but this impact can be either positive or negative. Its power is frequently mentioned in articles about learning and teaching, but surprisingly few recent studies have systematically investigated its meaning. This article provides a conceptual analysis of feedback and reviews the evidence related to its impact on learning and achievement. This evidence shows that although feedback is among the major influences, the type of feedback and the way it is given can be differentially effective. 
A model of feedback is then proposed that identifies the particular properties and circumstances that make it effective, and some typically thorny issues are discussed, including the timing of feedback and the effects of positive and negative feedback. Finally, this analysis is used to suggest ways in which feedback can be used to enhance its effectiveness in classrooms.}, langid = {english}, @@ -1786,7 +1702,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3501709.3544278}, - url = {https://dl.acm.org/doi/10.1145/3501709.3544278}, urldate = {2024-02-09}, isbn = {978-1-4503-9195-5}, keywords = {Adaptive Learning Systems,Computer Programming Practice,Scaffolding}, @@ -1805,7 +1720,6 @@ pages = {1209--1230}, issn = {1955-2505}, doi = {10.1007/s12008-022-00930-0}, - url = {https://doi.org/10.1007/s12008-022-00930-0}, urldate = {2023-10-04}, abstract = {Learning Analytics is a field that measures, analyses, and reports data about students and their contexts to understand/improve learning and the place in which it occurs. Educational institutions have different motivations to use Learning Analytics. Some want to improve students' outcomes or optimize their educational technology and reduce the dropout rate and others. This concept is presented with practical experiences that have been acquired and validated by 16 institutions. Besides, an analysis of the results, challenges, and expectations was performed. It was found that the majority of initiatives use Learning Analytics to improve retention of students; few are focused merely on improving the teaching/learning process or academic issues. The organizations invest their resources in acquiring Learning Analytics software; however, most universities develop their technology. The technology helps organizations be preventive and not reactive as various models determine students at risk of failing. This information allows them to make suitable interventions, which increases the success of the initiative. CoViD19 pandemic is also put in context in this research; Learning Analytics could be a great approach to help the educational community adapt effectively to the new forms of educational delivery. Based on an exhaustive bibliographic review, various educational projects and experiences were analyzed, presenting an overview detailing applications, results, and potentialities and opportunities, hoping that this article will be a useful reference for researchers and faculty to exploit Learning Analytics education.}, langid = {english}, @@ -1824,7 +1738,6 @@ pages = {272--275}, issn = {0001-0782}, doi = {10.1145/362946.362981}, - url = {https://dl.acm.org/doi/10.1145/362946.362981}, urldate = {2024-02-06}, abstract = {A discussion is given of alterations that were made to a typical university operating system to record the results of programming exercises in three different languages, including assembly language. In this computer-controlled grading scheme provision is made for testing with programmer-supplied data and for final runs with system-supplied data. 
Exercises run under the scheme may be mixed with other programs, and no special recognition of exercises by the operators is necessary.}, keywords = {automatic grading program,programming exercises}, @@ -1843,7 +1756,6 @@ pages = {287--304}, issn = {1573-7608}, doi = {10.1023/A:1026364126982}, - url = {https://doi.org/10.1023/A:1026364126982}, urldate = {2024-02-09}, abstract = {This document reports on the results of re-designing and re-implementing the Ceilidh courseware system. It highlights the limitations identified in the thirteen years of Ceilidh's use at the University of Nottingham. It also illustrates how most of these limitations have been resolved by re-designing Ceilidh's architecture and improving various aspects of the marking and administrating processes. The new system, entitled CourseMarker, offers enhanced functionality by adding useful features that have long been needed by Ceilidh's community. The paper concludes with an evaluation of the changes and a brief report on the experience of CourseMarker's use over the last three years. Finally, recent developments and future directions are discussed.}, langid = {english}, @@ -1856,7 +1768,6 @@ shorttitle = {Programming in {{Introductory Physics}}}, author = {Hole, Niklas Molnes}, year = {2020}, - url = {https://ntnuopen.ntnu.no/ntnu-xmlui/handle/11250/2777908}, urldate = {2023-10-02}, abstract = {The Norwegian government has decided that from 2021 programming will be added to the physics curriculum at Norwegian upper secondary schools (USS). Teachers have been able to take part in ProFag (https://www.mn.uio.no/kurt/livslang-lering/profag) and other initiatives to learn how to use programming in their respective courses. Developing tools to help with this transition can therefore be decisive for success in advanced courses such as physics. However, there is a lack of tools specifically for introducing programming in entirely basic physics courses. In addition, the available tools that introduce programming in general lack functionality for creating customised exercises. To help solve these problems, a Design Science Research (DSR) process was carried out. By designing a tool, an online learning platform (OLP), that could introduce programming in an introductory physics course, it was possible to identify which elements were important in such a tool. A feature was also designed to let the user create their own programming exercises. This made it possible to find out how a user interface (UI) can be adapted to physics teachers. To obtain a good answer to both of these problems, the design was elaborated and evaluated with three different target groups: university students (pilot), experts working in this field (expert), and physics teachers without any prior competence in this field (main). A total of 18 elements were found to be essential in the design of a tool that aims to introduce programming in a basic physics course. It was also found that, when creating a user interface for building programming exercises suited to such basic physics, it was important to be able to hide distracting code and to be able to test the exercise in a realistic environment while shaping it. 
In addition, it was important that the user interface was built so that composing exercises corresponds to the way exercise authors usually create exercises. For the group of physics teachers it was more interesting to use ready-made exercises than to create them themselves. However, they were interested in modifying existing exercises. Beyond what was found in the evaluation, the OLP developed here also aims to inspire research and the development of tools made explicitly for introducing programming in specific subjects. The OLP artefact designed and developed during the work on this thesis is available as an online demo (https://master-thesis-artifact.now.sh/) and as source code (https://github.com/niklasmh/master-thesis-artifact)}, langid = {english}, @@ -1876,7 +1787,6 @@ pages = {528--529}, issn = {0001-0782}, doi = {10.1145/367415.367422}, - url = {https://doi.org/10.1145/367415.367422}, urldate = {2022-08-16}, abstract = {Fifteen months ago the first version of an ``automatic grader'' was tried with a group of twenty students taking a formal course in programming. The first group of twenty programs took only five minutes on the computer (an IBM 650). With such a satisfactory beginning, the grader was then used for the entire course with this group of students and have been used at Rensselaer ever since. For all exercises, the average time spent on the computer has run from half a minute to a minute for each student. In general only an eighth as much computer time is required when the grader is used as is required when each student is expected to run his own program, probably less than a third as much staff time, and considerably less student time. The grader easily justifies itself on economic grounds. It accomplishes more than savings in time and money; it makes possible the teaching of programming to large numbers of students. This spring we had 80 students taking a full semester course in programming; over 120 are expected next spring. We could not accommodate such numbers without the use of the grader. Even though the grader makes the teaching of programming to large numbers of students possible and economically feasible, a most serious question remains, how well did the students learn? After fifteen months, our experience leads us to believe that students learn programming not only as well but probably better than they did under the method we did use---laboratory groups of four or five students. They are not as skilled in machine operation, however, since they get only a brief introduction to it late in the course. After learning programming, very little time is needed for each student to become at least an adequate machine operator. 
Students seem to like the grader and are not reluctant to suggest improvements!}, file = {/home/charlotte/sync/Zotero/storage/JS69Q9SX/Hollingsworth - 1960 - Automatic graders for programming classes.pdf} @@ -1894,7 +1804,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/330560.331081}, - url = {https://dl.acm.org/doi/10.1145/330560.331081}, urldate = {2024-02-09}, isbn = {978-0-89791-969-2}, keywords = {Ceilidh,classroom,collaboration,composition,writing}, @@ -1912,7 +1821,6 @@ pages = {183--190}, issn = {0360-1315}, doi = {10.1016/0360-1315(93)90086-X}, - url = {https://www.sciencedirect.com/science/article/pii/036013159390086X}, urldate = {2024-02-07}, abstract = {Software metrics have been used extensively to provide quantitative measures of software characteristics. This paper aims at evaluating the relevance of using software metrics as means of assessing students' performance in programming. The study focusses on the use of four basic software metrics which are combined to form a single assessment score. The four metrics are respectively those which measure programming skill, complexity, programming style and programming efficiency. Measurements suggested that the lines of code metric is a good candidate for measuring programming skill. McCabe's cyclomatic complexity metrics have been adopted for measuring program complexity. Program execution times are used as the measuring yardsticks for programming efficiency. To facilitate automatic assessment, a program analyzer has been constructed which can provide measures of all the relevant software metrics together with the appropriate assessment scores. The tool was tested with sample assignments of Pascal programs and good distribution of marks has been obtained.}, file = {/home/charlotte/sync/Zotero/storage/TI5Q63PJ/hung1993.pdf.pdf;/home/charlotte/sync/Zotero/storage/37JRXCS4/036013159390086X.html} @@ -1929,7 +1837,6 @@ pages = {350--353}, issn = {0001-0782}, doi = {10.1145/359581.359603}, - url = {https://doi.org/10.1145/359581.359603}, urldate = {2022-08-23}, abstract = {Previously published algorithms for finding the longest common subsequence of two sequences of length n have had a best-case running time of O(n2). An algorithm for this problem is presented which has a running time of O((r + n) log n), where r is the total number of ordered pairs of positions at which the two sequences match. Thus in the worst case the algorithm has a running time of O(n2 log n). However, for those applications where most positions of one sequence match relatively few positions in the other sequence, a running time of O(n log n) can be expected.}, keywords = {efficient algorithms,longest common subsequence}, @@ -1953,7 +1860,6 @@ number = {3}, pages = {13:1--13:23}, doi = {10.1145/2499947.2499950}, - url = {https://doi.org/10.1145/2499947.2499950}, urldate = {2021-09-30}, abstract = {POGIL has been successfully implemented in a scientific computing course to teach science students how to program in Python. Following POGIL guidelines, the authors have developed guided inquiry activities that lead student teams to discover and understand programming concepts. With each iteration of the scientific computing course, the authors have refined the activities and learned how to better adapt POGIL for the computer science classroom. This article details how POGIL activities differ from both traditional computer science labs and other active-learning pedagogies. 
Background is provided on POGIL's effectiveness. The article then includes a full description of how POGIL activities were used in the scientific computing course, as well as an example POGIL activity on recursion. Discussion is provided on how to facilitate and develop POGIL activities. Quotes from student evaluations and an assessment on how well students learned to program are provided.}, keywords = {active learning,inquiry-based learning,POGIL,process skills,process-oriented guided inquiry learning}, @@ -1978,7 +1884,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1930464.1930480}, - url = {https://doi.org/10.1145/1930464.1930480}, urldate = {2022-08-16}, abstract = {This paper presents a systematic literature review of the recent (2006--2010) development of automatic assessment tools for programming exercises. We discuss the major features that the tools support and the different approaches they are using both from the pedagogical and the technical point of view. Examples of these features are ways for the teacher to define tests, resubmission policies, security issues, and so forth. We have also identified a list of novel features, like assessing web software, that are likely to get more research attention in the future. As a conclusion, we state that too many new systems are developed, but also acknowledge the current reasons for the phenomenon. As one solution we encourage opening up the existing systems and joining efforts on developing those further. Selected systems from our survey are briefly described in Appendix A.}, isbn = {978-1-4503-0520-4}, @@ -1995,7 +1900,6 @@ pages = {59--72}, issn = {1477-8424}, doi = {10.1016/j.cl.2018.01.004}, - url = {https://www.sciencedirect.com/science/article/pii/S1477842417301045}, urldate = {2023-10-05}, abstract = {Assessment is an integral part of education often used to evaluate students, but also to provide them with feedback. It is essential to ensure that assessment is fair, objective, and equally applied to all students. This holds, for instance, in multiple-choice tests, but, unfortunately, it is not ensured in the assessment of source code, which is still a manual and error-prone task. In this paper, we present JavAssess , a Java library with an API composed of around 200 methods to automatically inspect, test, mark, and correct Java code. It can be used to produce both black-box (based on output comparison) and white-box (based on the internal properties of the code) assessment tools. This means that it allows for marking the code even if it is only partially correct. We describe the library, how to use it, and we provide a complete example to automatically mark and correct a student's code. We also report the use of this system in a real university context to compare manual and automatic assessment in university courses. The study reports the average error in the marks produced by teachers when assessing source code manually, and it shows that the system automatically assesses around 50\% of the work.}, keywords = {Assessment,Java}, @@ -2037,7 +1941,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/268084.268210}, - url = {https://doi.org/10.1145/268084.268210}, urldate = {2022-08-16}, abstract = {The task of grading solutions to student programming exercises is laborious and error-prone. We have developed a software tool called ASSYST that is designed to relieve a tutor of much of the burden of assessing such programs. 
ASSYST offers a graphical interface that can be used to direct all aspects of the grading process, and it considers a wide range of criteria in its automatic assessment. Experience with the system has been encouraging.}, isbn = {978-0-89791-889-3}, @@ -2055,7 +1958,6 @@ pages = {164--167}, issn = {0097-8418}, doi = {10.1145/353519.343160}, - url = {https://doi.org/10.1145/353519.343160}, urldate = {2022-09-09}, abstract = {Desirable though fully automated assessment of student programming assignments is, it is an area that is beset by difficulties. While it is not contested that some aspects of assessment can be performed much more efficiently and accurately by computer, there are many others that still require human involvement. We have therefore designed a system that combines the strengths of the two approaches, the assessment software calling upon the skills of the human tutor where necessary to make sensible judgements. The technique has been used successfully on a systems programming course for several years, and student feedback has been supportive.}, file = {/home/charlotte/sync/Zotero/storage/ERKK7ECV/Jackson - 2000 - A semi-automated approach to online assessment.pdf} @@ -2072,7 +1974,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1352135.1352315}, - url = {https://doi.org/10.1145/1352135.1352315}, urldate = {2022-03-03}, abstract = {Coercing new programmers to adopt disciplined development practices such as thorough unit testing is a challenging endeavor. Test-driven development (TDD) has been proposed as a solution to improve both software design and testing. Test-driven learning (TDL) has been proposed as a pedagogical approach for teaching TDD without imposing significant additional instruction time. This research evaluates the effects of students using a test-first (TDD) versus test-last approach in early programming courses, and considers the use of TDL on a limited basis in CS1 and CS2. Software testing, programmer productivity, programmer performance, and programmer opinions are compared between test-first and test-last programming groups. Results from this research indicate that a test-first approach can increase student testing and programmer performance, but that early programmers are very reluctant to adopt a test-first approach, even after having positive experiences using TDD. Further, this research demonstrates that TDL can be applied in CS1/2, but suggests that a more pervasive implementation of TDL may be necessary to motivate and establish disciplined testing practice among early programmers.}, isbn = {978-1-59593-799-5}, @@ -2102,7 +2003,6 @@ pages = {198}, publisher = {Multidisciplinary Digital Publishing Institute}, doi = {10.3390/educsci10080198}, - url = {https://www.mdpi.com/2227-7102/10/8/198}, urldate = {2021-10-01}, abstract = {Immersive technologies are rapidly transforming the field of education. Amongst them, Augmented Reality (AR) has shown promise as a resource, particularly for education in Science, Technology, Engineering, Arts, and Mathematics (STEAM). There are, however, few teachers deploying this new medium in the classroom directly, and, consequently, only a few, elect students benefit from the AR-enriched offers. Curricula are already overloaded, and schools generally lack developmental resources, thus leaving no room for experimentation. This situation is further aggravated by the too few educational applications available with sufficient learning content. 
In this article, we investigate the method of Active Learning for the teaching of STEAM subjects, using a format where students are tasked with building an AR application as part of their learning. We evaluate the applicability of the Active Learning for STEAM subjects with a qualitative, case study approach, applying the workshop format as an extracurricular activity in our work with students from a range of secondary schools in Oxford. We discuss how the format works, so it can be embedded into regular curricula, not just as an extracurricular activity, also providing an overview on the involved teaching units and rationale. All teams in our preview audience of the case study succeeded in building working applications, several of impressive complexity. Students found that the lessons were enjoyable and AR technology can enhance their learning experience. The Active Learning method served as a catalyst for students' skills development, with the case study providing evidence of learning to code, working with a physics simulation engine, ray-tracing, and geometry, learning how to manage teams and interact with other students/instructors, and engineering a working prototype of a game. We consequentially argue that combining the STEM subjects and the arts, using the proposed Active Learning format, is able to provide a more holistic and engaging education.}, copyright = {http://creativecommons.org/licenses/by/3.0/}, @@ -2132,7 +2032,6 @@ publisher = {Routledge}, issn = {1049-4820}, doi = {10.1080/10494820.2023.2294774}, - url = {https://doi.org/10.1080/10494820.2023.2294774}, urldate = {2024-01-05}, abstract = {Higher vocational education has been on a trajectory of rapid development. However, the challenge of fostering effective learning in students persists. In response to this, a study was undertaken to explore the impact of an optimized model of SPOC-based blended learning (SPOC-BL) on student presence, learning satisfaction, learning motivation, and academic performance in C language programming course. This quasi-experimental study spanned a period of three months and involved 92 students from two classes in a higher vocational school. Quantitative research methods were employed to analyze students' academic performance and their interrelationship among student presence, learning satisfaction, learning motivation in SPOC-BL approach. Analysis of Covariance, paired samples t-tests, and multiple regression analysis were conducted to analyze students' performance on achievement tests and student surveys. The results showed substantial improvements in student presence, learning motivation, learning satisfaction, and academic achievement as direct outcomes of the innovative instructional approach. The findings illuminated significant effects for all factors, with student presence exerting the most significant influence, followed closely by learning motivation, and with learning satisfaction playing a smaller yet still meaningful role. The study will inform the design and implementation of blended learning in higher vocational education.}, keywords = {academic achievement,Blended learning,higher vocational school,learning motivation,learning satisfaction,SPOC-BL,student presence}, @@ -2162,7 +2061,6 @@ pages = {2--es}, issn = {1531-4278}, doi = {10.1145/1163405.1163407}, - url = {https://dl.acm.org/doi/10.1145/1163405.1163407}, urldate = {2024-02-09}, abstract = {Computer programming lends itself to automated assessment. 
With appropriate software tools, program correctness can be measured, along with an indication of quality according to a set of metrics. Furthermore, the regularity of program code allows plagiarism detection to be an integral part of the tools that support assessment. In this paper, we describe a submission and assessment system, called BOSS, that supports coursework assessment through collecting submissions, performing automatic tests for correctness and quality, checking for plagiarism, and providing an interface for marking and delivering feedback. We describe how automated assessment is incorporated into BOSS such that it supports, rather than constrains, assessment. The pedagogic and administrative issues that are affected by the assessment process are also discussed.}, keywords = {automated assessment,Online submission,programming languages}, @@ -2179,7 +2077,6 @@ number = {4}, pages = {18:1--18:21}, doi = {10.1145/2906362}, - url = {https://doi.org/10.1145/2906362}, urldate = {2021-09-30}, abstract = {Educational technology offers several potential benefits for programming education. Still, to facilitate the technology properly, integration into a course must be carefully designed. In this article, we present a redesign of an object-oriented university-level programming course. In the redesign, a collaborative education tool was utilized to enhance active learning, facilitate communication between students and teachers, and remodel the evaluation procedure by utilizing automatically assessed tasks. The redesign was based on the best practices found in our own earlier research and that of the research community, with a focus on facilitating active learning methods and student collaboration. The redesign was evaluated by comparing two instances of the redesigned course against two instances using the old methodology. The drop-out rate decreased statistically significantly in the redesigned course instances. Moreover, there was a trend toward higher grade averages in the redesigned instances. Based on the results, we can conclude that the utilization of educational technology has a highly positive effect on student performance. Still, making major changes to course methodology does not come without certain difficulties. Hence, we also present our experiences and suggestions for the course redesign to help other educators and researchers perform similar design changes.}, keywords = {course methodology,course redesign,Object-oriented programming,programming education}, @@ -2223,7 +2120,6 @@ number = {1}, pages = {3:1--3:43}, doi = {10.1145/3231711}, - url = {https://doi.org/10.1145/3231711}, urldate = {2022-10-03}, abstract = {Formative feedback, aimed at helping students to improve their work, is an important factor in learning. Many tools that offer programming exercises provide automated feedback on student solutions. We have performed a systematic literature review to find out what kind of feedback is provided, which techniques are used to generate the feedback, how adaptable the feedback is, and how these tools are evaluated. We have designed a labelling to classify the tools, and use Narciss' feedback content categories to classify feedback messages. We report on the results of coding a total of 101 tools. We have found that feedback mostly focuses on identifying mistakes and less on fixing problems and taking a next step. Furthermore, teachers cannot easily adapt tools to their own needs. 
However, the diversity of feedback types has increased over the past decades and new techniques are being applied to generate feedback that is increasingly helpful for students.}, keywords = {automated feedback,learning programming,programming tools,Systematic literature review}, @@ -2259,7 +2155,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3180155.3180187}, - url = {https://dl.acm.org/doi/10.1145/3180155.3180187}, urldate = {2023-11-23}, abstract = {Code search is an unavoidable activity in software development. Various approaches and techniques have been explored in the literature to support code search tasks. Most of these approaches focus on serving user queries provided as natural language free-form input. However, there exists a wide range of use-case scenarios where a code-to-code approach would be most beneficial. For example, research directions in code transplantation, code diversity, patch recommendation can leverage a code-to-code search engine to find essential ingredients for their techniques. In this paper, we propose FaCoY, a novel approach for statically finding code fragments which may be semantically similar to user input code. FaCoY implements a query alternation strategy: instead of directly matching code query tokens with code in the search space, FaCoY first attempts to identify other tokens which may also be relevant in implementing the functional behavior of the input code. With various experiments, we show that (1) FaCoY is more effective than online code-to-code search engines; (2) FaCoY can detect more semantic code clones (i.e., Type-4) in BigCloneBench than the state-of-the-art; (3) FaCoY, while static, can detect code fragments which are indeed similar with respect to runtime execution behavior; and (4) FaCoY can be useful in code/patch recommendation.}, isbn = {978-1-4503-5638-1}, @@ -2277,7 +2172,6 @@ publisher = {Springer}, address = {New York, NY}, doi = {10.1007/978-1-4757-4108-7_1}, - url = {https://doi.org/10.1007/978-1-4757-4108-7_1}, urldate = {2021-02-19}, abstract = {This introduction to logistic regression describes the reasons for the popularity of the logistic model, the model form, how the model may be applied, and several of its key features, particularly how an odds ratio can be derived and computed for this model.}, isbn = {978-1-4757-4108-7}, @@ -2307,7 +2201,6 @@ title = {Predicting Student Success by Mining Enrolment Data.}, author = {Kovacic, Z.}, year = {2012}, - url = {https://repository.openpolytechnic.ac.nz/handle/11072/1486}, urldate = {2021-02-19}, abstract = {This paper explores the socio-demographic variables (age, gender, ethnicity, education, work status, and disability) and study environment (course programme and course block), that may influence persistence or dropout of the distance education students at the Open Polytechnic. It examines to what extent these factors, i.e. enrolment data help us in preidentifying successful and unsuccessful students. The data stored in the Open Polytechnic student management system from 2006 to 2009, covering over 450 students who enrolled to Information Systems course was used to perform a quantitative analysis of study outcome. Based on a data mining techniques (such as feature selection and classification trees) and logistic regression the most important factors for student success and a profile of the typical successful and unsuccessful students are identified. 
The empirical results show the following: (i) the most important factors separating successful from unsuccessful students are: ethnicity, course programme and course block; (ii) among classification tree growing methods Classification and Regression Tree (CART) was the most successful in growing the tree with an overall percentage of correct classification of 60.5\%; (iii) both the risk estimated by the cross-validation and the gain diagram suggests that all trees, based only on enrolment data, are not quite good in separating successful from unsuccessful students, and (iv) the same conclusion was reached using the logistic regression. The implications of these results for academic and administrative staff are discussed.}, langid = {english}, @@ -2327,7 +2220,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3159450.3159602}, - url = {https://dl.acm.org/doi/10.1145/3159450.3159602}, urldate = {2023-08-21}, abstract = {The increasing number of students in computer science courses leads to high efforts in manual assessment of exercises. Existing assessment systems are not designed for exercises with immediate feedback in large classes. In this paper, we present an AuTomated assEssment Management System for interactive learning. ArTEMiS assesses solutions to programming exercises automatically and provides instant feedback so that students can iteratively solve the exercise. It is open source and highly scalable based on version control, regression testing and continuous integration. ArTEMiS offers an online code editor with interactive exercise instructions, is programming language independent and applicable to a variety of computer science courses. By using it, students gain experiences in version control, dependency management and continuous integration. We used ArTEMiS in 3 university and 1 online courses and report about our experiences. We figured out that ArTEMiS is suitable for beginners, helps students to realize their progress and to gradually improve their solutions. It reduces the effort of instructors and enhances the learning experience of students.}, isbn = {978-1-4503-5103-4}, @@ -2346,7 +2238,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2491411.2491452}, - url = {https://doi.org/10.1145/2491411.2491452}, urldate = {2022-06-30}, abstract = {Previous version of a program can be a powerful enabler for program analysis by defining new relative specifications and making the results of current program analysis more relevant. In this paper, we describe the approach of differential assertion checking (DAC) for comparing different versions of a program with respect to a set of assertions. DAC provides a natural way to write relative specifications over two programs. We introduce a novel modular approach to DAC by reducing it to safety checking of a composed program, which can be accomplished by standard program verifiers. In particular, we leverage automatic invariant generation to synthesize relative specifications for pairs of loops and procedures. 
We provide a preliminary evaluation of a prototype implementation within the SymDiff tool along two directions (a) soundly verifying bug fixes in the presence of loops and (b) providing a knob for suppressing alarms when checking a new version of a program.}, isbn = {978-1-4503-2237-9}, @@ -2374,7 +2265,6 @@ publisher = {Routledge}, issn = {1040-0419}, doi = {10.1080/10400419.2013.752297}, - url = {https://doi.org/10.1080/10400419.2013.752297}, urldate = {2022-08-16}, abstract = {The main goal of this study was to examine the effects of authors' name and gender on judges' assessment of product creativity in 4 different domains (art, science, music, and poetry). A total of 119 participants divided into 5 groups assessed products signed with a fictional author's name (unique vs. typical, male vs. female) or in an anonymous condition. It was observed that depending on the domain, the uniqueness of the author's name and her or his gender was associated with the assessment of creativity of the product. A poem and painting signed with an unusual name and a piece of music whose authorship was attributed to a man with a unique name were assessed as especially creative. In case of scientific theory, works attributed to men were assessed as significantly more creative than those of women. The results are discussed in light of the attributional approach to creativity.} } @@ -2389,7 +2279,6 @@ pages = {101250}, issn = {0191-491X}, doi = {10.1016/j.stueduc.2023.101250}, - url = {https://www.sciencedirect.com/science/article/pii/S0191491X23000160}, urldate = {2024-01-10}, abstract = {Educators in large-scale online courses tend to lack the necessary resources to generate and provide adequate feedback for all students, especially when students' learning outcomes are evaluated through student writing. As a result, students welcome peer feedback and sometimes generate self-feedback to widen their perspectives and obtain feedback, but often lack the support to do so. This study, as part of a larger project, sought to address this prevalent problem in large-scale courses by allowing students to write essays as an expression of their opinions and response to others, conduct peer and self-evaluation, using provided rubric and Artificial Intelligence (AI)-enabled evaluation to aid the giving and receiving of feedback. A total of 605 undergraduate students were part of a large-scale online course and contributed over 2500 short essays during a semester. The research design uses a mixed-methods approach, consisting qualitative measures used during essay coding, and quantitative methods from the application of machine learning algorithms. With limited instructors and resources, students first use instructor-developed rubric to conduct peer and self-assessment, while instructors qualitatively code a subset of essays that are used as inputs for training a machine learning model, which is subsequently used to provide automated scores and an accuracy rate for the remaining essays. 
With AI-enabled evaluation, the provision of feedback can become a sustainable process with students receiving and using meaningful feedback for their work, entailing shared responsibility from teachers and students, and becoming more effective.}, keywords = {Artificial intelligence,Formative assessment,Machine learning,Online course,Peer and self-feedback}, @@ -2422,7 +2311,6 @@ number = {3}, pages = {18:1--18:19}, doi = {10.1145/3277569}, - url = {https://doi.org/10.1145/3277569}, urldate = {2021-09-16}, abstract = {As enrollments and class sizes in postsecondary institutions have increased, instructors have sought automated and lightweight means to identify students who are at risk of performing poorly in a course. This identification must be performed early enough in the term to allow instructors to assist those students before they fall irreparably behind. This study describes a modeling methodology that predicts student final exam scores in the third week of the term by using the clicker data that is automatically collected for instructors when they employ the Peer Instruction pedagogy. The modeling technique uses a support vector machine binary classifier, trained on one term of a course, to predict outcomes in the subsequent term. We applied this modeling technique to five different courses across the computer science curriculum, taught by three different instructors at two different institutions. Our modeling approach includes a set of strengths not seen wholesale in prior work, while maintaining competitive levels of accuracy with that work. These strengths include using a lightweight source of student data, affording early detection of struggling students, and predicting outcomes across terms in a natural setting (different final exams, minor changes to course content), across multiple courses in a curriculum, and across multiple institutions.}, keywords = {at-risk students,clicker data,cross-term,machine learning,multi-institution,Peer instruction,prediction}, @@ -2449,7 +2337,6 @@ publisher = {SAGE Publications Inc}, issn = {0735-6331}, doi = {10.1177/0735633117752614}, - url = {https://doi.org/10.1177/0735633117752614}, urldate = {2021-09-16}, abstract = {Educational data mining constitutes a recent research field which gained popularity over the last decade because of its ability to monitor students' academic performance and predict future progression. Numerous machine learning techniques and especially supervised learning algorithms have been applied to develop accurate models to predict student's characteristics which induce their behavior and performance. In this work, we examine and evaluate the effectiveness of two wrapper methods for semisupervised learning algorithms for predicting the students' performance in the final examinations. Our preliminary numerical experiments indicate that the advantage of semisupervised methods is that the classification accuracy can be significantly improved by utilizing a few labeled and many unlabeled data for developing reliable prediction models.}, langid = {english}, @@ -2483,7 +2370,6 @@ number = {OOPSLA}, pages = {152:1--152:28}, doi = {10.1145/3360578}, - url = {https://dl.acm.org/doi/10.1145/3360578}, urldate = {2023-11-23}, abstract = {Programmers often write code that has similarity to existing code written somewhere. A tool that could help programmers to search such similar code would be immensely useful. 
Such a tool could help programmers to extend partially written code snippets to completely implement necessary functionality, help to discover extensions to the partial code which are commonly included by other programmers, help to cross-check against similar code written by other programmers, or help to add extra code which would fix common mistakes and errors. We propose Aroma, a tool and technique for code recommendation via structural code search. Aroma indexes a huge code corpus including thousands of open-source projects, takes a partial code snippet as input, searches the corpus for method bodies containing the partial code snippet, and clusters and intersects the results of the search to recommend a small set of succinct code snippets which both contain the query snippet and appear as part of several methods in the corpus. We evaluated Aroma on 2000 randomly selected queries created from the corpus, as well as 64 queries derived from code snippets obtained from Stack Overflow, a popular website for discussing code. We implemented Aroma for 4 different languages, and developed an IDE plugin for Aroma. Furthermore, we conducted a study where we asked 12 programmers to complete programming tasks using Aroma, and collected their feedback. Our results indicate that Aroma is capable of retrieving and recommending relevant code snippets efficiently.}, keywords = {clone detection,clustering,code recommendation,feature-based code representation,structural code search}, @@ -2501,7 +2387,6 @@ primaryclass = {cs}, publisher = {arXiv}, doi = {10.48550/arXiv.2102.04664}, - url = {http://arxiv.org/abs/2102.04664}, urldate = {2023-11-23}, abstract = {Benchmark datasets have a significant impact on accelerating research in programming language tasks. In this paper, we introduce CodeXGLUE, a benchmark dataset to foster machine learning research for program understanding and generation. CodeXGLUE includes a collection of 10 tasks across 14 datasets and a platform for model evaluation and comparison. CodeXGLUE also features three baseline systems, including the BERT-style, GPT-style, and Encoder-Decoder models, to make it easy for researchers to use the platform. The availability of such data and baselines can help the development and validation of new methods that can be applied to various program understanding and generation problems.}, archiveprefix = {arxiv}, @@ -2521,7 +2406,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3576123.3576124}, - url = {https://dl.acm.org/doi/10.1145/3576123.3576124}, urldate = {2023-12-02}, abstract = {Automated assessment is commonly used across the spectrum of computing courses offered by Tertiary institutions. Such assessment is frequently intended to address the scalability of feedback that is essential for learning, and assessment for accreditation purposes. Although many reviews of automated assessment have been reported, the voices of teachers are not present. 
In this paper we present a variety of cases that illustrate some of the varied motivations and experiences of teaching using automated assessment.}, isbn = {978-1-4503-9941-8}, @@ -2541,7 +2425,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3293881.3295779}, - url = {https://doi.org/10.1145/3293881.3295779}, urldate = {2022-02-25}, abstract = {As computing becomes a mainstream discipline embedded in the school curriculum and acts as an enabler for an increasing range of academic disciplines in higher education, the literature on introductory programming is growing. Although there have been several reviews that focus on specific aspects of introductory programming, there has been no broad overview of the literature exploring recent trends across the breadth of introductory programming. This paper is the report of an ITiCSE working group that conducted a systematic review in order to gain an overview of the introductory programming literature. Partitioning the literature into papers addressing the student, teaching, the curriculum, and assessment, we explore trends, highlight advances in knowledge over the past 15 years, and indicate possible directions for future research.}, isbn = {978-1-4503-6223-8}, @@ -2559,7 +2442,6 @@ pages = {101755}, issn = {2352-7110}, doi = {10.1016/j.softx.2024.101755}, - url = {https://www.sciencedirect.com/science/article/pii/S2352711024001262}, urldate = {2024-05-13}, abstract = {Source code plagiarism is a significant issue in educational practice, and educators need user-friendly tools to cope with such academic dishonesty. This article introduces the latest version of Dolos, a state-of-the-art ecosystem of tools for detecting and preventing plagiarism in educational source code. In this new version, the primary focus has been on enhancing the user experience. Educators can now run the entire plagiarism detection pipeline from a new web app in their browser, eliminating the need for any installation or configuration. Completely redesigned analytics dashboards provide an instant assessment of whether a collection of source files contains suspected cases of plagiarism and how widespread plagiarism is within the collection. The dashboards support hierarchically structured navigation to facilitate zooming in and out of suspect cases. Clusters are an essential new component of the dashboard design, reflecting the observation that plagiarism can occur among larger groups of students. To meet various user needs, the Dolos software stack for source code plagiarism detection now includes a self-hostable web app, a JSON application programming interface (API), a command line interface (CLI), a JavaScript library and a preconfigured Docker container. Clear documentation and a free-to-use instance of the web app can be found at https://dolos.ugent.be. The source code is also available on GitHub.}, keywords = {Academic dishonesty,Cheating,Educational data mining,Learning analytics,Online learning,Plagiarism,Programming language,Source code,Web app}, @@ -2574,7 +2456,6 @@ journal = {{Journal of Computer Assisted Learning}}, issn = {1365-2729}, doi = {10.1111/jcal.12662}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jcal.12662}, urldate = {2022-03-25}, abstract = {Background Learning to code is increasingly embedded in secondary and higher education curricula, where solving programming exercises plays an important role in the learning process and in formative and summative assessment. 
Unfortunately, students admit that copying code from each other is a common practice and teachers indicate they rarely use plagiarism detection tools. Objectives We want to lower the barrier for teachers to detect plagiarism by introducing a new source code plagiarism detection tool (Dolos) that is powered by state-of-the art similarity detection algorithms, offers interactive visualizations, and uses generic parser models to support a broad range of programming languages. Methods Dolos is compared with state-of-the-art plagiarism detection tools in a benchmark based on a standardized dataset. We describe our experience with integrating Dolos in a programming course with a strong focus on online learning and the impact of transitioning to remote assessment during the COVID-19 pandemic. Results and Conclusions Dolos outperforms other plagiarism detection tools in detecting potential cases of plagiarism and is a valuable tool for preventing and detecting plagiarism in online learning environments. It is available under the permissive MIT open-source license at https://dolos.ugent.be. Implications Dolos lowers barriers for teachers to discover, prove and prevent plagiarism in programming courses. This helps to enable a shift towards open and online learning and assessment environments, and opens up interesting avenues for more effective learning and better assessment.}, langid = {english}, @@ -2594,7 +2475,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3587103.3594166}, - url = {https://dl.acm.org/doi/10.1145/3587103.3594166}, urldate = {2023-11-16}, abstract = {With the increasing demand for programming skills comes a trend towards more online programming courses and assessments. While this allows educators to teach larger groups of students, it also opens the door to dishonest student behaviour, such as copying code from other students. When teachers use assignments where all students write code for the same problem, source code similarity tools can help to combat plagiarism. Unfortunately, teachers often do not use these tools to prevent such behaviour. In response to this challenge, we have developed a new source code plagiarism detection tool named Dolos. Dolos is open-source, supports a wide range of programming languages, and is designed to be user-friendly. It enables teachers to detect, prove and prevent plagiarism in programming courses by using fast algorithms and powerful visualisations. We present further enhancements to Dolos and discuss how it can be integrated into modern computing education courses to meet the challenges of online learning and assessment. By lowering the barriers for teachers to detect, prove and prevent plagiarism in programming courses, Dolos can help protect academic integrity and ensure that students earn their grades honestly.}, isbn = {9798400701399}, @@ -2614,7 +2494,6 @@ pages = {285--305}, issn = {2211-1670}, doi = {10.1007/s10758-016-9286-8}, - url = {https://doi.org/10.1007/s10758-016-9286-8}, urldate = {2024-02-14}, abstract = {Learning analytics and digital badges are emerging research fields in educational science. They both show promise for enhancing student retention in higher education, where withdrawals prior to degree completion remain at about 30~\% in Organisation for Economic Cooperation and Development member countries. 
This integrative review provides an overview of the theoretical literature as well as current practices and experience with learning analytics and digital badges in higher education with regard to their potential impact on student retention to enhance students' first-year experience. Learning analytics involves measuring and analyzing dynamic student data in order to gain insight into students' learning processes and optimize learning and teaching. One purpose of learning analytics is to construct predictive models to identify students who risk failing a course and thus are more likely to drop out of higher education. Personalized feedback provides students with information about academic support services, helping them to improve their skills and therefore be successful in higher education. Digital badges are symbols for certifying knowledge, skills, and competencies on web-based platforms. The intention is to encourage student persistence by motivating them, recognizing their generic skills, signaling their achievements, and capturing their learning paths. This article proposes a model that synthesizes learning analytics, digital badges, and generic skills such as academic competencies. The main idea is that generic skills can be represented as digital badges, which can be used for learning analytics algorithms to predict student success and to provide students with personalized feedback for improvement. Moreover, this model may serve as a platform for discussion and further research on learning analytics and digital badges to increase student retention in higher education.}, langid = {english}, @@ -2632,7 +2511,6 @@ number = {4}, pages = {16:1--16:15}, doi = {10.1145/1868358.1868363}, - url = {https://dl.acm.org/doi/10.1145/1868358.1868363}, urldate = {2024-02-21}, abstract = {Scratch is a visual programming environment that allows users (primarily ages 8 to 16) to learn computer programming while working on personally meaningful projects such as animated stories and games. A key design goal of Scratch is to support self-directed learning through tinkering and collaboration with peers. This article explores how the Scratch programming language and environment support this goal.}, keywords = {programming environment,programming language,Scratch,visual programming language}, @@ -2652,7 +2530,6 @@ publisher = {SAGE Publications Ltd}, issn = {0004-9441}, doi = {10.1177/0004944116664618}, - url = {https://doi.org/10.1177/0004944116664618}, urldate = {2022-08-16}, abstract = {This article provides a meta-analysis of experimental research findings on the existence of bias in subjective grading of student work such as essay writing. Twenty-three analyses, from 20 studies, with a total of 1935 graders, met the inclusion criteria for the meta-analysis. All studies involved graders being exposed to a specific type of information about a student other than the student's performance on a task. The hypothesized biasing characteristics included different race/ethnic backgrounds, education-related deficiencies, physical unattractiveness and poor quality of prior performance. The statistically significant overall between-groups effect size was g\,=\,0.36. Moderator analyses showed no significant difference in effect size related to whether the work graded was from a primary school student or a university student. No one type of biasing characteristic showed a significantly higher effect size than other types. 
The results suggest that bias can occur in subjective grading when graders are aware of irrelevant information about the students.}, langid = {english}, @@ -2671,7 +2548,6 @@ publisher = {SAGE Publications Inc}, issn = {0098-6283}, doi = {10.1177/0098628313487425}, - url = {https://doi.org/10.1177/0098628313487425}, urldate = {2022-08-16}, abstract = {Experts have advocated anonymous grading as a means of eliminating actual or perceived evaluator bias in subjective student assessment. The utility of anonymity in assessment rests on whether information derived from student identity can unduly influence evaluation. The halo effect provides a conceptual background for why a bias might occur. In the present study examining the halo effect, psychology faculty members and teaching assistants were randomly assigned to grade a student giving a poor oral presentation or the same student giving a good oral presentation. All graders then assessed an unrelated piece of written work by the student. As hypothesized, the graders assigned significantly higher scores to written work following the better oral presentation. The results provide strong evidence of a halo effect in that prior experience with a student biased the grading of written work completed by the student. The findings suggest the need to keep students anonymous when feasible in order to minimize the risk of unfair grading.}, langid = {english}, @@ -2687,7 +2563,6 @@ pages = {176--183}, publisher = {SciTePress}, doi = {10.5220/0004842801760183}, - url = {https://upcommons.upc.edu/handle/2117/28174}, urldate = {2022-08-16}, copyright = {Open Access}, isbn = {978-989-758-021-5}, @@ -2709,7 +2584,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1067445.1067451}, - url = {https://doi.org/10.1145/1067445.1067451}, urldate = {2022-02-25}, abstract = {The complexity of languages like Java and C++ can make introductory programming classes in these languages extremely challenging for many students. Part of the complexity comes from the large number of concepts and language features that students are expected to learn while having little time for adequate practice or examples. A second source of difficulty is the emphasis that object-oriented programming places on abstraction. We believe that by placing a larger emphasis on testing in programming assignments in these introductory courses, students have an opportunity for extra practice with the language, and this affords them a gentler transition into the abstract thinking needed for programming. In this paper we describe how we emphasized testing in introductory programming assignments by requiring that students design and implement tests before starting on the program itself. We also provide some preliminary results and student reactions.}, isbn = {978-1-59593-024-8}, @@ -2728,7 +2602,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3448139.3448160}, - url = {https://doi.org/10.1145/3448139.3448160}, urldate = {2022-09-15}, abstract = {Large courses act as gateways for college students and often have poor outcomes, particularly in STEM fields where the pace of improvement has been glacial. Students encounter barriers to persistence like low grades, competitive cultures, and a lack of motivation and belonging. Tailored technology systems offer one promising path forward. 
In this observational study, we report on the use of one such system, called ECoach, that provides students resources based on their psychosocial profile, performance metrics, and pattern of ECoach usage. We investigated ECoach efficacy in five courses enrolling 3,599 students using a clustering method to group users by engagement level and subsequent regression analyses. We present results showing significant positive relationships with small effect sizes between ECoach engagement and final course grade as well as grade anomaly, a performance measure that takes into account prior course grades. The courses with the strongest relationship between ECoach engagement and performance offered nominal extra credit incentives yet show improved grades well above this ``investment'' from instructors. Such small incentives may act as a catalyst that spurs deeper engagement with the platform. The impact of specific ECoach features and areas for future study are discussed.}, isbn = {978-1-4503-8935-8}, @@ -2763,7 +2636,6 @@ number = {1}, pages = {2:1--2:18}, doi = {10.1145/2747008}, - url = {https://doi.org/10.1145/2747008}, urldate = {2021-09-30}, abstract = {In this article, we address the question of why computing students choose to learn computing topics on their own. A better understanding of why some students choose to learn on their own may help us to motivate other students to develop this important skill. In addition, it may help in curriculum design; if we need to leave some topics out of our expanding curriculum, a good choice might be those topics that students readily learn on their own. Based on a thematic analysis of 17 semistructured interviews, we found that computing students' motivations for self-directed learning fall into four general themes: projects, social and peer interactions, joy of learning, and fear. Under these, we describe several more specific subthemes, illustrated in the words of the students. The project-related and social motivations are quite prominent. Although these motivations appear in the literature, they received greater emphasis from our interviewees. Perhaps most characteristic of computing is the motivation to learn to complete some project, both projects done for fun and projects required for school or work.}, keywords = {informal learning,Motivation,self-directed learning}, @@ -2781,7 +2653,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/572133.572137}, - url = {https://doi.org/10.1145/572133.572137}, urldate = {2022-02-24}, abstract = {In computer science, an expected outcome of a student's education is programming skill. This working group investigated the programming competency students have as they complete their first one or two courses in computer science. In order to explore options for assessing students, the working group developed a trial assessment of whether students can program. The underlying goal of this work was to initiate dialog in the Computer Science community on how to develop these types of assessments. Several universities participated in our trial assessment and the disappointing results suggest that many students do not know how to program at the conclusion of their introductory courses. For a combined sample of 216 students from four universities, the average score was 22.89 out of 110 points on the general evaluation criteria developed for this study. 
From this trial assessment we developed a framework of expectations for first-year courses and suggestions for further work to develop more comprehensive assessments.}, isbn = {978-1-4503-7359-3}, @@ -2800,7 +2671,6 @@ publisher = {Routledge}, issn = {1539-1523}, doi = {10.1080/15391523.2010.10782552}, - url = {https://doi.org/10.1080/15391523.2010.10782552}, urldate = {2021-04-30}, abstract = {This study examined technology implementation practices associated with student learning gains. Interviews and observations were conducted with staff at schools where teachers using reading or mathematics software with their students attained above-average achievement gains and at schools where software-using teachers had below-average gains. The findings highlight the importance of school practices in the areas of principal support and teacher collaboration around software use and of teacher practices concerning classroom management and use of software-generated student performance data. The issues of instructional coherence and competition for instructional time are highlighted as challenges to software implementation.}, keywords = {implementation,software,Technology}, @@ -2831,7 +2701,6 @@ journal = {{arXiv:1908.09635 [cs]}}, eprint = {1908.09635}, primaryclass = {cs}, - url = {http://arxiv.org/abs/1908.09635}, urldate = {2021-04-30}, abstract = {With the widespread use of AI systems and applications in our everyday lives, it is important to take fairness issues into consideration while designing and engineering these types of systems. Such systems can be used in many sensitive environments to make important and life-changing decisions; thus, it is crucial to ensure that the decisions do not reflect discriminatory behavior toward certain groups or populations. We have recently seen work in machine learning, natural language processing, and deep learning that addresses such challenges in different subdomains. With the commercialization of these systems, researchers are becoming aware of the biases that these applications can contain and have attempted to address them. In this survey we investigated different real-world applications that have shown biases in various ways, and we listed different sources of biases that can affect AI applications. We then created a taxonomy for fairness definitions that machine learning researchers have defined in order to avoid the existing bias in AI systems. In addition to that, we examined different domains and subdomains in AI showing what researchers have observed with regard to unfair outcomes in the state-of-the-art methods and how they have tried to address them. There are still many future directions and solutions that can be taken to mitigate the problem of bias in AI systems. We are hoping that this survey will motivate researchers to tackle these issues in the near future by observing existing work in their respective fields.}, archiveprefix = {arxiv}, @@ -2865,7 +2734,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3472673.3473958}, - url = {https://doi.org/10.1145/3472673.3473958}, urldate = {2022-07-06}, abstract = {Research on source code mining has been explored to discover interesting structural regularities, API usage patterns, refactoring opportunities, bugs, crosscutting concerns, code clones and systematic changes. 
In this paper we present a pattern mining algorithm that uses frequent tree mining to mine for interesting good, bad or ugly coding idioms made by undergraduate students taking an introductory programming course. We do so by looking for patterns that distinguish positive examples, corresponding to the more correct answers to a question, from negative examples, corresponding to solutions that failed the question. We report promising initial results of this algorithm applied to the source code of over 500 students. Even though more work is needed to fine-tune and validate the algorithm further, we hope that it can lead to interesting insights that can eventually be integrated into an intelligent recommendation system to help students learn from their errors.}, isbn = {978-1-4503-8624-1}, @@ -2921,7 +2789,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3545945.3569734}, - url = {https://dl.acm.org/doi/10.1145/3545945.3569734}, urldate = {2024-01-05}, abstract = {Automated programming assignment grading tools have become integral to CS courses at introductory as well as advanced levels. However such tools have their own custom approaches to setting up assignments and describing how solutions should be tested, requiring instructors to make a significant learning investment to begin using a new tool. In addition, differences between tools mean that initial investment must be repeated when switching tools or adding a new one. Worse still, tool-specific strategies further reduce the ability of educators to share and reuse their assignments. This paper describes an early experiences with PEML, the Programming Exercise Markup Language, which provides an easy to use, instructor friendly approach for writing programming assignments. Unlike tool-oriented data interchange formats, PEML is designed to provide a human friendly authoring format that has been developed to be intuitive, expressive and not be a technological or notational barrier to instructors. We describe the design and implementation of PEML, both as a programming library and also a public-access web microservice that provides full parsing and rendering capabilities for easy integration into any tools or scripting libraries. We also describe experiences using PEML to describe a full range of programming assignments, laboratory exercises, and small coding questions of varying complexity in demonstrating the practicality of the notation. The aim is to develop PEML as a community resource to reduce the barriers to entry for automated assignment tools while widening the scope of programming assignment sharing and reuse across courses and institutions.}, isbn = {978-1-4503-9431-4}, @@ -2947,7 +2814,6 @@ title = {Interpretable {{Machine Learning}}}, author = {Molnar, Christoph}, year = {2019}, - url = {https://christophm.github.io/interpretable-ml-book/}, urldate = {2021-08-24}, abstract = {Machine learning algorithms usually operate as black boxes and it is unclear how they derived a certain decision. This book is a guide for practitioners to make machine learning decisions interpretable.}, file = {/home/charlotte/sync/Zotero/storage/HV4YWAEG/interpretable-ml-book.html} @@ -2964,7 +2830,6 @@ pages = {100086}, issn = {2666-5573}, doi = {10.1016/j.caeo.2022.100086}, - url = {https://www.sciencedirect.com/science/article/pii/S2666557322000143}, urldate = {2024-02-22}, abstract = {Feedback has been recognized as a crucial element in the learning and teaching process. 
Although teachers know and accept this, they are not always eager to engage in this tedious and time-consuming activity. This study investigates how computers can work together with teachers to make the process of giving feedback more efficient by introducing a semi-automated approach (SA) with reusable feedback: when a teacher writes feedback for a student, the computer saves it, so it can be reused when following students make similar mistakes. We devised the concept of atomic feedback, a set of form requirements that could enhance feedback's reusability. To write atomic feedback, teachers have to identify the independent errors and write brief feedback items for each separate error. Our SA approach with reusable feedback was implemented in Moodle. During a crossover experiment with math teachers (n~=~36~+~9 in pilot study), we examined (1) whether SA saves time or changes the amount of feedback, as compared to traditional, paper-based correction work, (2) the extent to which the feedback was atomic, (3) whether atomic feedback enhances the reusability of feedback and (4) how teachers used and perceived the SA system. In light of the results, which suggest that atomic feedback is indeed reusable, we propose formal requirements for writing reusable feedback. Nevertheless, teachers did not save time using the SA system, but they provided significantly more feedback.}, keywords = {Architectures for educational technology system,Distributed learning environments,Evaluation methodologies,Human-computer interface,Improving classroom teaching}, @@ -2979,7 +2844,6 @@ number = {n/a}, issn = {1467-8535}, doi = {10.1111/bjet.13447}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/bjet.13447}, urldate = {2024-03-05}, abstract = {In this crossover experiment, we investigated the impact of a statement bank, enabling the reuse of previously written feedback (SA condition), on 45 math teachers' feedback for 60 completed linear equation tests, compared to traditional pen-and-paper feedback (PP condition). In the SA condition, teachers were encouraged to use atomic feedback, a set of formulation requirements that makes feedback items significantly more reusable. A previous study found that significantly more feedback was written in the SA condition but did not investigate the content of the feedback. To address this gap, we employed a novel approach of combining text mining with qualitative methods. Results indicate similar wording and sentiments in both conditions. However, SA feedback was more elaborate yet general, focusing on major and minor strengths and deficits, while PP feedback was shorter but more concrete, emphasising main issues. Despite low feedback quality in both conditions, the statement bank led to less effective diagnostic activities, implying that teachers' careless use of statement banks, although convenient, might lead to lower-quality feedback. Practitioner notes What is already known about this topic High-quality feedback should strike a balance between the volume and focus on the main issues, as more feedback does not necessarily equate to better feedback. Feedback should analyse a student's solution whenever possible: interpreting mistakes and communicating that interpretation as feedback. Text mining identifies meaningful patterns and new insights in text using computer algorithms. When teachers can reuse already given feedback using a software tool (statement bank), they tend to write more feedback instead of saving time. 
What this paper adds Feedback is compared when teachers could use a tool to reuse already given feedback (referred to as `statement banks') versus a scenario without such a tool. Both approaches observed similar word frequencies, sentiments and amounts of erroneous, descriptive and corrective feedback. However, feedback with a statement bank tended to be more elaborate yet less specific to individual student solutions. In contrast, feedback without the tool was shorter but more concrete, focusing on main issues. Overall, the tool for reusing feedback directed teachers towards less effective diagnostic activities. The paper introduces a novel methodological approach by combining text mining with qualitative techniques in educational research. While text mining provides an overall understanding of differences and similarities in feedback approaches, qualitative methods are essential for in-depth analysis of content characteristics and feedback quality. Implications for practice and/or policy Statement banks can support teachers by giving more feedback, but in order to improve feedback quality, further measures are necessary (eg, improving pedagogical content knowledge). Teachers may not confuse handiness with quality: statement banks can help, but when used carelessly, teachers tend to describe and correct students' work instead of analysing underlying (mis-)conceptions using it. Continued attention to feedback quality remains necessary when using such tools.}, copyright = {{\copyright} 2024 The Authors. British Journal of Educational Technology published by John Wiley \& Sons Ltd on behalf of British Educational Research Association.}, @@ -2999,7 +2863,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3183440.3183453}, - url = {https://doi.org/10.1145/3183440.3183453}, urldate = {2022-06-30}, abstract = {As more aspects of our daily lives rely on technology, the software that enables the technology must be secure. Developers rely on practices such as threat modeling, static and dynamic analyses, code review, and fuzz and penetration testing to engineer secure software. These practices, while effective at identifying vulnerabilities in software, are limited in their ability to describe the potential reasons for the existence of vulnerabilities. In order to overcome this limitation, researchers have proposed empirically validated metrics to identify factors that may have led to the introduction of vulnerabilities in the past. Developers must be made aware of these factors so that they can proactively consider the security implications of each line of code that they contribute. The goal of our research is to assist developers in engineering secure software by providing a technique that generates scientific, interpretable, and actionable feedback on security as the software evolves. 
In this paper, we provide an overview of our proposed approach to accomplish this research goal through a series of three research studies in which we (1) systematize the knowledge on vulnerability discovery metrics, (2) leverage the metrics to generate feedback on security, and (3) implement a framework for providing automatically generated feedback on security using code reviews as a medium.}, isbn = {978-1-4503-5663-3}, @@ -3017,7 +2880,6 @@ pages = {251--266}, issn = {1432-0541}, doi = {10.1007/BF01840446}, - url = {https://doi.org/10.1007/BF01840446}, urldate = {2022-08-16}, abstract = {The problems of finding a longest common subsequence of two sequences A and B and a shortest edit script for transforming A into B have long been known to be dual problems. In this paper, they are shown to be equivalent to finding a shortest/longest path in an edit graph. Using this perspective, a simple $O(ND)$ time and space algorithm is developed where N is the sum of the lengths of A and B and D is the size of the minimum edit script for A and B. The algorithm performs well when differences are small (sequences are similar) and is consequently fast in typical applications. The algorithm is shown to have $O(N+D^2)$ expected-time performance under a basic stochastic model. A refinement of the algorithm requires only $O(N)$ space, and the use of suffix trees leads to an $O(N \log N + D^2)$ time variation.}, langid = {english}, @@ -3036,7 +2898,6 @@ publisher = {Routledge}, issn = {0158-7919}, doi = {10.1080/01587919.2012.667957}, - url = {https://doi.org/10.1080/01587919.2012.667957}, urldate = {2022-08-16}, abstract = {Fully online courses are becoming progressively more popular because of their ``anytime anywhere'' learning flexibility. One of the ways students interact with each other and with the instructors within fully online learning environments is via asynchronous discussion forums. However, student engagement in online discussion forums does not always take place automatically and there is a lack of clarity about the ideal role of the instructors in them. In this article, we report on our research on the quality of discussion in fully online courses through analysis of discussion forum activities. We have conducted our research on two large fully online subjects for computing students over two consecutive semesters and used a grounded theoretic approach for data analysis. Our results reveal what students and instructors consider as quality interaction in fully online courses. We also propose two frameworks based on our findings that can be used to ensure effective online interaction.}, keywords = {asynchronous discussion forums,fully online course,quality framework} @@ -3053,7 +2914,6 @@ pages = {177--188}, issn = {0006-3835}, doi = {10.1007/BF01956028}, - url = {https://doi.org/10.1007/BF01956028}, urldate = {2024-02-06}, abstract = {The report describes an experiment on automatic grading of student algorithms, using an ALGOL compiler. The experiment is based on an evaluation of the efficiency and logical completeness of the algorithms, not on their formal correctness, which is supposed to be checked in advance by the individual student. The technique used is to embed the student algorithms within a larger grading program structure, which supplies test cases and performs checks and evaluation. The complete text of such a grading program is given. 
The experience gained through the experiment, and suggestions for further developments, are discussed.}, keywords = {Complete Text,Computational Mathematic,Formal Correctness,Individual Student,Program Structure}, @@ -3071,7 +2931,6 @@ publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2659532.2659628}, - url = {https://doi.org/10.1145/2659532.2659628}, urldate = {2021-02-22}, abstract = {Although interactive e-learning environments are increasingly used in university courses, traditional types of examination still dominate the way how students are assessed for grades. In this paper, we examined how student data from online interactive learning environment for programming exercises can be used for summative assessment at the end of the course. Using data from three different university courses, we calibrated the two parameter logistic regression model, ranked students according to their ability of solving problems, and matched them to final grades. Results indicate we can predict grades within 0.57 to 1.02 level of accuracy, suggesting that careful use of interactive e-learning environments in university courses can substitute existing assessment methods, opening further possibilities for innovation in instructional process.}, isbn = {978-1-4503-2753-4}, @@ -3087,7 +2946,6 @@ journal = {{ACM Transactions on Computing Education (TOCE)}}, publisher = {ACM}, doi = {10.1145/3517133}, - url = {https://dl.acm.org/doi/abs/10.1145/3517133}, urldate = {2022-04-01}, abstract = {In many countries, computer programming is becoming an integral part of the secondary school curriculum. However, many teachers, especially in the first years of Flemish secondary school, have limited experience with teaching programming. To improve their ...}, langid = {english}, @@ -3108,7 +2966,6 @@ New York, NY}, publisher = {The University of Chicago Press}, issn = {0013-5984}, doi = {10.1086/461747}, - url = {https://www.journals.uchicago.edu/doi/abs/10.1086/461747}, urldate = {2022-08-16}, abstract = {This study investigated students' perceptions of their teachers and classmates in relation to reported academic help seeking. 177 students at grades 3, 5, and 7 were interviewed individually using a structured questionnaire to assess who, why, and in what situations they asked for help when they had problems in math class. Results indicated that students generally preferred the teacher to classmates as helpers and saw the teacher, in comparison to classmates, as more likely to facilitate learning and less likely to think they were "dumb" for asking questions. Several grade-related differences emerged. Fifth and seventh graders' help-seeking intentions reflected more concern about social comparison than did third graders'. At seventh grade only, a concern that the teacher might think students were "dumb" for asking questions was negatively related to the self-reported likelihood of seeking help. Perceptions of teacher support varied with grade level. 
Although perception of a strong personal relationship with the teacher was associated with students' intentions to seek help at all grades, perception of teacher encouragement of questioning was related only at fifth and seventh grades.} } @@ -3126,7 +2983,6 @@ New York, NY}, publisher = {Routledge}, issn = {0307-5079}, doi = {10.1080/03075070600572090}, - url = {https://doi.org/10.1080/03075070600572090}, urldate = {2022-02-21}, abstract = {The research on formative assessment and feedback is reinterpreted to show how these processes can help students take control of their own learning, i.e. become self-regulated learners. This reformulation is used to identify seven principles of good feedback practice that support self-regulation. A key argument is that students are already assessing their own work and generating their own feedback, and that higher education should build on this ability. The research underpinning each feedback principle is presented, and some examples of easy-to-implement feedback strategies are briefly described. This shift in focus, whereby students are seen as having a proactive rather than a reactive role in generating and using feedback, has profound implications for the way in which teachers organise assessments and support learning.}, file = {/home/charlotte/sync/Zotero/storage/KPAM4LT7/Nicol and Macfarlane‐Dick - 2006 - Formative assessment and self‐regulated learning .pdf;/home/charlotte/sync/Zotero/storage/P2LCKCBM/03075070600572090.html} @@ -3149,7 +3005,6 @@ New York, NY}, author = {Nievergelt, J.}, year = {1976}, month = aug, - url = {https://eric.ed.gov/?id=ED134229}, urldate = {2024-02-07}, abstract = {The Automated Computer Science Educational System (ACSES) has been developed at the University of Illinois for the purpose of providing improved education for the large number of students taking introductory computer science courses. The major components of this system are: a library of instructional lessons, an interactive programing system with excellent error diagnostics, an information retrieval system, an automated exam and quiz system, and several lessons which judge student programs. This report briefly describes each of these components, as well as some ideas on programing language design resulting from this experience, and presents an evaluation of the use of the system over the past three years. (Author)}, langid = {english}, @@ -3171,7 +3026,6 @@ New York, NY}, publisher = {American Society of Civil Engineers}, issn = {1052-3928}, doi = {10.1061/(ASCE)1052-3928(2007)133:1(31)}, - url = {https://ascelibrary.org/doi/abs/10.1061/%28ASCE%291052-3928%282007%29133%3A1%2831%29}, urldate = {2021-10-01}, abstract = {In this case study, we present a teaching approach that promotes active learning in engineering classes. Students are provided with a combination of physical, mathematical, and computer simulation models that allow them to participate, act, react, and reflect, rather than just listen to lectures, as in traditional classes. We illustrate important aspects of our pedagogy with one of 12 modules that have been implemented in an undergraduate hydraulic engineering course. Student evaluations indicate that this approach is appealing to most students. 
Data are presented to show that this approach has contributed to improved student learning and achievement.}, langid = {english}, @@ -3190,7 +3044,6 @@ New York, NY}, pages = {437--461}, issn = {2073-4859}, doi = {10.32614/RJ-2020-007}, - url = {https://journal.r-project.org/archive/2020/RJ-2020-007/index.html}, urldate = {2021-02-26}, langid = {english}, file = {/home/charlotte/sync/Zotero/storage/ACQBCGCE/Nüst et al. - 2020 - The Rockerverse Packages and Applications for Con.pdf;/home/charlotte/sync/Zotero/storage/6JE8GK6V/index.html} @@ -3213,8 +3066,7 @@ New York, NY}, type = {Doi:{{https://doi.org/10.1787/589b283f-en}}}, title = {{{OECD Digital Education Outlook}} 2021}, author = {OECD}, - year = {2021}, - url = {https://www.oecd-ilibrary.org/content/publication/589b283f-en} + year = {2021} } @misc{oreillyWhatWebDesign2007, @@ -3226,7 +3078,6 @@ New York, NY}, month = aug, number = {1008839}, address = {Rochester, NY}, - url = {https://papers.ssrn.com/abstract=1008839}, urldate = {2024-02-08}, abstract = {This paper was the first initiative to try to define Web 2.0 and understand its implications for the next generation of software, looking at both design patterns and business modes. Web 2.0 is the network as platform, spanning all connected devices; Web 2.0 applications are those that make the most of the intrinsic advantages of that platform: delivering software as a continually-updated service that gets better the more people use it, consuming and remixing data from multiple sources, including individual users, while providing their own data and services in a form that allows remixing by others, creating network effects through an architecture of participation, and going beyond the page metaphor of Web 1.0 to deliver rich user experiences.}, langid = {english}, @@ -3255,7 +3106,6 @@ New York, NY}, number = {2}, pages = {168--175}, publisher = {{International Society for Exploring Teaching and Learning}}, - url = {https://eric.ed.gov/?id=EJ1111131}, urldate = {2024-01-10}, abstract = {This article provides an evaluation of the redesign of a research methods course intended to enhance students' learning for understanding and transfer. Drawing on principles of formative assessment from the existing academic literature, the instructor introduced a number of increasingly complex low-stakes assignments for students to complete prior to submitting their final project. Concrete, constructive feedback from either the instructor or peers or both was offered at each stage of the project so that students could have the opportunity to review their work and improve particular aspects prior to moving on to the next assignment. Student performance on each subsequent submission was assessed through the use of a scoring rubric. Although there was significant improvement from one draft of a given assignment (T1) to the next (T2), the instructor's decision not to require a preliminary draft of the final project ultimately yielded mixed results at the end of the course (T3); this serves to highlight the importance of providing multiple active learning opportunities for students by using a progressive scaffolding approach.}, langid = {english}, @@ -3275,7 +3125,6 @@ New York, NY}, number = {3}, pages = {34:1--34:40}, doi = {10.1145/3513140}, - url = {https://doi.org/10.1145/3513140}, urldate = {2022-08-16}, abstract = {Practical programming competencies are critical to the success in computer science (CS) education and go-to-market of fresh graduates. 
Acquiring the required level of skills is a long journey of discovery, trial and error, and optimization seeking through a broad range of programming activities that learners must perform themselves. It is not reasonable to consider that teachers could evaluate all attempts that the average learner should develop multiplied by the number of students enrolled in a course, much less in a timely, deep, and fair fashion. Unsurprisingly, exploring the formal structure of programs to automate the assessment of certain features has long been a hot topic among CS education practitioners. Assessing a program is considerably more complex than asserting its functional correctness, as the proliferation of tools and techniques in the literature over the past decades indicates. Program efficiency, behavior, and readability, among many other features, assessed either statically or dynamically, are now also relevant for automatic evaluation. The outcome of an evaluation evolved from the primordial Boolean values to information about errors and tips on how to advance, possibly taking into account similar solutions. This work surveys the state of the art in the automated assessment of CS assignments, focusing on the supported types of exercises, security measures adopted, testing techniques used, type of feedback produced, and the information they offer the teacher to understand and optimize learning. A new era of automated assessment, capitalizing on static analysis techniques and containerization, has been identified. Furthermore, this review presents several other findings from the conducted review, discusses the current challenges of the field, and proposes some future research directions.}, keywords = {Automated assessment,computer science,feedback,learning analytics,programming}, @@ -3294,7 +3143,6 @@ New York, NY}, publisher = {Multidisciplinary Digital Publishing Institute}, issn = {2078-2489}, doi = {10.3390/info13020045}, - url = {https://www.mdpi.com/2078-2489/13/2/45}, urldate = {2023-10-02}, abstract = {E-learning tools are gaining increasing relevance as facilitators in the task of learning how to program. This is mainly a result of the pandemic situation and consequent lockdown in several countries, which forced distance learning. Instant and relevant feedback to students, particularly if coupled with gamification, plays a pivotal role in this process and has already been demonstrated as an effective solution in this regard. However, teachers still struggle with the lack of tools that can adequately support the creation and management of online gamified programming courses. Until now, there was no software platform that would be simultaneously open-source and general-purpose (i.e., not integrated with a specific course on a specific programming language) while featuring a meaningful selection of gamification components. Such a solution has been developed as a part of the Framework for Gamified Programming Education (FGPE) project. 
In this paper, we present its two front-end components: FGPE AuthorKit and FGPE PLE, explain how they can be used by teachers to prepare and manage gamified programming courses, and report the results of the usability evaluation by the teachers using the platform in their classes.}, copyright = {http://creativecommons.org/licenses/by/3.0/}, @@ -3314,7 +3162,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3059009.3059026}, - url = {https://dl.acm.org/doi/10.1145/3059009.3059026}, urldate = {2023-10-05}, abstract = {We present GradeIT, a system that combines the dual objectives of automated grading and program repairing for introductory programming courses (CS1). Syntax errors pose a significant challenge for testcase-based grading as it is difficult to differentiate between a submission that is almost correct and has some minor syntax errors and another submission that is completely off-the-mark. GradeIT also uses program repair to help in grading submissions that do not compile. This enables running testcases on submissions containing minor syntax errors, thereby awarding partial marks for these submissions (which, without repair, do not compile successfully and, hence, do not pass any testcase). Our experiments on 15613 submissions show that GradeIT results are comparable to manual grading by teaching assistants (TAs), and do not suffer from unintentional variability that happens when multiple TAs grade the same assignment. The repairs performed by GradeIT enabled successful compilation of 56\% of the submissions having compilation errors, and resulted in an improvement in marks for 11\% of these submissions.}, isbn = {978-1-4503-4704-4}, @@ -3334,7 +3181,6 @@ New York, NY}, pages = {122--126}, issn = {0097-8418}, doi = {10.1145/169073.169362}, - url = {https://dl.acm.org/doi/10.1145/169073.169362}, urldate = {2022-02-24}, langid = {english}, file = {/home/charlotte/sync/Zotero/storage/PJBF6F66/Pattis - 1993 - The “procedures early” approach in CS 1 a heresy.pdf} @@ -3360,7 +3206,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3340531.3412026}, - url = {https://doi.org/10.1145/3340531.3412026}, urldate = {2022-07-01}, abstract = {Hierarchically structured data are commonly represented as trees and have given rise to popular data formats like XML or JSON. An interesting query computes the difference between two versions of a tree, expressed as the minimum set of node edits (deletion, insertion, label rename) that transform one tree into another, commonly known as the tree edit distance. Unfortunately, the fastest tree edit distance algorithms run in cubic time and quadratic space and are therefore not feasible for large inputs. In this paper, we leverage the fact that the difference between two versions of a tree is typically much smaller than the overall tree size. We propose a new tree edit distance algorithm that is linear in the tree size for similar trees. Our algorithm is based on the new concept of top node pairs and avoids redundant distance computations, the main issue with previous solutions for tree diffs. 
We empirically evaluate the runtime of our algorithm on large synthetic and real-world trees; our algorithm clearly outperforms the state of the art, often by orders of magnitude.}, isbn = {978-1-4503-6859-9}, @@ -3377,7 +3222,6 @@ New York, NY}, volume = {12}, number = {85}, pages = {2825--2830}, - url = {http://jmlr.org/papers/v12/pedregosa11a.html}, urldate = {2021-04-01}, abstract = {Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing machine learning to non-specialists using a general-purpose high-level language. Emphasis is put on ease of use, performance, documentation, and API consistency. It has minimal dependencies and is distributed under the simplified BSD license, encouraging its use in both academic and commercial settings. Source code, binaries, and documentation can be downloaded from http://scikit-learn.sourceforge.net.}, file = {/home/charlotte/sync/Zotero/storage/BSENXYMJ/Pedregosa et al. - 2011 - Scikit-learn Machine Learning in Python.pdf} } @@ -3387,8 +3231,8 @@ New York, NY}, title = {Teach {{Yourself Programming}} in {{Ten Years}}}, author = {{Peter Norvig}}, year = {2001}, - url = {http://norvig.com/21-days.html}, urldate = {2022-08-16}, + howpublished = {http://norvig.com/21-days.html}, file = {/home/charlotte/sync/Zotero/storage/VTNWVKYG/21-days.html} } @@ -3404,7 +3248,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2157136.2157267}, - url = {https://doi.org/10.1145/2157136.2157267}, urldate = {2021-10-07}, abstract = {Jutge.org is an open access educational online programming judge where students can try to solve more than 800 problems using 22 programming languages. The verdict of their solutions is computed using exhaustive test sets run under time, memory and security restrictions. By contrast to many popular online judges, Jutge.org is designed for students and instructors: On one hand, the problem repository is mainly aimed to beginners, with a clear organization and grading. On the other hand, the system is designed as a virtual learning environment where instructors can administer their own courses, manage their roster of students and tutors, add problems, attach documents, create lists of problems, assignments, contests and exams. This paper presents Jutge.org and offers some case studies of courses using it.}, isbn = {978-1-4503-1098-7}, @@ -3423,7 +3266,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3287324.3287507}, - url = {https://doi.org/10.1145/3287324.3287507}, urldate = {2022-08-16}, abstract = {With the continued growth of enrollment within computer science courses, it has become an increasing necessity to utilize autograding systems. These systems have historically graded assignments through either a jailed sandbox environment or within a virtual machine (VM). For a VM, each submission is given its own instantiation of a guest operating system and virtual hardware that runs atop the host system, preventing anything that runs within the VM communicating with any other VM or the host. However, using these VMs is costly in terms of system resources, making it less than ideal for running student submissions given reasonable, limited resources. 
Jailed sandboxes, on the other hand, run on the host itself, thus taking up minimal resources, and utilize a security model that restricts the process to specified directories on the system. However, due to running on the host machine, the approach suffers as new courses utilize autograding and bring their own set of potentially conflicting requirements for programming languages and system packages. Over the past several years, containers have seen growing popularity in usage within the software engineering industry as well as within autograding systems. Containers provide similar benefits of isolation as a VM while maintaining similar resource cost to running within a jailed sandbox environment. We present the implementation of both a jailed sandbox and container-based autograder, compare the running time and memory usage of the two implementations, and discuss the overall resource usage.}, isbn = {978-1-4503-5890-3}, @@ -3458,7 +3300,6 @@ New York, NY}, pages = {1--1}, issn = {2169-3536}, doi = {10.1109/ACCESS.2024.3365368}, - url = {https://ieeexplore.ieee.org/document/10433192}, urldate = {2024-02-15}, abstract = {Literature reviews on artificial intelligence (AI) have focused on the different applications of AI in higher education, the AI techniques used, and the benefits/risks of the use of AI. One of the greatest potentials of AI is to personalize higher education to the needs of students and offer timely feedback. This could benefit students with disabilities tremendously if their needs are also considered in the development of new AI educational technologies (EdTech). However, current reviews have failed to address the perspective of students with disabilities, which prompts ethical concerns. For instance, AI could treat people with disabilities as outliers in the data and end up discriminating against them. For that reason, this systematic literature review raises the following two questions: To what extent are ethical concerns considered in articles presenting AI applications assessing students (with disabilities) in higher education? What are the potential risks of using AI that assess students with disabilities in higher education? This scoping review highlights the lack of ethical reflection on AI technologies and an absence of discussion and inclusion of people with disabilities. Moreover, it identifies eight risks associated with the use of AI EdTech for students with disabilities. The review concludes with suggestions on how to mitigate these potential risks. Specifically, it advocates for increased attention to ethics within the field, the involvement of people with disabilities in research and development, as well as careful adoption of AI EdTech in higher education.}, keywords = {Artificial intelligence,Artificial Intelligence,Assistive technologies,Bibliographies,Disabilities,Education,Educational technologies (EdTech),Educational technology,Ethics,Higher Education,Privacy,Protocols,Risk Assessment,Risk management}, @@ -3475,7 +3316,6 @@ New York, NY}, pages = {3--12}, issn = {1536-0768}, doi = {10.1002/tl.37219956304}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/tl.37219956304}, urldate = {2022-09-09}, abstract = {Self-regulated learning is an important component of learning for college students. 
Students can learn how to become self-regulated learners, and faculty can foster self-regulated learning in their classrooms.}, langid = {english}, @@ -3493,7 +3333,6 @@ New York, NY}, pages = {76}, issn = {1662-5196}, doi = {10.3389/fninf.2017.00076}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5778115/}, urldate = {2022-06-14}, pmcid = {PMC5778115}, pmid = {29403370}, @@ -3529,7 +3368,6 @@ New York, NY}, publisher = {Routledge}, issn = {0260-2938}, doi = {10.1080/02602930601127869}, - url = {https://doi.org/10.1080/02602930601127869}, urldate = {2024-05-07}, abstract = {While effective feedback has frequently been identified as a key strategy in learning and teaching, little known research has focused on students' perceptions of feedback and the contribution feedback makes to students' learning and teaching. This reported qualitative study aims to enrich our understanding of these perceptions and importantly to provide insight into the meaning of `effective' when related to feedback. The study involved four focus groups of undergraduate students of varying levels and from a range of Schools completing degrees in the Faculty of Health Sciences, University of Sydney. Students' perceptions relating to a definition of feedback, how they use it and preferences for delivery were prompted by the facilitators. Thematic analysis resulted in three key dimensions: perceptions of feedback, impact of feedback and credibility of feedback. The analysis demonstrated that effectiveness of feedback extends beyond mode of delivery and timeliness to include the credibility of the lecturer giving the feedback. The role of effective feedback includes not only enhancing learning and teaching but also facilitating the transition between school and university.}, file = {/home/charlotte/sync/Zotero/storage/ZK345SIL/poulos2008.pdf.pdf} @@ -3546,7 +3384,6 @@ New York, NY}, pages = {223--231}, issn = {2168-9830}, doi = {10.1002/j.2168-9830.2004.tb00809.x}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/j.2168-9830.2004.tb00809.x}, urldate = {2021-08-26}, abstract = {This study examines the evidence for the effectiveness of active learning. It defines the common forms of active learning most relevant for engineering faculty and critically examines the core element of each method. It is found that there is broad but uneven support for the core elements of active, collaborative, cooperative and problem-based learning.}, langid = {english}, @@ -3565,7 +3402,6 @@ New York, NY}, number = {3}, pages = {19:1--19:35}, doi = {10.1145/3394963}, - url = {https://doi.org/10.1145/3394963}, urldate = {2021-09-30}, abstract = {This article provides a survey of methods and paradigms for teaching Computer Networks (CN). Since the theoretical concepts are rather abstract in this subject, and students often find them too technical and difficult to understand, many authors attempt to answer the question on how to improve students' motivation and interest for the complex teaching material of CN. In this work, we follow a rigorous paper collection methodology and extract a large number of previous studies that relate to the stated research questions. Also, we find that there is no review article in the current literature that would provide a clear systematization or a guided study on this topic. 
Hence, this work provides a literature overview by binding all the previously used methods for teaching CN in one place, and brings contribution by classifying the existing approaches into four basic classes: methods based on using visualization objects such as network simulators, multimedia applications, packet-tracing tools or visual analogies; methods based on using the virtualization techniques; methods precipitating active learning paradigm and methods based on the practical hands-on laboratory exercises. Moreover, the research in this article goes beyond the proposed classification. The classes of methods and tools are compared and contrasted based on the findings from the literature. Methods are evaluated by a detailed cross-comparison based on their advantages, disadvantages and challenges in the perspective of both teachers and students. The review is additionally strengthened by comparing the educational effectiveness of the classified methods. We examine, classify, and contrast the usual approaches used in teaching CN, provide useful insights on how appropriate they are in achieving specific educational goals and determine the future research directions.}, keywords = {Computer networks,literature survey,network simulators,teaching methods}, @@ -3584,7 +3420,6 @@ New York, NY}, publisher = {Routledge}, issn = {1050-8406}, doi = {10.1080/10508406.2013.836656}, - url = {https://doi.org/10.1080/10508406.2013.836656}, urldate = {2021-09-15}, abstract = {As secondary students' interest in science is decreasing, schools are faced with the challenging task of providing adequate instruction to engage students---and more particularly the disadvantaged students---to learn science and improve their science inquiry skills. In this respect, the integration of Web-based collaborative inquiry can be seen as a possible answer. However, the differential effects of Web-based inquiry on disadvantaged students have barely been studied. To bridge this gap, this study deals with the implementation of a Web-based inquiry project in 19 secondary classes and focuses specifically on gender, achievement level, and academic track. Multilevel analysis was applied to uncover the effects on knowledge acquisition, inquiry skills, and interest in science. The study provides quantitative evidence not only that a Web-based collaborative inquiry project is an effective approach for science learning, but that this approach can also offer advantages for students who are not typically successful in science or who are not enrolled in a science track. This approach can contribute to narrowing the gap between boys and girls in science and can give low-achieving students and general-track students an opportunity to develop confidence and skills for learning science, bringing them to a performance level that is closer to that of high-achieving students.}, file = {/home/charlotte/sync/Zotero/storage/5A6P3CLX/Raes et al. - 2014 - Web-based Collaborative Inquiry to Bridge Gaps in .pdf;/home/charlotte/sync/Zotero/storage/ECGB3N97/10508406.2013.html} @@ -3613,7 +3448,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/65293.71198}, - url = {https://dl.acm.org/doi/10.1145/65293.71198}, urldate = {2024-02-07}, abstract = {This paper discusses TRY, a software package for the UNIX1 operating system that tests student programs. 
The motivation for developing the system is established by describing problems associated with traditional grading methods and electronic program submission. The design and use of the TRY system is discussed, along with the advantages it provides to both the student and the instructor.}, isbn = {978-0-89791-298-3}, @@ -3631,7 +3465,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3377814.3381712}, - url = {https://dl.acm.org/doi/10.1145/3377814.3381712}, urldate = {2024-01-10}, abstract = {Introductory programming is challenging for many students, requiring them to engage with a deep approach to learning concepts in order to succeed. These challenges compound for online students who do not have direct face-to-face interactions with teaching staff. With the growing demand for online education, we need to examine approaches that assist in building supportive learning environments for these students. A growing body of work from other education disciplines indicates that audio feedback provides an opportunity for developing stronger relationships with students. Further studies recommend an integrated implementation of audio recording into the virtual learning environment. To evaluate audio feedback for use in programming education, we developed an integrated, cross-browser audio feedback feature into the open-source Doubtfire learning management system. Doubtfire is used to support and scale a task-oriented teaching and learning system built upon the principles of constructive alignment and has been shown to help students engage with programming concepts in campus-only units. Our findings from experimental and observational activities indicate that programming tutors can use a blended approach of audio and text feedback via the learning management system to better support student learning. The blended approach provides more nuanced feedback, conveying personality and feelings of connectedness to students, while retaining the benefits of specificity for code-specific issues.}, isbn = {978-1-4503-7124-7}, @@ -3651,7 +3484,6 @@ New York, NY}, pages = {37--64}, issn = {1560-4306}, doi = {10.1007/s40593-015-0070-z}, - url = {https://doi.org/10.1007/s40593-015-0070-z}, urldate = {2022-08-03}, abstract = {To provide personalized help to students who are working on code-writing problems, we introduce a data-driven tutoring system, ITAP (Intelligent Teaching Assistant for Programming). ITAP uses state abstraction, path construction, and state reification to automatically generate personalized hints for students, even when given states that have not occurred in the data before. We provide a detailed description of the system's implementation and perform a technical evaluation on a small set of data to determine the effectiveness of the component algorithms and ITAP's potential for self-improvement. The results show that ITAP is capable of producing hints for almost any given state after being given only a single reference solution, and that it can improve its performance by collecting data over time.}, langid = {english}, @@ -3672,7 +3504,6 @@ New York, NY}, publisher = {Routledge}, issn = {0899-3408}, doi = {10.1076/csed.13.2.137.14200}, - url = {https://doi.org/10.1076/csed.13.2.137.14200}, urldate = {2022-02-24}, abstract = {In this paper we review the literature relating to the psychological/educational study of programming. 
We identify general trends comparing novice and expert programmers, programming knowledge and strategies, program generation and comprehension, and object-oriented versus procedural programming. (We do not cover research relating specifically to other programming styles.) The main focus of the review is on novice programming and topics relating to novice teaching and learning. Various problems experienced by novices are identified, including issues relating to basic program design, to algorithmic complexity in certain language features, to the ``fragility'' of novice knowledge, and so on. We summarise this material and suggest some practical implications for teachers. We suggest that a key issue that emerges is the distinction between effective and ineffective novices. What characterises effective novices? Is it possible to identify the specific deficits of ineffective novices and help them to become effective learners of programming?}, file = {/home/charlotte/sync/Zotero/storage/VIAWJTPE/Robins et al. - 2003 - Learning and Teaching Programming A Review and Di.pdf;/home/charlotte/sync/Zotero/storage/WJFWFIHW/csed.13.2.137.html} @@ -3690,7 +3521,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2556325.2567876}, - url = {https://doi.org/10.1145/2556325.2567876}, urldate = {2022-08-16}, abstract = {Coding style is important to teach to beginning programmers, so that bad habits don't become permanent. This is often done manually at the University level because automated Python static analyzers cannot accurately grade based on a given rubric. However, even manual analysis of coding style encounters problems, as we have seen quite a bit of inconsistency among our graders. We introduce ACCE--Automated Coding Composition Evaluator--a module that automates grading for the composition of programs. ACCE, given certain constraints, assesses the composition of a program through static analysis, conversion from code to AST, and clustering (unsupervised learning), helping automate the subjective process of grading based on style and identifying common mistakes. Further, we create visual representations of the clusters to allow readers and students understand where a submission falls, and the overall trends. We have applied this tool to CS61A--a CS1 level course at UC, Berkeley experiencing rapid growth in student enrollment--in an attempt to help expedite the involved process as well as reduce human grader inconsistencies.}, isbn = {978-1-4503-2669-8}, @@ -3710,7 +3540,6 @@ New York, NY}, pages = {368--384}, issn = {0360-1315}, doi = {10.1016/j.compedu.2007.05.016}, - url = {https://www.sciencedirect.com/science/article/pii/S0360131507000590}, urldate = {2024-02-13}, abstract = {Educational data mining is an emerging discipline, concerned with developing methods for exploring the unique types of data that come from the educational context. This work is a survey of the specific application of data mining in learning management systems and a case study tutorial with the Moodle system. Our objective is to introduce it both theoretically and practically to all users interested in this new research area, and in particular to online instructors and e-learning administrators. We describe the full process for mining e-learning data step by step as well as how to apply the main data mining techniques used, such as statistics, visualization, classification, clustering and association rule mining of Moodle data. 
We have used free data mining tools so that any user can immediately begin to apply data mining without having to purchase a commercial tool or program a specific personalized tool.}, keywords = {Data mining,Distance education and telelearning,E-learning,Evaluation of CAL systems,Web mining}, @@ -3745,7 +3574,6 @@ New York, NY}, pages = {e1355}, issn = {1942-4795}, doi = {10.1002/widm.1355}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/widm.1355}, urldate = {2024-05-08}, abstract = {This survey is an updated and improved version of the previous one published in 2013 in this journal with the title ``data mining in education''. It reviews in a comprehensible and very general way how Educational Data Mining and Learning Analytics have been applied over educational data. In the last decade, this research area has evolved enormously and a wide range of related terms are now used in the bibliography such as Academic Analytics, Institutional Analytics, Teaching Analytics, Data-Driven Education, Data-Driven Decision-Making in Education, Big Data in Education, and Educational Data Science. This paper provides the current state of the art by reviewing the main publications, the key milestones, the knowledge discovery cycle, the main educational environments, the specific tools, the free available datasets, the most used methods, the main objectives, and the future trends in this research area. This article is categorized under: Application Areas {$>$} Education and Learning}, copyright = {{\copyright} 2020 Wiley Periodicals, Inc.}, @@ -3765,7 +3593,6 @@ New York, NY}, pages = {142--166}, issn = {0097-8418}, doi = {10.1145/1473195.1473239}, - url = {https://doi.org/10.1145/1473195.1473239}, urldate = {2022-08-16}, abstract = {Many individual instructors -- and, in some cases, entire universities -- are gravitating towards the use of comprehensive learning management systems (LMSs), such as Blackboard and Moodle, for managing courses and enhancing student learning. As useful as LMSs are, they are short on features that meet certain needs specific to computer science education. On the other hand, computer science educators have developed--and continue to develop-computer-based software tools that aid in management, teaching, and/or learning in computer science courses. In this report we provide an overview of current CS specific on-line learning resources and guidance on how one might best go about extending an LMS to include such tools and resources. We refer to an LMS that is extended specifically for computer science education as a Computing Augmented Learning Management System, or CALMS. We also discuss sound pedagogical practices and some practical and technical principles for building a CALMS. However, we do not go into details of creating a plug-in for some specific LMS. Further, the report does not favor one LMS over another as the foundation for a CALMS.}, keywords = {CALMS,computer science education,computing augmented learning management system,learning management system,LMS}, @@ -3783,7 +3610,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/1044550.1041669}, - url = {https://doi.org/10.1145/1044550.1041669}, urldate = {2021-02-19}, abstract = {The factors that contribute to success and failure in introductory programming courses continue to be a topic of lively debate, with recent conference panels and papers devoted to the subject (e.g. Rountree et al. 2004, Ventura et al., 2004, Gal-Ezer et al., 2003). 
Most work in this area has concentrated on the ability of single factors (e.g. gender, math background, etc.) to predict success, with the exception of Wilson et al. (2001), which used a general linear model to gauge the effect of combined factors. In Rountree et al. (2002) we presented the results of a survey of our introductory programming class that considered factors (such as student expectations of success, among other things) in isolation. In this paper, we reassess the data from that survey by using a decision tree classifier to identify combinations of factors that interact to predict success or failure more strongly than single, isolated factors.}, isbn = {978-1-4503-7794-2}, @@ -3805,7 +3631,6 @@ New York, NY}, year = {2020}, journal = {{instname:Universidad de los Andes}}, publisher = {Universidad de los Andes}, - url = {https://repositorio.uniandes.edu.co/handle/1992/44754}, urldate = {2022-07-06}, abstract = {"Determinar si dos programas son similares no es una tarea simple. En este trabajo exploramos e implementamos un acercamiento hacia determinar qu{\'e} tan similares son dos programas de Python usando arboles sint{\'a}cticos abstractos similar al trabajo realizado por Avery et al. [1]. Luego se us{\'o} esta implementaci{\'o}n para analizar los programas previamente recopilados y clasificados por la herramienta Senecode con la intenci{\'o}n de poder dar retroalimentaci{\'o}n autom{\'a}tica significativa." -- Tomado del Formato de Documento de Grado.}, copyright = {Al consultar y hacer uso de este recurso, est{\'a} aceptando las condiciones de uso establecidas por los autores.}, @@ -3826,7 +3651,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3287324.3287384}, - url = {https://doi.org/10.1145/3287324.3287384}, urldate = {2022-03-03}, abstract = {Traditionally, students learn about software testing during intermediate or advanced computing courses. However, it is widely advocated that testing should be addressed beginning in introductory programming courses. In this context, testing practices can help students think more critically while working on programming assignments. At the same time, students can develop testing skills throughout the computing curriculum. Considering this scenario, we conducted a systematic mapping of the literature about software testing in introductory programming courses, resulting in 293 selected papers. We mapped the papers to categories with respect to their investigated topic (curriculum, teaching methods, programming assignments, programming process, tools, program/test quality, concept understanding, and students' perceptions and behaviors) and evaluation method (literature review, exploratory study, descriptive/persuasive study, survey, qualitative study, experimental and experience report). We also identified the benefits and drawbacks of this teaching approach, as pointed out in the selected papers. The goal is to provide an overview of research performed in the area, highlighting gaps that should be further investigated.}, isbn = {978-1-4503-5890-3}, @@ -3846,7 +3670,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/872757.872770}, - url = {https://doi.org/10.1145/872757.872770}, urldate = {2022-08-16}, abstract = {Digital content is for copying: quotation, revision, plagiarism, and file sharing all create copies. 
Document fingerprinting is concerned with accurately identifying copying, including small partial copies, within large sets of documents. We introduce the class of local document fingerprinting algorithms, which seems to capture an essential property of any fingerprinting technique guaranteed to detect copies. We prove a novel lower bound on the performance of any local algorithm. We also develop winnowing, an efficient local fingerprinting algorithm, and show that winnowing's performance is within 33\% of the lower bound. Finally, we also give experimental results on Web data, and report experience with MOSS, a widely-used plagiarism detection service.}, isbn = {978-1-58113-634-0}, @@ -3875,7 +3698,6 @@ New York, NY}, publisher = {Routledge}, issn = {0899-3408}, doi = {10.1076/csed.13.4.269.17492}, - url = {https://doi.org/10.1076/csed.13.4.269.17492}, urldate = {2022-02-24}, abstract = {Although many professionals in education believe that an `objects first' approach is the best method of introducing object-oriented technology, there is no common agreement on how to start such courses. Current study programs often begin by teaching a chosen object-oriented programing language, where students are confronted by a large amount of syntactical detail. Instead of focusing on the basics of object-oriented technology, namely objects and their collaborations, difficulties in handling the details of the programing language lead to a very scattered knowledge of object-oriented concepts. This is dangerous, as learners are left with a set of unconnected knowledge fragments. Approaches which embed different knowledge fragments in an overall knowledge view are known as ``cognitive apprenticeship'' approaches. The main idea of cognitive apprenticeship is continuous practice. We present a learning environment for introducing object-oriented technology in upper secondary schools based on cognitive apprenticeship. We use a visual programing language to abstract away from the details and provide tool support to aid practice. We present the learning sequence which is used and show the impacts it makes on the course structure in our experiment in the chosen object-oriented programming language.}, file = {/home/charlotte/sync/Zotero/storage/SJNFC4UR/csed.13.4.269.html} } @@ -3892,7 +3714,6 @@ New York, NY}, title = {{TESTed: programmeertaal-onafhankelijk testen van oplossingen voor programmeeroefeningen: Eenvoudig oefeningen opstellen met een DSL}}, author = {Sels, Boris and Dawyndt, Peter and Mesuere, Bart and Strijbol, Niko and Van Petegem, Charlotte}, year = {2021}, - url = {http://lib.ugent.be/catalog/rug01:003008250}, langid = {und}, school = {Ghent University}, file = {/home/charlotte/sync/Zotero/storage/34VD6QYR/Sels et al. - 2021 - TESTed programmeertaal-onafhankelijk testen van o.pdf} } @@ -3930,7 +3751,6 @@ New York, NY}, pages = {201--219}, publisher = {Informing Science Institute}, issn = {1539-3585}, - url = {https://www.learntechlib.org/p/111541/}, urldate = {2021-04-30}, abstract = {In search of better, more cost effective ways to deliver instruction and training, universities and corporations have expanded their use of e-learning. 
Although several studies suggest that online education and blended instruction (a ``blend'' of online and traditional approaches) can be as effective as traditional classroom models, few studies have focused on learner satisfaction with online instruction, particularly in the transition to online learning from traditional approaches. This study examines students' perceptions of integrating online components in two undergraduate business...}, langid = {english}, @@ -3946,7 +3766,6 @@ New York, NY}, pages = {40--67}, publisher = {IGI Global}, doi = {10.4018/978-1-6684-9039-6.ch003}, - url = {https://www.igi-global.com/chapter/codeflex-20/www.igi-global.com/chapter/codeflex-20/333551}, urldate = {2023-12-01}, abstract = {This work presents the design and implementation of Codeflex, a web-based platform and repository of programming problems, that enables the learning and practice of competitive programming in multiple programming language paradigms. The Codeflex programming platform performs automatic evaluation of...}, copyright = {Access limited to members}, @@ -3995,7 +3814,6 @@ New York, NY}, number = {RFC 2445}, institution = {Internet Engineering Task Force}, doi = {10.17487/RFC2445}, - url = {https://datatracker.ietf.org/doc/rfc2445}, urldate = {2022-08-16}, abstract = {This memo has been defined to provide the definition of a common format for openly exchanging calendaring and scheduling information across the Internet. [STANDARDS-TRACK]}, file = {/home/charlotte/sync/Zotero/storage/FUXY8PH9/Stenerson and Dawson - 1998 - Internet Calendaring and Scheduling Core Object Sp.pdf} } @@ -4012,7 +3830,6 @@ New York, NY}, pages = {491--535}, issn = {1573-7616}, doi = {10.1007/s10664-018-9644-3}, - url = {https://doi.org/10.1007/s10664-018-9644-3}, urldate = {2022-07-06}, abstract = {Change distilling algorithms compute a sequence of fine-grained changes that, when executed in order, transform a given source AST into a given target AST. The resulting change sequences are used in the field of mining software repositories to study source code evolution. Unfortunately, detecting and specifying source code evolutions in such a change sequence is cumbersome. We therefore introduce a tool-supported approach that identifies minimal executable subsequences in a sequence of distilled changes that implement a particular evolution pattern, specified in terms of intermediate states of the AST that undergoes each change. This enables users to describe the effect of multiple changes, irrespective of their execution order, while ensuring that different change sequences that implement the same code evolution are recalled. Correspondingly, our evaluation is two-fold. We show that our approach is able to recall different implementation variants of the same source code evolution in histories of different software projects. We also evaluate the expressiveness and ease-of-use of our approach in a user study.}, langid = {english}, @@ -4040,7 +3857,6 @@ New York, NY}, author = {Streibel, Michael J.}, year = {1985}, month = apr, - url = {https://eric.ed.gov/?id=ED263881}, urldate = {2024-02-07}, abstract = {Three major approaches to the use of computers in education are examined, serious limitations of each are presented, and questions are raised as to the efficacy of technologizing education. The drill and practice approach is shown to embody a deterministic, behavioral technology that turns learning into a systematically-designed and quality-controlled form of work. 
Computerized tutorial programs are shown to extend the behavioral and technological approach to learning even further by shaping interactions via an external agent's intentions in order to maximize the learner's performance gains. Most seriously, computerized tutorial interactions pre-empt the personal intellectual agency and ultimately inner-directed learning. Finally, the use of computers is shown to limit the learner's mental landscape to objective, quantitative, and procedural tools. A list of references completes the document. (JB)}, langid = {english}, @@ -4077,7 +3893,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3587103.3594189}, - url = {https://dl.acm.org/doi/10.1145/3587103.3594189}, urldate = {2023-11-16}, abstract = {Debugging is an important aspect of programming. Most programming languages have some features and tools to facilitate debugging. As the debugging process is also frustrating, it requires good scaffolding, in which a debugger can be a useful tool [3]. Scratch is a visual block-based programming language that is commonly used to teach programming to children, aged 10--14 [4]. It comes with its own integrated development environment (IDE), where children can edit and run their code. This IDE misses some of the tools that are available in traditional IDEs, such as a debugger. In response to this challenge, we developed Blink. Blink is a debugger for Scratch with the aim of being usable to the young audience that typically uses Scratch. We present the currently implemented features of the debugger, and the challenges we faced while implementing those, both from a user-experience standpoint and a technical standpoint.}, isbn = {9798400701399}, @@ -4096,7 +3911,6 @@ New York, NY}, publisher = {Elsevier}, issn = {2352-7110}, doi = {10.1016/j.softx.2023.101617}, - url = {https://www.softxjournal.com/article/S2352-7110(23)00313-8/fulltext}, urldate = {2024-03-29}, langid = {english}, keywords = {Block-based programming,Debugging,Programming,Scratch}, @@ -4113,7 +3927,6 @@ New York, NY}, pages = {101404}, issn = {2352-7110}, doi = {10.1016/j.softx.2023.101404}, - url = {https://www.sciencedirect.com/science/article/pii/S2352711023001000}, urldate = {2023-05-16}, abstract = {In educational contexts, automated assessment tools (AAT) are commonly used to provide formative feedback on programming exercises. However, designing exercises for AAT remains a laborious task or imposes limitations on the exercises. Most AAT use either output comparison, where the generated output is compared against an expected output, or unit testing, where the tool has access to the code of the submission under test. While output comparison has the advantage of being programming language independent, the testing capabilities are limited to the output. Conversely, unit testing can generate more granular feedback, but is tightly coupled with the programming language of the submission. In this paper, we introduce TESTed, which enables the best of both worlds: combining the granular feedback of unit testing with the programming language independence of output comparison. Educators can save time by designing exercises that can be used across programming languages. 
Finally, we report on using TESTed in educational practice.}, langid = {english}, @@ -4125,7 +3938,6 @@ New York, NY}, title = {{TESTed: one judge to rule them all}}, author = {Strijbol, Niko and Dawyndt, Peter and Mesuere, Bart}, year = {2020}, - url = {http://lib.ugent.be/catalog/rug01:002836313}, langid = {dutch}, school = {Ghent University}, file = {/home/charlotte/sync/Zotero/storage/4QXZ2HIJ/Strijbol et al. - 2020 - TESTed one judge to rule them all.pdf} @@ -4143,7 +3955,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2950290.2950321}, - url = {https://dl.acm.org/doi/10.1145/2950290.2950321}, urldate = {2023-11-23}, abstract = {Detecting ``similar code'' is useful for many software engineering tasks. Current tools can help detect code with statically similar syntactic and--or semantic features (code clones) and with dynamically similar functional input/output (simions). Unfortunately, some code fragments that behave similarly at the finer granularity of their execution traces may be ignored. In this paper, we propose the term ``code relatives'' to refer to code with similar execution behavior. We define code relatives and then present DyCLINK, our approach to detecting code relatives within and across codebases. DyCLINK records instruction-level traces from sample executions, organizes the traces into instruction-level dynamic dependence graphs, and employs our specialized subgraph matching algorithm to efficiently compare the executions of candidate code relatives. In our experiments, DyCLINK analyzed 422+ million prospective subgraph matches in only 43 minutes. We compared DyCLINK to one static code clone detector from the community and to our implementation of a dynamic simion detector. The results show that DyCLINK effectively detects code relatives with a reasonable analysis time.}, isbn = {978-1-4503-4218-6}, @@ -4164,7 +3975,6 @@ New York, NY}, publisher = {American Chemical Society}, issn = {0095-2338}, doi = {10.1021/ci034160g}, - url = {https://doi.org/10.1021/ci034160g}, urldate = {2021-02-19}, abstract = {A new classification and regression tool, Random Forest, is introduced and investigated for predicting a compound's quantitative or categorical biological activity based on a quantitative description of the compound's molecular structure. Random Forest is an ensemble of unpruned classification or regression trees created by using bootstrap samples of the training data and random feature selection in tree induction. Prediction is made by aggregating (majority vote or averaging) the predictions of the ensemble. We built predictive models for six cheminformatics data sets. Our analysis demonstrates that Random Forest is a powerful tool capable of delivering performance that is among the most accurate methods to date. We also present three additional features of Random Forest:\, built-in performance assessment, a measure of relative importance of descriptors, and a measure of compound similarity that is weighted by the relative importance of descriptors. It is the combination of relatively high prediction accuracy and its collection of desired features that makes Random Forest uniquely suited for modeling in cheminformatics.}, file = {/home/charlotte/sync/Zotero/storage/NXAQCTYB/Svetnik et al. 
- 2003 - Random Forest A Classification and Regression To.pdf;/home/charlotte/sync/Zotero/storage/KLIJU6B7/ci034160g.html} @@ -4181,7 +3991,6 @@ New York, NY}, pages = {368--373}, issn = {0010-4620}, doi = {10.1093/comjnl/10.4.368}, - url = {https://doi.org/10.1093/comjnl/10.4.368}, urldate = {2024-02-07}, abstract = {A procedure to supply test data for a number of undergraduate programming exercises in the PL/1 language and check the validity of the programs is described. The procedure provides diagnostic information to the student and performs all necessary output, as well as maintaining complete records of student performance on magnetic disc storage. The procedure differs from many previous grading routines in being called as a precompiled library subroutine, and is the first known grading procedure for PL/1. The initial set of class problems and specimen output listings are appended.}, file = {/home/charlotte/sync/Zotero/storage/KUPEZD7J/temperly1968.pdf.pdf;/home/charlotte/sync/Zotero/storage/YN8PSZBD/Temperly and Smith - 1968 - A Grading Procedure for PL1 Student Exercises.pdf;/home/charlotte/sync/Zotero/storage/DDYSUANU/463937.html} @@ -4208,7 +4017,6 @@ New York, NY}, journal = {{Conference proceedings : Frontiers in Education Conference}}, publisher = {IEEE}, doi = {10.1109/FIE44824.2020.9274123}, - url = {https://jyx.jyu.fi/handle/123456789/73098}, urldate = {2022-09-15}, abstract = {This work-in-progress research investigates teacher-student communication via Learning Management Systems (LMS) in highly populated courses. An LMS called TIM (The Interactive Material) includes a specific commenting technology that attempts to make teacher-student dialog effortless. The research goal is to explore students' willingness to use the technology and identify patterns of usage. To these ends, a survey with both Likert and open-ended questions was issued to CS1 and CS2 students. A favorable student evaluation was observed while several critical viewpoints that inform technology development were revealed. We noticed that besides appreciating the possibility of making comments, many students found benefit from peripheral participation without being active in commenting themselves. Informal communication appared to be preferred, and the commenting technology was considered second to best channel in this regard, following face-to-face interaction. The results are discussed in the light of Transactional Distance Theory and related literature to inform basic research.}, copyright = {In Copyright}, @@ -4240,7 +4048,6 @@ New York, NY}, publisher = {Routledge}, issn = {1356-2517}, doi = {10.1080/13562517.2011.611870}, - url = {https://doi.org/10.1080/13562517.2011.611870}, urldate = {2024-02-05}, abstract = {The lived experience of academic teachers as they engage in feedback has received relatively little attention compared to student perspectives on feedback. The present study used an ethnographically informed methodology to investigate the everyday practices around undergraduates' writing of fourteen UK HE teachers, in a range of disciplines and institutions, focusing on teachers' perspectives. This paper presents analysis of interviews conducted as part of the study, in which feedback-giving emerged as significant, understood by participants in several potentially dissonant ways: as institutional requirement, as work and as dialogue. 
Findings suggest participants sometimes managed to reconcile these conflicts and carve out small spaces for dialogue with students, and also indicate that attempts to create greater opportunities for such work, by offering greater support and recognition at institutional level, must take account of teachers' need for a sense of personal investment in student writing in their disciplinary contexts.}, keywords = {academic literacies,dialogue,feedback,marking,student writing}, @@ -4257,7 +4064,6 @@ New York, NY}, pages = {58--78}, issn = {1465-3435}, doi = {10.1111/ejed.12019}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/ejed.12019}, urldate = {2022-10-03}, abstract = {The extremely rapid expansion of open educational resource (OER) initiatives and the millions of learners they attract can be understood as an indicator of an emerging revolution in education and learning. This article describes recent developments in this area and develops conceptual foundations for studies and policies on OER. We describe four different types of OER, locate these in a field of learning theories, and discuss how the wide adoption of OER may constrain and accelerate the transformation of learning and education in the knowledge society.}, langid = {english}, @@ -4276,7 +4082,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3287324.3287463}, - url = {https://doi.org/10.1145/3287324.3287463}, urldate = {2022-02-25}, abstract = {The introductory programming lab, with small cycles of teaching, coding, testing, and critique from instructors, is an extraordinarily productive learning experience for novice programmers. We wish to extend the availability of such critique through automation, capturing the essence of interaction between student and instructor as closely as possible. Integrated Development Environments and Automated Grading Systems provide constant feedback through static analysis and unit testing. But we also wish to tailor automated feedback to acknowledge commonly recurring issues with novice programmers, in keeping with the practice of a human instructor. We argue that the kinds of mistakes that novice programmers make, and the way they are reported to the novices, deserve special care. In this paper we provide examples of early programming antipatterns that have arisen from our teaching experience, and describe different ways of identifying and dealing with them automatically through our tool WebTA. Novice students may produce code that is close to a correct solution but contains syntactic errors; WebTA attempts to salvage the promising portions of the student's submission and suggest repairs that are more meaningful than typical compiler error messages. Alternatively, a student misunderstanding may result in well-formed code that passes unit tests yet contains clear design flaws; through additional analysis, WebTA can identify and flag them. Finally, certain types of antipattern can be anticipated and flagged by the instructor, based on the context of the course and the programming exercise; WebTA allows for customizable critique triggers and messages.}, isbn = {978-1-4503-5890-3}, @@ -4291,7 +4096,6 @@ New York, NY}, year = {2002}, pages = {2577--2578}, publisher = {Association for the Advancement of Computing in Education (AACE)}, - url = {https://www.learntechlib.org/primary/p/9603/}, urldate = {2024-01-31}, abstract = {The starting point of this contribution is the concept of Powerful Learning. 
It contains 6 characteristics which we transformed into a structured model. The centre of the model consists of student-centred education and it's accompanying coaching component. Surrounding the centre, there are three satellites: Content, problem-based learning and collaborative learning which are linked and in constant interaction with each other and the centre part. The improved concept is then illustrated with cases from different teacher education programmes at the university of Antwerp. In these cases we...}, isbn = {978-1-880094-46-4}, @@ -4302,7 +4106,6 @@ New York, NY}, title = {{Computationele benaderingen voor deductie van de computationele complexiteit van computerprogramma's}}, author = {Van Petegem, Charlotte and Dawyndt, Peter}, year = {2018}, - url = {http://lib.ugent.be/catalog/rug01:002479652}, langid = {dutch}, school = {Ghent University} } @@ -4318,7 +4121,6 @@ New York, NY}, pages = {101578}, issn = {2352-7110}, doi = {10.1016/j.softx.2023.101578}, - url = {https://www.sciencedirect.com/science/article/pii/S2352711023002741}, urldate = {2023-11-16}, abstract = {Dodona () is an intelligent tutoring system for computer programming. It provides real-time data and feedback to help students learn better and teachers teach better. Dodona is free to use and has more than 61 thousand registered users across many educational and research institutes, including 20 thousand new users in the last year. The source code of Dodona is available on GitHub under the permissive MIT open-source license. This paper presents Dodona and its design and look-and-feel. We highlight some of the features built into Dodona that make it possible to shorten feedback loops, and discuss an example of how these features can be used in practice. We also highlight some of the research opportunities that Dodona has opened up and present some future developments.}, keywords = {Computer-assisted instruction,Education,Interactive learning environments}, @@ -4337,7 +4139,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3587103.3594165}, - url = {https://dl.acm.org/doi/10.1145/3587103.3594165}, urldate = {2023-11-16}, abstract = {Dodona (dodona.ugent.be) is an intelligent tutoring system for learning computer programming, statistics and data science. It bridges the gap between assessment and learning by providing real-time data and feedback to help students learn better, teachers teach better and educational technology become more effective. We show how Dodona can be used as a virtual co-teacher to stimulate active learning and support challenge-based education in open and collaborative learning environments. We also highlight some of the opportunities and challenges we have faced in practice. Dodona is free to use and has more than 50 thousand registered users across many educational and research institutions, including 15 thousand new users in the last year. Dodona's source code is available on GitHub under the permissive MIT open-source license.}, isbn = {9798400701399}, @@ -4355,7 +4156,6 @@ New York, NY}, publisher = {SAGE Publications Inc}, issn = {0735-6331}, doi = {10.1177/07356331221085595}, - url = {https://doi.org/10.1177/07356331221085595}, urldate = {2022-08-16}, abstract = {We present a privacy-friendly early-detection framework to identify students at risk of failing in introductory programming courses at university. 
The framework was validated for two different courses with annual editions taken by higher education students (N = 2\,080) and was found to be highly accurate and robust against variation in course structures, teaching and learning styles, programming exercises and classification algorithms. By using interpretable machine learning techniques, the framework also provides insight into what aspects of practising programming skills promote or inhibit learning or have no or minor effect on the learning process. Findings showed that the framework was capable of predicting students' future success already early on in the semester.}, langid = {english}, @@ -4373,7 +4173,6 @@ New York, NY}, number = {1}, publisher = {University of Southern Queensland}, issn = {1324-0781}, - url = {https://eric.ed.gov/?id=EJ850351}, urldate = {2024-01-31}, abstract = {For centuries traditional university education has primarily focused on building foundational skills in particular disciplines via the transfer of knowledge from instructor to student. Today however, simply being able to reproduce knowledge is no longer adequate; students must also be able to apply their knowledge to changing, real world contexts. By sharing lessons learned and drawing parallels across cultural boundaries, the University of Antwerp, Belgium and the University of Arkansas, USA provide additional insight into how to effectively teach students these skills. Topics that are addressed include: the concepts of powerful learning, interactivity, adequate dialogue, and the post-development evaluation and effective use of e learning environments. (Contains 2 figures and 4 tables.)}, langid = {english}, @@ -4401,7 +4200,6 @@ New York, NY}, volume = {3426}, pages = {406--420}, issn = {1613-0073}, - url = {https://ceur-ws.org/Vol-3426/paper32.pdf}, urldate = {2023-10-04}, file = {/home/charlotte/sync/Zotero/storage/LPSH3EJT/Vasyliuk and Lytvyn - 2023 - Design and Implementation of a Ukrainian-Language .pdf} } @@ -4440,7 +4238,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2462476.2462501}, - url = {https://dl.acm.org/doi/10.1145/2462476.2462501}, urldate = {2023-08-21}, abstract = {As programming is the basis of many CS courses, meaningful activities in supporting students on their journey towards being better programmers is a matter of utmost importance. Programming is not only about learning simple syntax constructs and their applications, but about honing practical problem-solving skills in meaningful contexts. In this article, we describe our current work on an automated assessment system called Test My Code (TMC), which is one of the feedback and support mechanisms that we use in our programming courses. TMC is an assessment service that (1) enables building of scaffolding into programming exercises; (2) retrieves and updates tasks into the students' programming environment as students work on them, and (3) causes no additional overhead to students' programming process. Instructors benefit from TMC as it can be used to perform code reviews, and collect and send feedback even on fully on-line courses.}, isbn = {978-1-4503-2078-8}, @@ -4468,7 +4265,6 @@ New York, NY}, pages = {159--185}, issn = {1560-4306}, doi = {10.1007/s40593-020-00230-2}, - url = {https://doi.org/10.1007/s40593-020-00230-2}, urldate = {2024-01-10}, abstract = {Massive open online courses (MOOCs) provide hundreds of students with teaching materials, assessment tools, and collaborative instruments. 
The assessment activity, in particular, is demanding in terms of both time and effort; thus, the use of artificial intelligence can be useful to address and reduce the time and effort required. This paper reports on a system and related experiments finalised to improve both the performance and quality of formative and summative assessments in specific data science courses. The system is developed to automatically grade assignments composed of R commands commented with short sentences written in natural language. In our opinion, the use of the system can (i) shorten the correction times and reduce the possibility of errors and (ii) support the students while solving the exercises assigned during the course through automated feedback. To investigate these aims, an ad-hoc experiment was conducted in three courses containing the specific topic of statistical analysis of health data. Our evaluation demonstrated that automated grading has an acceptable correlation with human grading. Furthermore, the students who used the tool did not report usability issues, and those that used it for more than half of the exercises obtained (on average) higher grades in the exam. Finally, the use of the system reduced the correction time and assisted the professor in identifying correction errors.}, langid = {english}, @@ -4487,7 +4283,6 @@ New York, NY}, pages = {65--84}, publisher = {Society for Information Technology \& Teacher Education}, issn = {1059-7069}, - url = {https://www.learntechlib.org/primary/p/18892/}, urldate = {2021-10-01}, abstract = {The purpose of this qualitative case study was to examine preservice teachers' experiences and the meaning they gave to their experiences in a "Technology Applications in Education" online course. The theoretical framework was based on the "Rich Environments for Active Learning" proposed by Grabinger and Dunlap (2000). The attributes of rich learning environments for active learning are student responsibility and initiative, generative learning activities, authentic learning contexts, authentic assessment strategies, and cooperative support. The study findings imply that the online learning...}, langid = {english} @@ -4513,7 +4308,6 @@ New York, NY}, pages = {3:1--3:34}, issn = {0360-0300}, doi = {10.1145/3143560}, - url = {https://doi.org/10.1145/3143560}, urldate = {2021-08-24}, abstract = {Online judges are systems designed for the reliable evaluation of algorithm source code submitted by users, which is next compiled and tested in a homogeneous environment. Online judges are becoming popular in various applications. Thus, we would like to review the state of the art for these systems. We classify them according to their principal objectives into systems supporting organization of competitive programming contests, enhancing education and recruitment processes, facilitating the solving of data mining challenges, online compilers and development platforms integrated as components of other custom systems. Moreover, we introduce a formal definition of an online judge system and summarize the common evaluation methodology supported by such systems. Finally, we briefly discuss an Optil.io platform as an example of an online judge system, which has been proposed for the solving of complex optimization problems. We also analyze the competition results conducted using this platform. 
The competition proved that online judge systems, strengthened by crowdsourcing concepts, can be successfully applied to accurately and efficiently solve complex industrial- and science-driven challenges.}, keywords = {challenge,contest,crowdsourcing,evaluation as a service,Online judge}, @@ -4531,7 +4325,6 @@ New York, NY}, publisher = {EDP Sciences}, issn = {2261-2424}, doi = {10.1051/shsconf/20207701004}, - url = {https://www.shs-conferences.org/articles/shsconf/abs/2020/05/shsconf_etltc2020_01004/shsconf_etltc2020_01004.html}, urldate = {2023-10-02}, abstract = {With the rapid development of information technology, programming has become a vital skill. An online judge system can be used as a programming education platform, where the daily activities of users and judges are used to generate useful learning objects (e.g., tasks, solution codes, evaluations). Intelligent software agents can utilize such objects to create an ecosystem. To implement such an ecosystem, a generic architecture that covers the whole lifecycle of data on the platform and the functionalities of an e-learning system should take into account the particularities of the online judge system. In this paper, an architecture that implements such an ecosystem based on an online judge system is proposed. The potential benefits and research challenges are discussed.}, copyright = {{\copyright} The Authors, published by EDP Sciences, 2020}, @@ -4550,7 +4343,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2591708.2591749}, - url = {https://doi.org/10.1145/2591708.2591749}, urldate = {2021-02-19}, abstract = {Whilst working on an upcoming meta-analysis that synthesized fifty years of research on predictors of programming performance, we made an interesting discovery. Despite several studies citing a motivation for research as the high failure rates of introductory programming courses, to date, the majority of available evidence on this phenomenon is at best anecdotal in nature, and only a single study by Bennedsen and Caspersen has attempted to determine a worldwide pass rate of introductory programming courses. In this paper, we answer the call for further substantial evidence on the CS1 failure rate phenomenon, by performing a systematic review of introductory programming literature, and a statistical analysis on pass rate data extracted from relevant articles. Pass rates describing the outcomes of 161 CS1 courses that ran in 15 different countries, across 51 institutions were extracted and analysed. An almost identical mean worldwide pass rate of 67.7\% was found. Moderator analysis revealed significant, but perhaps not substantial differences in pass rates based upon: grade level, country, and class size. However, pass rates were found not to have significantly differed over time, or based upon the programming language taught in the course. 
This paper serves as a motivation for researchers of introductory programming education, and provides much needed quantitative evidence on the potential difficulties and failure rates of this course.}, isbn = {978-1-4503-2833-3}, @@ -4569,7 +4361,6 @@ New York, NY}, pages = {138--143}, issn = {0097-8418}, doi = {10.1145/953055.5701}, - url = {https://dl.acm.org/doi/10.1145/953055.5701}, urldate = {2024-01-22}, abstract = {This study investigated the relationship between the student's grade in a beginning computer science course and their sex, age, high school and college academic performance, number of mathematics courses, and work experience. Standard measures of cognitive development, cognitive style, and personality factors were also given to 58 students in three sections of the beginning Pascal programming class. Significant relationships were found between the letter grade and the students' college grades, the number of hours worked and the number of high school mathematics classes. Both the Group Embedded Figures Test (GEFT) and the measure of Piagetian intellectual development stages were also significantly correlated with grade in the course. There was no relationship between grade and the personality type, as measured by the Myers-Briggs Type Indicator (MBTI); however, an interesting and distinctive personality profile was evident.}, file = {/home/charlotte/sync/Zotero/storage/4FDEYD73/werth1986.pdf.pdf;/home/charlotte/sync/Zotero/storage/VYZFFEPK/Werth - 1986 - Predicting student performance in a beginning comp.pdf} @@ -4581,7 +4372,6 @@ New York, NY}, author = {Wickham, Hadley and Chang, Winston and Henry, Lionel and Pedersen, Thomas Lin and Takahashi, Kohske and Wilke, Claus and Woo, Kara and Yutani, Hiroaki and Dunnington, Dewey and Posit and PBC}, year = {2023}, month = oct, - url = {https://cran.r-project.org/web/packages/ggplot2/index.html}, urldate = {2023-12-08}, abstract = {A system for 'declaratively' creating graphics, based on "The Grammar of Graphics". You provide the data, tell 'ggplot2' how to map variables to aesthetics, what graphical primitives to use, and it takes care of the details.}, copyright = {MIT + file LICENSE}, @@ -4606,7 +4396,6 @@ New York, NY}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2839509.2844616}, - url = {https://doi.org/10.1145/2839509.2844616}, urldate = {2022-08-16}, abstract = {Enrollments in introductory computer science courses are growing rapidly, thereby taxing scarce teaching resources and motivating the increased use of automated tools for program grading. Such tools commonly rely on regression testing methods from industry. However, the goals of automated grading differ from those of testing for software production. In academia, a primary motivation for testing is to provide timely and accurate feedback to students so that they can understand and fix defects in their programs. Testing strategies for program grading are therefore distinct from those of traditional software testing. 
This paper enumerates and describes a number of testing strategies that improve the quality of feedback for different types of programming assignments.}, isbn = {978-1-4503-3685-7}, @@ -4625,7 +4414,6 @@ New York, NY}, publisher = {Springer}, address = {New York, NY}, doi = {10.1007/978-1-4614-3185-5_63}, - url = {https://doi.org/10.1007/978-1-4614-3185-5_63}, urldate = {2022-10-03}, abstract = {This chapter begins by reviewing the many definitions of the term open educational resources and concludes by discussing challenges and opportunities for the approach. Open educational resources (OER) are educational materials either licensed under an open copyright license or in the public domain. Neither the term ``open educational resources'' nor the term ``open'' itself has an agreed upon definition in the literature. Research regarding open educational resources focuses on methods of producing OER, methods of sharing OER, and the benefits of OER. Significant issues relating to OER remain unresolved, including business model and discovery problems.}, isbn = {978-1-4614-3185-5}, @@ -4645,7 +4433,6 @@ New York, NY}, pages = {3--14}, issn = {0191-491X}, doi = {10.1016/j.stueduc.2011.03.001}, - url = {https://www.sciencedirect.com/science/article/pii/S0191491X11000149}, urldate = {2021-08-10}, abstract = {The idea that assessment is intrinsic to effective instruction is traced from early experiments in the individualization of learning through the work of Benjamin Bloom to reviews of the impact of feedback on learners in classrooms. While many of these reviews detailed the adverse impact of assessment on learning, they also indicated that under certain conditions assessment had considerable potential to enhance learning. It is shown that understanding the impact that assessment has on learning requires a broader focus than the feedback intervention itself, particularly the learner's responses to the feedback, and the learning milieu in which the feedback operates. Different definitions of the terms ``formative assessment'' and ``assessment for learning'' are discussed, and subsumed within a broad definition that focuses on the extent to which instructional decisions are supported by evidence. The paper concludes by exploring some of the consequences of this definition for classroom practice.}, langid = {english}, @@ -4665,7 +4452,6 @@ New York, NY}, publisher = {Nature Publishing Group}, issn = {2052-4463}, doi = {10.1038/sdata.2016.18}, - url = {https://www.nature.com/articles/sdata201618}, urldate = {2021-08-24}, abstract = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders---representing academia, industry, funding agencies, and scholarly publishers---have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the FAIR Data Principles. The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the FAIR Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. 
This Comment is the first formal publication of the FAIR Principles, and includes the rationale behind them, and some exemplar implementations in the community.}, copyright = {2016 The Author(s)}, @@ -4690,7 +4476,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Routledge}, issn = {0899-3408}, doi = {10.1076/csed.12.3.197.8618}, - url = {https://doi.org/10.1076/csed.12.3.197.8618}, urldate = {2022-08-16}, abstract = {A formal pair programming experiment was run at North Carolina to empirically assess the educational efficacy of the technique in a CS1 course. Results indicate that students who practice pair programming perform better on programming projects and are more likely to succeed by completing the class with a C or better. Student pairs are more self-sufficient which reduces their reliance on the teaching staff. Qualitatively, paired students demonstrate higher order thinking skills than students who work alone. These results are supportive of pair programming as a collaborative learning technique.} } @@ -4719,7 +4504,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Routledge}, issn = {0307-5079}, doi = {10.1080/03075079.2015.1130032}, - url = {https://doi.org/10.1080/03075079.2015.1130032}, urldate = {2022-03-04}, abstract = {For feedback to be effective, it must be used by the receiver. Prior research has outlined numerous reasons why students' use of feedback is sometimes limited, but there has been little systematic exploration of these barriers. In 11 activity-oriented focus groups, 31 undergraduate Psychology students discussed how they use assessment feedback. The data revealed many barriers that inhibit use of feedback, ranging from students' difficulties with decoding terminology, to their unwillingness to expend effort. Thematic analysis identified four underlying psychological processes: awareness, cognisance, agency, and volition. We argue that these processes should be considered when designing interventions to encourage students' engagement with feedback. Whereas the barriers identified could all in principle be removed, we propose that doing so would typically require -- or would at least benefit from -- a sharing of responsibility between teacher and student. The data highlight the importance of training students to be proactive receivers of feedback.}, keywords = {communication,feedback,focus groups,interventions,proactivity,student engagement}, @@ -4737,7 +4521,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/611892.611952}, - url = {https://doi.org/10.1145/611892.611952}, urldate = {2022-08-16}, abstract = {For five academic years we have engaged in an on-going study of the effectiveness of online assessment of student programming abilities for introductory programming courses in Computer Science. Our results show that online evaluation can be implemented securely, efficiently, and can result in increased student motivation and programming efficacy; however, unless online components are integrated throughout the course evaluations, student competence will be underestimated. 
Our data reveals disadvantages of online evaluations, but also shows that both students and faculty benefit when online evaluations are implemented appropriately.}, isbn = {978-1-58113-648-7}, @@ -4757,7 +4540,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Routledge}, issn = {1356-2517}, doi = {10.1080/13562510220144833}, - url = {https://doi.org/10.1080/13562510220144833}, urldate = {2022-08-16}, abstract = {This paper calls for a debate on effects that the current education system is having on our learners. Many students entering Higher Education struggle to rise to its rigorous academic demands. The need for support services is on the increase with greater focus on key skills, study skills and self-management. Students undertaking higher study can face financial hardship and emotional turmoil in striving to achieve, but the problems do not start here. Much of the trepidation felt by students comes as a result of earlier educational experiences, and is merely exacerbated as their learning experience progresses. It is time to re-assess the whole educational process and to question whether the system exists to encourage learning or to measure failure.} } @@ -4775,7 +4557,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {SAGE Publications Inc}, issn = {0735-6331}, doi = {10.1177/0735633118757015}, - url = {https://doi.org/10.1177/0735633118757015}, urldate = {2021-09-16}, abstract = {Massive open online courses (MOOCs) show great potential to transform traditional education through the Internet. However, the high attrition rates in MOOCs have often been cited as a scale-efficacy tradeoff. Traditional educational approaches are usually unable to identify such large-scale number of at-risk students in danger of dropping out in time to support effective intervention design. While building dropout prediction models using learning analytics are promising in informing intervention design for these at-risk students, results of the current prediction model construction methods do not enable personalized intervention for these students. In this study, we take an initial step to optimize the dropout prediction model performance toward intervention personalization for at-risk students in MOOCs. Specifically, based on a temporal prediction mechanism, this study proposes to use the deep learning algorithm to construct the dropout prediction model and further produce the predicted individual student dropout probability. By taking advantage of the power of deep learning, this approach not only constructs more accurate dropout prediction models compared with baseline algorithms but also comes up with an approach to personalize and prioritize intervention for at-risk students in MOOCs through using individual drop out probabilities. The findings from this study and implications are then discussed.}, langid = {english}, @@ -4795,7 +4576,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {World Scientific Publishing Co.}, issn = {1793-9623}, doi = {10.1142/S1793962323410015}, - url = {https://www.worldscientific.com/doi/abs/10.1142/S1793962323410015}, urldate = {2024-01-10}, abstract = {Artificial Intelligence (AI) systems have evolved with digital learning developments to provide thriving soft groups with digital opportunities in response to feedback. When it comes to learning environments, educators' training feedback is often used as a response recourse. 
Through the use of final evaluations, students receive feedback that improves their education abilities. To improve academic achievement and explore knowledge in the learning process, this section provides an AI-assisted personalized feedback system (AI-PFS). An individualized feedback system is implemented to learn more about the student's lack of academic experience interactivity and different collaboration behaviors. According to their benchmark, PFS aims to establish a personalized and reliable feedback process for each class based on their collaborative process and learn analytics modules. It has been proposed to use multi-objective implementations to evaluate students regarding the learning results and teaching methods. With different series of questions sessions for students, AI-PFS has been designed, and the findings showed that it greatly enhances the performance rate of 95.32\% with personalized and reasonable predictive.}, keywords = {artificial intelligence,feedback system,Students feedback}, @@ -4814,7 +4594,6 @@ Subject\_term\_id: publication-characteristics;research-data}, pages = {1572--1585}, issn = {0360-1315}, doi = {10.1016/j.compedu.2007.04.006}, - url = {https://www.sciencedirect.com/science/article/pii/S0360131507000255}, urldate = {2021-09-15}, abstract = {This experimental study investigated the effectiveness of structured Web-Based Bulletin Board (WBB) discussions in improving the critical thinking (CT) skills of learners involved in veterinary distance learning, as well as their attitudes toward learning via WBBs. The two dependent variables were learners' CT skills and their attitudes toward learning via WBBs. The learners' CT skills were examined in different ways: (a) quantitative method: California Critical Thinking Skills Test (CCTST) to holistically investigate the changes in learners' CT skills, and (b) qualitative method: Interaction Analysis Model to investigate learners' interaction patterns in different phases of the WBB discussions. Detailed information about inter-rater reliability, the training of the coders, and the coding process is provided. The findings indicated that structured WBBs significantly improved learners' CT skills and attitudes toward learning via WBBs.}, langid = {english}, @@ -4833,7 +4612,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/2568225.2568319}, - url = {https://doi.org/10.1145/2568225.2568319}, urldate = {2022-06-30}, abstract = {This paper introduces iProperty, a novel approach that facilitates incremental checking of programs based on a property differencing technique. Specifically, iProperty aims to reduce the cost of checking properties as they are initially developed and as they co-evolve with the program. The key novelty of iProperty is to compute the differences between the new and old versions of expected properties to reduce the number and size of the properties that need to be checked during the initial development of the properties. Furthermore, property differencing is used in synergy with program behavior differencing techniques to optimize common regression scenarios, such as detecting regression errors or checking feature additions for conformance to new expected properties. 
Experimental results in the context of symbolic execution of Java programs annotated with properties written as assertions show the effectiveness of iProperty in utilizing change information to enable more efficient checking.}, isbn = {978-1-4503-2756-5}, @@ -4852,7 +4630,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/3383923.3383939}, - url = {https://dl.acm.org/doi/10.1145/3383923.3383939}, urldate = {2023-08-21}, abstract = {Automated programming assessment systems (APAS) are useful supporting tools adopted in novice programming courses. They allow educators to reduce the amount of work required for homework assessment as well as students to have feedback and correct their code. In this study, we analyzed students' learning behavior from an APAS, called ProgEdu, which provides an iterative learning environment for object-oriented programming courses. Answers to research questions are obtained by mean of a quantitative research. Analysis results showed that all expectations about the system effectiveness are satisfied: 1) almost students agree with the assessment method and feedback given by the system; 2) iterative learning is helpful in improving students' programming skill; 3) and it facilitates students to pay more attention to code quality. This study also points out issues of the current system and propose suggestions to improve system performance.}, isbn = {978-1-4503-7508-5}, @@ -4865,7 +4642,6 @@ Subject\_term\_id: publication-characteristics;research-data}, author = {Yourdon, Edward and Constantine, Larry L.}, year = {1979}, journal = {{Englewood Cliffs: Yourdon Press}}, - url = {https://ui.adsabs.harvard.edu/abs/1979sdfd.book.....Y/abstract}, urldate = {2022-08-16}, langid = {english}, file = {/home/charlotte/sync/Zotero/storage/9GD5BKV2/abstract.html} @@ -4882,7 +4658,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, doi = {10.1145/775047.775058}, - url = {https://doi.org/10.1145/775047.775058}, urldate = {2022-09-01}, abstract = {Mining frequent trees is very useful in domains like bioinformatics, web mining, mining semistructured data, and so on. We formulate the problem of mining (embedded) subtrees in a forest of rooted, labeled, and ordered trees. We present TREEMINER, a novel algorithm to discover all frequent subtrees in a forest, using a new data structure called scope-list. We contrast TREEMINER with a pattern matching tree mining algorithm (PATTERNMATCHER). We conduct detailed experiments to test the performance and scalability of these methods. We find that TREEMINER outperforms the pattern matching approach by a factor of 4 to 20, and has good scaleup properties. We also present an application of tree mining to analyze real web logs for usage patterns.}, isbn = {978-1-58113-567-1}, @@ -4916,7 +4691,6 @@ Subject\_term\_id: publication-characteristics;research-data}, pages = {1--21}, issn = {1929-7750}, doi = {10.18608/jla.2024.7979}, - url = {https://learning-analytics.info/index.php/JLA/article/view/7979}, urldate = {2024-01-29}, abstract = {Predictive learning analytics has been widely explored in educational research to improve student retention and academic success in an introductory programming course in computer science (CS1). General-purpose and interpretable dropout predictions still pose a challenge. 
Our study aims to reproduce and extend the data analysis of a privacy-first student pass--fail prediction approach proposed by Van Petegem and colleagues (2022) in a different CS1 course. Using student submission and self-report data, we investigated the reproducibility of the original approach, the effect of adding self-reports to the model, and the interpretability of the model features. The results showed that the original approach for student dropout prediction could be successfully reproduced in a different course context and that adding self-report data to the prediction model improved accuracy for the first four weeks. We also identified relevant features associated with dropout in the CS1 course, such as timely submission of tasks and iterative problem solving. When analyzing student behaviour, submission data and self-report data were found to complement each other. The results highlight the importance of transparency and generalizability in learning analytics and the need for future research to identify other factors beyond self-reported aptitude measures and student behaviour that can enhance dropout prediction.}, copyright = {Copyright (c) 2024 Journal of Learning Analytics}, @@ -4937,7 +4711,6 @@ Subject\_term\_id: publication-characteristics;research-data}, publisher = {IOP Publishing}, issn = {1742-6596}, doi = {10.1088/1742-6596/1840/1/012029}, - url = {https://dx.doi.org/10.1088/1742-6596/1840/1/012029}, urldate = {2023-10-02}, abstract = {This study analyzes various publications of scientists on the training of future IT specialists and the features of training programming using online simulators. The authors of the article made a comparative description of different online platforms for teaching programming according to certain criteria, selected interesting tasks from the online platform hackerrank.com, which have already been used to teach students. Online programming simulators have significant potential in organizing an effective distance learning system in Ukrainian universities. It is important to use online simulators in the learning process as an additional tool for the formation of professional competencies, which provides more intensive involvement of students in the process of writing code and practical (situational) application of existing knowledge. Gamification of the process of training future IT specialists helps to increase cognitive activity, and hence -- the quality of the educational process and distance learning in particular. The authors recommend the use of online programming simulators as an additional tool for teaching computer science disciplines, taking into account their functionality, as well as the level of preparation of students and the expected learning outcomes.}, langid = {english}, diff --git a/book.org b/book.org index 7e93954..a80d284 100644 --- a/book.org +++ b/book.org @@ -104,7 +104,7 @@ Christophe, Wesley, Frank, Kim en Raija, bedankt! Verder zou ik graag Dominique willen bedanken om de rol als voorzitter van mijn jury op te nemen. Een eerder atypische bedanking gaat uit naar alle artiesten waarvan ik de muziek gebruikt heb om tijdens het schrijven van mijn doctoraat de concentratie te behouden.[fn:: -Ik limiteer met tot de periode van het schrijven van mijn doctoraat, want als ik alles had opgelijst dat die rol vervuld heeft in de voorbije zes jaar zou dit boek een stuk dikker geworden zijn. 
+Ik limiteer me tot de periode van het schrijven van mijn doctoraat, want als ik alles had opgelijst dat die rol vervuld heeft in de voorbije zes jaar zou dit boek een stuk dikker geworden zijn. ] Dit zijn Anohni, Boygenius[fn::En ook het solo-werk van Lucy Dacus, Phoebe Bridgers en Julien Baker.], Charlotte Cardin, Eliza McLamb, Jan Swerts, Katy Kirby, Marika Hackman, Pinegrove, SOPHIE, Spinvis en Tate McRae. @@ -657,11 +657,12 @@ Early in its development, we met with the Data Protection Officer of Ghent Unive We also only keep the data required for running the platform. This results in very little personal information being stored; only the users' names, usernames, and email addresses are stored in their profile. The only other data stored is data generated in the platform: submissions, evaluations, questions, answers, etc. -In this case also, we only keep the information required for the correct functioning of these features. +In this case too, we only keep the information required for the correct functioning of these features. The development of Dodona is also done in the open: the platform has been open-source since August 2019. The same philosophy has been extended to our research. -All data used in Chapter\nbsp{}[[#chap:passfail]] was pseudonymized before the analysis was started. +All data used in Chapter\nbsp{}[[#chap:passfail]] was pseudonymized before the analysis was started and no data was collected specifically to enable this research. +Conversely, the research was restricted to data that was already collected by Dodona for its regular operations. The data used in the study was also not published. This is of course not conducive to the verifiability of the research, which is why we were very happy to see that our method could be reproduced in another context. The research presented in Chapter\nbsp{}[[#chap:feedback]] also doesn't rely on any personal information: only the IDs and locations of the saved feedback items were used, in addition to the relevant code. @@ -677,7 +678,7 @@ In this chapter, we will give an overview of Dodona's most important features. This chapter answers the question what features a platform like Dodona needs. The most important feature is automated assessment, but as we show in this chapter, a lot more features than that are needed. -This chapter is partially based on *Van Petegem, C.*, Maertens, R., Strijbol, N., Van Renterghem, J., Van der Jeugt, F., De Wever, B., Dawyndt, P., Mesuere, B., 2023. Dodona: Learn to code with a virtual co-teacher that supports active learning. /SoftwareX/ 24, 101578. https://doi.org/10.1016/j.softx.2023.101578 +This chapter is partially based on *Van Petegem, C.*, Maertens, R., Strijbol, N., Van Renterghem, J., Van der Jeugt, F., De Wever, B., Dawyndt, P., Mesuere, B., 2023. Dodona: Learn to code with a virtual co-teacher that supports active learning. /SoftwareX/ 24, 101578. The work described in this chapter was performed by the whole Dodona team. It is difficult to pinpoint who did what. The code and its history can be looked at[fn:: https://github.com/dodona-edu/dodona/commits/main/], but it will never give a full view of the true collaborative effort of Dodona. @@ -991,7 +992,7 @@ We start by mentioning some facts and figures, and discussing a user study we pe We then explain how Dodona can be used on the basis of a case study. This case study also provides insight into the educational context for the research described in Chapters\nbsp{}[[#chap:passfail]]\nbsp{}and\nbsp{}[[#chap:feedback]]. 
-This chapter is partially based on *Van Petegem, C.*, Maertens, R., Strijbol, N., Van Renterghem, J., Van der Jeugt, F., De Wever, B., Dawyndt, P., Mesuere, B., 2023. Dodona: Learn to code with a virtual co-teacher that supports active learning. /SoftwareX/ 24, 101578. https://doi.org/10.1016/j.softx.2023.101578 +This chapter is partially based on *Van Petegem, C.*, Maertens, R., Strijbol, N., Van Renterghem, J., Van der Jeugt, F., De Wever, B., Dawyndt, P., Mesuere, B., 2023. Dodona: Learn to code with a virtual co-teacher that supports active learning. /SoftwareX/ 24, 101578. The course described in this chapter was mostly developed by prof. Peter Dawyndt, but has also seen numerous contributions by teaching assistents. ** Facts and figures @@ -1373,7 +1374,7 @@ Given that cohort sizes are large enough, historical data from a single course e Dodona has grown into a widely used automated assessment platform. As we have shown in this chapter, both students and teachers alike appreciate its extensive feature set and user-friendliness. -By exploiting all Dodona features, it is possible to build out a highly activating course. +By exploiting all Dodona features, it is possible to design and implement a highly activating course. While there is still a lot of time invested in running a course like this, the time Dodona saves can be reinvested in hands-on guidance of students and giving manual feedback on evaluations and examinations. * Under the hood: technical architecture and design @@ -1900,7 +1901,7 @@ Further work then developed this proof of concept into the full judge we will pr We will expand on TESTed using an example exercise. In this exercise, students need to rotate a list. -For example, in Python, ~rotate([0, 1, 2, 3, 4], 2)~ should return ~[3, 4, 0, 1, 2]~. +For example, in Python, ~rotate([0, 1, 2, 3, 4], 2)~ should return the list ~[3, 4, 0, 1, 2]~. The goal is that teachers can write their exercises as in Listing\nbsp{}[[lst:technicaltesteddsl]]. #+CAPTION: Example of a TESTed test plan, showing statements and expressions. @@ -2191,8 +2192,8 @@ The infrastructure and tooling required for supporting the assessment of many su We now shift to the chapters where we make use of the data provided by Dodona to perform educational data mining research. -This chapter is based on *Van Petegem, C.*, Deconinck, L., Mourisse, D., Maertens, R., Strijbol, N., Dhoedt, B., De Wever, B., Dawyndt, P., Mesuere, B., 2022. Pass/Fail Prediction in Programming Courses. /Journal of Educational Computing Research/, 68–95. https://doi.org/10.1177/07356331221085595 -It also briefly discusses the work reproduction of this research performed in Zhidkikh, D., Heilala, V., *Van Petegem, C.*, Dawyndt, P., Järvinen, M., Viitanen, S., De Wever, B., Mesuere, B., Lappalainen, V., Kettunen, L., & Hämäläinen, R., 2024. Reproducing Predictive Learning Analytics in CS1: Toward Generalizable and Explainable Models for Enhancing Student Retention. /Journal of Learning Analytics/, 1-21. https://doi.org/10.18608/jla.2024.7979 +This chapter is based on *Van Petegem, C.*, Deconinck, L., Mourisse, D., Maertens, R., Strijbol, N., Dhoedt, B., De Wever, B., Dawyndt, P., Mesuere, B., 2022. Pass/Fail Prediction in Programming Courses. /Journal of Educational Computing Research/, 68–95. +It also briefly discusses the work reproduction of this research performed in Zhidkikh, D., Heilala, V., *Van Petegem, C.*, Dawyndt, P., Järvinen, M., Viitanen, S., De Wever, B., Mesuere, B., Lappalainen, V., Kettunen, L., & Hämäläinen, R., 2024. 
Reproducing Predictive Learning Analytics in CS1: Toward Generalizable and Explainable Models for Enhancing Student Retention. /Journal of Learning Analytics/, 1-21. The work presented in this chapter was part of the master thesis by Louise Deconinck, with the reproduction being led by Denis Zhidkikh. @@ -2822,6 +2823,7 @@ We will then expand on some further experiments using data mining techniques we Section\nbsp{}[[#sec:feedbackprediction]] is based on an article that is currently being prepared for submission. Comments and evaluations were added to Dodona by myself. +Niko Strijbol implemented the addition of grades to evaluations. Jorg Van Renterghem finalized the addition of feedback reuse. The work on feedback prediction was started by myself and further developed in collaboration with Kasper Demeyere during his master's thesis. @@ -3493,8 +3495,8 @@ A skill profile would be more complicated though, since we would want some kind This leads right into another possibility for future research: exercise recommendation. Right now, learning paths in Dodona are static, determined by the teacher of the course the student is following. Dodona has a rich library of extra exercises, which some courses point to as opportunities for extra practice, but it is not always easy for students to know what exercises would be good for them. -Using a skill profile, we could recommend exercise that only contain one skill where the student is behind on, allowing them to focus their practice on that skill specifically. -We would again need to infer what skills are tested by exercises, but this was already required for the skill estimation itself as well. +Using a skill profile, we could recommend exercises that only contain one skill the student has not fully attained, allowing them to focus their practice on that skill specifically. +We would again need to infer what skills are tested by exercises, but this was already required for the skill estimation itself. The research from Chapter\nbsp{}[[#chap:passfail]] could also be used to help solve this problem in another way. If we know a student has a higher chance of failing the course, we might want to recommend some easier exercises. @@ -3503,13 +3505,13 @@ Estimating the difficulty of an exercise is a problem unto itself though (and ho The use of LLMs in Dodona could also be an opportunity. As mentioned in Section\nbsp{}[[#subsec:feedbackpredictionconclusion]], a possibility for using LLMs could be to generate feedback while grading. -By feeding an LLM with the student's code, an indication of the failed test cases (although doing this in a good format is an issue to solve in itself) and the type of issues that the teacher wants to remark upon it should be able to give a good starting point for the feedback. +By feeding an LLM with the student's code, an indication of the failed test cases (although doing this in a good format is an issue to solve in itself) and the type of issues that the teacher wants to address, it should be able to give a good starting point for the feedback. This could also kickstart the process explained in Section\nbsp{}[[#subsec:feedbackpredictionconclusion]]. By making generated feedback reusable, the given feedback can still remain consistent and fair. Another option is to integrate an LLM as an AI tutor (as, for example, Khan Academy has done with Khanmigo[fn:: https://www.khanmigo.ai/]). This way, it could interactively help students while they are learning. 
-Instead of tools like ChatGPT or Bard which are typically used to get a correct answer immediately, an AI tutor can guide students to find the correct answer to an exercise by themselves. +Instead of tools like ChatGPT or Bard which are typically used to get a correct answer immediately, an AI tutor can guide students to find the correct answer to an exercise gradually by giving hints. The final possibility we will present here is to prepare suggestions for answers to student questions on Dodona. At first glance, LLMs should be quite good at this. @@ -3562,7 +3564,7 @@ The same might be necessary when learning to program: to learn the basics, stude :END: In this appendix, we give an overview of the most important Dodona releases, and the changes they introduced, organized per academic year. -This is not a full overview of all Dodona releases, and does not mention all changes in a particular release.[[fn:: +This is not a full overview of all Dodona releases, and does not mention all changes in a particular release.[fn:: A full overview of all Dodona releases, with their full changelog, can be found at https://github.com/dodona-edu/dodona/releases/. ] diff --git a/rebuttal.md b/rebuttal.md index eb7e3c2..ded024c 100644 --- a/rebuttal.md +++ b/rebuttal.md @@ -1,10 +1,12 @@ # Rebuttal -## Common remarks +To view the actual changes made in response to the comments, see the version of the PhD that shows the difference with the version submitted in March. + +## Recurring remarks > The global research question should be more clearly stated in the text. This should also help with connecting the two parts of the dissertation. -The section "Structure of this dissertation" has been edited to explicitly mention a global research question, and a research question for each chapter. +The section "Structure of this dissertation" in the introduction has been edited to explicitly mention a global research question, and a research question for each chapter. > There should be more focus on the lessons learned when building an assessment platform like Dodona. @@ -12,11 +14,17 @@ By adding a research question and conclusion to chapters 2, 3, and 4, this remar > Chapter 6 is of lower quality than the rest of the chapters. -The work on the included article was continued after the dissertation was submitted, and that article has now been submitted to the Journal of Artificial Intelligence in Education. This version of the article is now included, which should solve the remarks on this chapter. +The work on the included article was continued after the dissertation was submitted, and that article has now been submitted to the Journal of Artificial Intelligence in Education (currently under review). +The submitted version of the article is now included, which should solve the remarks on this chapter. +In addition to textual improvements, the accuracy and the performance of the model have also been improved. > It would be good to compare Dodona and other modern platforms, perhaps via a comparative table. -This comparative table has already been created by Sven Strickroth at https://systemscorpus.strickroth.net/. I tried including a selection from this table in the dissertation, but found it did not add much to the text. +Such comparative table has already been created by Sven Strickroth at https://systemscorpus.strickroth.net/. +I tried including a selection from this table in the dissertation, but found it did not add much to the text. +As a result, I finally decided not to include such a table. 
+In general, a comparison should not merely be based on the major features of the platforms (as most platforms share more or less the same features).
+Differences appear in the detailed ways these features are implemented, which require a much more in-depth discussion than a simple comparative table.
> The final chapter is short and could be expanded upon.
@@ -74,13 +82,17 @@ This suggestion was applied.
> It may be useful to include an appendix with an example that illustrates the templating system adopted by TESTed.
-While this would be interesting, TESTed is not the main focus of this PhD. For more detail on this, I would like to refer to the PhD of my friend and colleague Niko Strijbol, which goes much more in detail on TESTed.
+While this would be interesting, TESTed is not the main focus of this PhD.
+For more detail on this, I would like to refer to the PhD of my friend and colleague Niko Strijbol, which goes into much more detail on TESTed.
+It is also discussed in detail in an article that is currently under review (Strijbol et al., 2024).
### Chapter 5
> The fact that the fourth series is an exception can perhaps also be attributed to students facing the combined use of conditional and repetitive execution for the first time, which challenges their skills in terms of keeping track of the flow of execution.
-While true that this is challenging for students, most exercises in the third series (while repetitive execution is introduced) also contain conditional execution. Therefore I did not change the text related to this.
+While it is true that this is challenging for students, most exercises in the third series (where repetitive execution is introduced) also contain conditional execution.
+What is definitely new in the fourth series are nested loops, which challenge the students.
+Therefore I did not change the text related to this.
## Frank Neven
@@ -94,7 +106,10 @@ This has been done.
> In the list of publications on pages ix and x it is not very clear what the difference between the different publications is (1 and 6, 2 and 8, 4 and 5). Some seem to have the same title but published elsewhere? Is it really a different publication then?
-Some of these were publications of conference posters; these have been removed. The others are in fact different publications.
+Number 5 was the publication of a conference poster; this has been removed.
+The others are in fact different publications.
+1 and 6 are two publications on TESTed, but on very different aspects of it.
+2 and 8 are two publications on Dolos, but they also present different aspects of it.
> In chapter 4 before explaining all implementation choices and details of TESTed in section 4.4 maybe first show a motivating example first to keep the reader's attention. (For example the example of Listing 4.5 could come much earlier.)
The mathematics has been taken out of the running text and placed in its own env
> LLMs are becoming more important, and the possible link with auto-graders is clear. You do mention them to some extent in chapters 6 and 7 but a slightly more thorough section on that would have been preferable. (This remark is a suggestion and not a blocking factor.)
The possibilities for future work have been expanded upon in the final chapter, including the LLM sections.
+This will also be the focus of the PhD of Thomas Van Mullem, a new PhD student joining Team Dodona in September.
## Raija Hämäläinen
> The differences and similarities between learning analytics and educational data mining could be more clearly described, both theoretically and in the practical section.
+This was indeed an oversight.
Definitions of LA and EDM have been added to the introduction to make clear what is meant by these terms in this PhD.
> It would be interesting to know more about the division of labor between Van Petegem and the rest of the Dodona team.