|
CS 456 - Systems Programming
Spring 2024
|
Displaying ./code/apr10/index.html
HTTP/1.1 200 OK
Date: Wed, 10 Apr 2024 22:41:27 GMT
Server: Apache/2.4.29 (Ubuntu)
Last-Modified: Mon, 11 Mar 2024 04:57:16 GMT
ETag: "35f08-6135b60297fd3"
Accept-Ranges: bytes
Content-Length: 220936
Vary: Accept-Encoding
Content-Type: text/html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>NASM Assembly Language Tutorials - asmtutor.com</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="">
<meta name="author" content="">
<link href="./assets/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<link href="./assets/fontawesome/css/all.css" rel="stylesheet">
<link href="./assets/stylesheet.css" rel="stylesheet">
<link href="./assets/bootstrap/css/bootstrap-responsive.min.css" rel="stylesheet">
<script src="./assets/syntax-highlighter/scripts/shCore.js"></script>
<script src="./assets/syntax-highlighter/scripts/shBrushAsm.js"></script>
<link href="./assets/syntax-highlighter/styles/shCoreDefault.css" rel="stylesheet">
<script type="text/javascript">SyntaxHighlighter.all();</script>
<!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
</head>
<body>
<div class="navbar navbar-inverse navbar-fixed-top">
<div class="navbar-inner">
<div class="container-fluid">
<span class="brand">NASM Assembly Language Tutorials - asmtutor.com</span>
<div class="nav-collapse collapse">
<p class="navbar-text pull-right">
<a class="navbar-link" href="#top">Top</a>
</p>
</div>
</div>
</div>
</div>
<div class="container-fluid">
<div class="row-fluid">
<div class="span3">
<div class="well sidebar-nav">
<ul class="nav nav-list">
<li class="nav-header">Lessons</li>
<li><a href="#lesson1">Lesson 1 <span>Hello, world!</span></a></li>
<li><a href="#lesson2">Lesson 2 <span>Proper program exit</span></a></li>
<li><a href="#lesson3">Lesson 3 <span>Calculate string length</span></a></li>
<li><a href="#lesson4">Lesson 4 <span>Subroutines</span></a></li>
<li><a href="#lesson5">Lesson 5 <span>External include files</span></a></li>
<li><a href="#lesson6">Lesson 6 <span>NULL terminating bytes</span></a></li>
<li><a href="#lesson7">Lesson 7 <span>Linefeeds</span></a></li>
<li><a href="#lesson8">Lesson 8 <span>Passing arguments</span></a></li>
<li><a href="#lesson9">Lesson 9 <span>User input</span></a></li>
<li><a href="#lesson10">Lesson 10 <span>Count to 10</span></a></li>
<li><a href="#lesson11">Lesson 11 <span>Count to 10 (itoa)</span></a></li>
<li><a href="#lesson12">Lesson 12 <span>Calculator - addition</span></a></li>
<li><a href="#lesson13">Lesson 13 <span>Calculator - subtraction</span></a></li>
<li><a href="#lesson14">Lesson 14 <span>Calculator - multiplication</span></a></li>
<li><a href="#lesson15">Lesson 15 <span>Calculator - division</span></a></li>
<li><a href="#lesson16">Lesson 16 <span>Calculator (atoi)</span></a></li>
<li><a href="#lesson17">Lesson 17 <span>Namespace</span></a></li>
<li><a href="#lesson18">Lesson 18 <span>Fizz Buzz</span></a></li>
<li><a href="#lesson19">Lesson 19 <span>Execute Command</span></a></li>
<li><a href="#lesson20">Lesson 20 <span>Process Forking</span></a></li>
<li><a href="#lesson21">Lesson 21 <span>Telling the time</span></a></li>
<li><a href="#lesson22">Lesson 22 <span>File Handling - Create</span></a></li>
<li><a href="#lesson23">Lesson 23 <span>File Handling - Write</span></a></li>
<li><a href="#lesson24">Lesson 24 <span>File Handling - Open</span></a></li>
<li><a href="#lesson25">Lesson 25 <span>File Handling - Read</span></a></li>
<li><a href="#lesson26">Lesson 26 <span>File Handling - Close</span></a></li>
<li><a href="#lesson27">Lesson 27 <span>File Handling - Update</span></a></li>
<li><a href="#lesson28">Lesson 28 <span>File Handling - Delete</span></a></li>
<li><a href="#lesson29">Lesson 29 <span>Sockets - Create</span></a></li>
<li><a href="#lesson30">Lesson 30 <span>Sockets - Bind</span></a></li>
<li><a href="#lesson31">Lesson 31 <span>Sockets - Listen</span></a></li>
<li><a href="#lesson32">Lesson 32 <span>Sockets - Accept</span></a></li>
<li><a href="#lesson33">Lesson 33 <span>Sockets - Read</span></a></li>
<li><a href="#lesson34">Lesson 34 <span>Sockets - Write</span></a></li>
<li><a href="#lesson35">Lesson 35 <span>Sockets - Close</span></a></li>
<li><a href="#lesson36">Lesson 36 <span>Download a Webpage</span></a></li>
</ul>
</div>
</div>
<div class="span9 lessons">
<div class="hero-unit">
<h2>Learn Assembly Language</h2>
<p>This project was put together to teach myself NASM x86 assembly language on linux.</p>
<p><a href="https://github.com/DGivney/assemblytutorials" class="btn btn-primary btn" target="_blank">Github Project »</a></p>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 1</h2>
<h5>The obligatory 'Hello, world!'</h5>
<p>Introduction to the Linux System Call Table. In this lesson we use software interrupts to request system functions from the kernel in order to print out 'Hello World!' to the console.</p>
<p><a class="btn" href="#lesson1">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 2</h2>
<h5>Proper program exit</h5>
<p>A very brief lesson about memory addresses, sequential code execution and how to properly terminate a program without errors.</p>
<p><a class="btn" href="#lesson2">View details »</a></p>
</div>
<div class="span4">
<h2>Lesson 3</h2>
<h5>Calculate string length</h5>
<p>What if we wanted to output something that we don't know the length of? Like user input? Learn about loops, labels and pointer arithmetic.</p>
<p><a class="btn" href="#lesson3">View details »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 4</h2>
<h5>Subroutines</h5>
<p>Introduction to the stack and how to write clean, reusable code in assembly.</p>
<p><a class="btn" href="#lesson4">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 5</h2>
<h5>External include files</h5>
<p>To further simplify our code we can move our subroutines into an external include file.</p>
<p><a class="btn" href="#lesson5">View details »</a></p>
</div>
<div class="span4">
<h2>Lesson 6</h2>
<h5>NULL terminating bytes</h5>
<p>A quick lesson on how memory is handled. This lesson also fixes the duplication bug we added in lesson 5.</p>
<p><a class="btn" href="#lesson6">View details »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 7</h2>
<h5>Linefeeds</h5>
<p>How you can use the stack to print linefeeds after strings and an introduction to the Extended Stack Pointer ESP.</p>
<p><a class="btn" href="#lesson7">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 8</h2>
<h5>Passing arguments</h5>
<p>Passing arguments to your program from the command line.</p>
<p><a class="btn" href="#lesson8">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 9</h2>
<h5>User input</h5>
<p>Introduction to the BSS section and how to trigger a call to sys_read to process user input.</p>
<p><a class="btn" href="#lesson9">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 10</h2>
<h5>Count to 10</h5>
<p>Introduction to numbers and counting in assembly.</p>
<p><a class="btn" href="#lesson10">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 11</h2>
<h5>Count to 10 (itoa)</h5>
<p>Introduction to ASCII and how to convert integers to their string representations in assembly.</p>
<p><a class="btn" href="#lesson11">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 12</h2>
<h5>Calculator - addition</h5>
<p>Introduction to calculating numbers in assembly. This tutorial describes a simple program to add two numbers together.</p>
<p><a class="btn" href="#lesson12">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 13</h2>
<h5>Calculator - subtraction</h5>
<p>Introduction to calculating numbers in assembly. This tutorial describes a simple program to subtract one number from another.</p>
<p><a class="btn" href="#lesson13">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 14</h2>
<h5>Calculator - multiplication</h5>
<p>Introduction to calculating numbers in assembly. This tutorial describes a simple program to multiply two numbers together.</p>
<p><a class="btn" href="#lesson14">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 15</h2>
<h5>Calculator - division</h5>
<p>Introduction to calculating numbers in assembly. This tutorial describes a simple program to divide one number by another.</p>
<p><a class="btn" href="#lesson15">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 16</h2>
<h5>Calculator (atoi)</h5>
<p>This program takes a series of passed string arguments, converts them to integers and adds them all together.</p>
<p><a class="btn" href="#lesson16">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 17</h2>
<h5>Namespace</h5>
<p>Introduction to how NASM handles namespace when it comes to global and local labels.</p>
<p><a class="btn" href="#lesson17">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 18</h2>
<h5>Fizz Buzz</h5>
<p>The Fizz Buzz programming challenge recreated in NASM.</p>
<p><a class="btn" href="#lesson18">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 19</h2>
<h5>Execute Command</h5>
<p>In this lesson we replace the currently running process with a new process that executes a command.</p>
<p><a class="btn" href="#lesson19">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 20</h2>
<h5>Process Forking</h5>
<p>In this lesson we create a new process that duplicates our current process.</p>
<p><a class="btn" href="#lesson20">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 21</h2>
<h5>Telling the time</h5>
<p>In this lesson we ask the kernel for the current unix timestamp.</p>
<p><a class="btn" href="#lesson21">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 22</h2>
<h5>File Handling - Create</h5>
<p>In this lesson we learn how to create a new file in Assembly.</p>
<p><a class="btn" href="#lesson22">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 23</h2>
<h5>File Handling - Write</h5>
<p>In this lesson we write content to a newly created text file.</p>
<p><a class="btn" href="#lesson23">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 24</h2>
<h5>File Handling - Open</h5>
<p>In this lesson we open a text file and print its file descriptor.</p>
<p><a class="btn" href="#lesson24">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 25</h2>
<h5>File Handling - Read</h5>
<p>In this lesson we read content from a newly created text file.</p>
<p><a class="btn" href="#lesson25">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 26</h2>
<h5>File Handling - Close</h5>
<p>In this lesson we close a newly created text file using its file descriptor.</p>
<p><a class="btn" href="#lesson26">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 27</h2>
<h5>File Handling - Update</h5>
<p>In this lesson we update the content of an included text file using seek.</p>
<p><a class="btn" href="#lesson27">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 28</h2>
<h5>File Handling - Delete</h5>
<p>In this lesson we learn how to delete a file.</p>
<p><a class="btn" href="#lesson28">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 29</h2>
<h5>Sockets - Create</h5>
<p>In this lesson we learn how to create a new socket in assembly and store its file descriptor.</p>
<p><a class="btn" href="#lesson29">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 30</h2>
<h5>Sockets - Bind</h5>
<p>In this lesson we learn how to bind a socket to an IP Address & Port Number.</p>
<p><a class="btn" href="#lesson30">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 31</h2>
<h5>Sockets - Listen</h5>
<p>In this lesson we learn how to make a socket listen for incoming connections.</p>
<p><a class="btn" href="#lesson31">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 32</h2>
<h5>Sockets - Accept</h5>
<p>In this lesson we learn how to make a socket accept incoming connections.</p>
<p><a class="btn" href="#lesson32">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 33</h2>
<h5>Sockets - Read</h5>
<p>In this lesson we learn how to read incoming requests on a socket.</p>
<p><a class="btn" href="#lesson33">View lesson »</a></p>
</div>
</div>
<div class="row-fluid hidden-phone">
<div class="span4">
<h2>Lesson 34</h2>
<h5>Sockets - Write</h5>
<p>In this lesson we learn how to make a socket respond to incoming requests.</p>
<p><a class="btn" href="#lesson34">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 35</h2>
<h5>Sockets - Close</h5>
<p>In this lesson we learn how to shutdown and close an open socket connection.</p>
<p><a class="btn" href="#lesson35">View lesson »</a></p>
</div>
<div class="span4">
<h2>Lesson 36</h2>
<h5>Download a Webpage</h5>
<p>In this lesson we're going to connect to a webserver and send a HTTP request for a webpage. We'll then print the server's response to our terminal.</p>
<p><a class="btn" href="#lesson36">View lesson »</a></p>
</div>
</div>
<hr>
<div class="row-fluid" id="lesson1">
<article class="span12">
<header>
<h2>Lesson 1</h2>
<h4>Hello, world!</h4>
</header>
<h5>First, some background</h5>
<p>Assembly language is bare-bones. The only interface a programmer has above the actual hardware is the kernel itself. In order to build useful programs in assembly we need to use the linux system calls provided by the kernel. These system calls are a library built into the operating system to provide functions such as reading input from a keyboard and writing output to the screen.</p>
<p>When you invoke a system call the kernel will immediately suspend execution of your program. It will then contact the necessary drivers needed to perform the task you requested on the hardware and then return control back to your program.</p>
<p>
<span class="label label-info">Note:</span>
Drivers are called <i>drivers</i> because the kernel literally uses them to drive the hardware.
</p>
<p>We can accomplish this all in assembly by loading EAX with the function number (operation code OPCODE) we want to execute and filling the remaining registers with the arguments we want to pass to the system call. A software interrupt is requested with the INT instruction and the kernel takes over and calls the function from the library with our arguments. Simple.</p>
<p>For example requesting an interrupt when EAX=1 will call sys_exit and requesting an interrupt when EAX=4 will call sys_write instead. EBX, ECX & EDX will be passed as arguments if the function requires them. <a href="https://chromium.googlesource.com/chromiumos/docs/+/HEAD/constants/syscalls.md#x86-32_bit" target="_blank">Click here to view an example of a Linux System Call Table and its corresponding OPCODES.</a></p>
<h5>Writing our program</h5>
<p>Firstly we create a variable 'msg' in our .data section and assign it the string we want to output, in this case 'Hello World!' followed by a line feed. In our .text section we tell the kernel where to begin execution by providing it with a global label _start: to denote the program's entry point.</p>
<p>We will be using the system call sys_write to output our message to the console window. This function is assigned OPCODE 4 in the Linux System Call Table. The function also takes 3 arguments which are sequentially loaded into EDX, ECX and EBX before requesting a software interrupt which will perform the task.</p>
<p>The arguments passed are as follows:
<ul>
<li> EDX will be loaded with the length (in bytes) of the string.</li>
<li> ECX will be loaded with the address of our variable created in the .data section.</li>
<li> EBX will be loaded with the file we want to write to – in this case STDOUT.</li>
</ul>
The datatype and meaning of the arguments passed can be found in the function's definition.
</p>
<p>We compile, link and run the program using the commands below.</p>
<div class="snippet">
<span class="filename">helloworld.asm</span>
<pre class="brush: asm;">
; Hello World Program - asmtutor.com
; Compile with: nasm -f elf helloworld.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld.o -o helloworld
; Run with: ./helloworld
;
; Writes msg to STDOUT using SYS_WRITE through the int 80h software interrupt.
; There is no SYS_EXIT call after the write, which is why the sample run
; below ends with 'Segmentation fault'.
SECTION .data
msg db 'Hello World!', 0Ah ; the message: 12 characters followed by a line feed (0Ah)
SECTION .text
global _start ; make the _start label global so it can denote the program's entry point
_start:
mov edx, 13 ; EDX = number of bytes to write - 12 characters plus the line feed
mov ecx, msg ; ECX = memory address of our message string
mov ebx, 1 ; EBX = file to write to - 1 is STDOUT
mov eax, 4 ; EAX = 4 selects SYS_WRITE (kernel opcode 4)
int 80h ; request the software interrupt - the kernel performs the write
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld.asm</span>
<span>~$ ld -m elf_i386 helloworld.o -o helloworld</span>
<span>~$ ./helloworld</span>
<span>Hello World!</span>
<span>Segmentation fault</span>
</div>
</div>
</div>
<p>
<span class="label label-important">Error:</span>
Segmentation fault
</p>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson2">
<article class="span12">
<header>
<h2>Lesson 2</h2>
<h4>Proper program exit</h4>
</header>
<h5>Some more background</h5>
<p>After successfully learning how to execute a system call in Lesson 1 we now need to learn about one of the most important system calls in the kernel, sys_exit.</p>
<p>Notice how after our 'Hello, world!' program ran we got a Segmentation fault? Well, computer programs can be thought of as a long strip of instructions that are loaded into memory and divided up into sections (or segments). This general pool of memory is shared between all programs and can be used to store variables, instructions, other programs or anything really. Each segment is given an address so that information stored in that section can be found later.</p>
<p>To execute a program that is loaded in memory, we use the global label _start: to tell the operating system where in memory our program can be found and executed. Memory is then accessed sequentially following the program logic which determines the next address to be accessed. The kernel jumps to that address in memory and executes it.</p>
<p>It's important to tell the operating system exactly where it should begin execution and where it should stop. In Lesson 1 we didn't tell the kernel where to stop execution. So, after we called sys_write the program continued sequentially executing the next address in memory, which could have been anything. We don't know what the kernel tried to execute but it caused it to choke and terminate the process for us instead - leaving us the error message of 'Segmentation fault'. Calling sys_exit at the end of all our programs will mean the kernel knows exactly when to terminate the process and return memory back to the general pool thus avoiding an error.</p>
<h5>Writing our program</h5>
<p>Sys_exit has a simple function definition. In the Linux System Call Table it is allocated OPCODE 1 and is passed a single argument through EBX.</p>
<p>In order to execute this function all we need to do is:
<ul>
<li>Load EBX with 0 to pass zero to the function meaning 'zero errors'.</li>
<li>Load EAX with 1 to call sys_exit.</li>
<li>Then request a software interrupt from the kernel using INT 80h.</li>
</ul>
</p>
<p>We then compile, link and run it again.</p>
<p>
<span class="label label-info">Note:</span>
Only new code added in each lesson will be commented.
</p>
<div class="snippet">
<span class="filename">helloworld.asm</span>
<pre class="brush: asm;">
; Hello World Program - asmtutor.com
; Compile with: nasm -f elf helloworld.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld.o -o helloworld
; Run with: ./helloworld
;
; Writes msg to STDOUT with SYS_WRITE, then calls SYS_EXIT with status 0
; so the process is terminated explicitly after the write.
SECTION .data
msg db 'Hello World!', 0Ah
SECTION .text
global _start
_start:
mov edx, 13
mov ecx, msg
mov ebx, 1
mov eax, 4
int 80h
mov ebx, 0 ; EBX = exit status 0 - 'No Errors'
mov eax, 1 ; EAX = 1 selects SYS_EXIT (kernel opcode 1)
int 80h ; request the interrupt - the kernel terminates the process
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld.asm</span>
<span>~$ ld -m elf_i386 helloworld.o -o helloworld</span>
<span>~$ ./helloworld</span>
<span>Hello World!</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson3">
<article class="span12">
<header>
<h2>Lesson 3</h2>
<h4>Calculate string length</h4>
</header>
<h5>Firstly, some background</h5>
<p>Why do we need to calculate the length of a string?</p>
<p>Well sys_write requires that we pass it a pointer to the string we want to output in memory and the length in bytes we want to print out. If we were to modify our message string we would have to update the length in bytes that we pass to sys_write as well, otherwise it will not print correctly. </p>
<p>You can see what I mean using the program in Lesson 2. Modify the message string to say 'Hello, brave new world!' then compile, link and run the new program. The output will be 'Hello, brave ' (the first 13 characters) because we are still only passing 13 bytes to sys_write as its length. It will be particularly necessary when we want to print out user input. As we won't know the length of the data when we compile our program, we will need a way to calculate the length at runtime in order to successfully print it out.</p>
<h5>Writing our program</h5>
<p>To calculate the length of the string we will use a technique called pointer arithmetic. Two registers are initialised pointing to the same address in memory. One register (in this case EAX) will be incremented forward one byte for each character in the output string until we reach the end of the string. The original pointer will then be subtracted from EAX. This is effectively like subtracting the index of one array element from the index of another: the result yields the number of elements (here, bytes) between the two addresses. This result is then passed to sys_write replacing our hard coded count.</p>
<p>The CMP instruction compares the left hand side against the right hand side and sets a number of flags that are used for program flow. The flag we're checking is the ZF or Zero Flag. When the byte that EAX points to is equal to zero the ZF flag is set. We then use the JZ instruction to jump, if the ZF flag is set, to the point in our program labeled 'finished'. This is to break out of the nextchar loop and continue executing the rest of the program.</p>
<div class="snippet">
<span class="filename">helloworld-len.asm</span>
<pre class="brush: asm;">
; Hello World Program (Calculating string length)
; Compile with: nasm -f elf helloworld-len.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-len.o -o helloworld-len
; Run with: ./helloworld-len
;
; Counts the bytes of msg at runtime by scanning for a zero byte, then passes
; that count to SYS_WRITE instead of a hard coded length.
SECTION .data
msg db 'Hello, brave new world!', 0Ah ; we can modify this now without having to update anywhere else in the program
; NOTE(review): msg is declared without a trailing zero byte, so the scan below
; stops at the first zero byte that follows msg in memory
SECTION .text
global _start
_start:
mov ebx, msg ; move the address of our message string into EBX
mov eax, ebx ; move the address in EBX into EAX as well (both now hold the same address)
nextchar:
cmp byte [eax], 0 ; compare the byte pointed to by EAX against zero (zero is the end of string delimiter)
jz finished ; jump (if the zero flag has been set) to the point in the code labeled 'finished'
inc eax ; move EAX on to the next byte (reached only if the zero flag has NOT been set)
jmp nextchar ; jump to the point in the code labeled 'nextchar'
finished:
sub eax, ebx ; subtract the address in EBX from the address in EAX
; remember both registers started out holding the same address (see the two mov instructions after _start)
; but EAX has been incremented one byte for each character in the message string
; so subtracting the start address from it
; leaves the number of bytes between the two addresses
mov edx, eax ; EDX = the number of bytes in our string (EAX holds the count after the subtraction)
mov ecx, msg ; the rest of the code should be familiar now
mov ebx, 1
mov eax, 4
int 80h
mov ebx, 0
mov eax, 1
int 80h
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-len.asm</span>
<span>~$ ld -m elf_i386 helloworld-len.o -o helloworld-len</span>
<span>~$ ./helloworld-len</span>
<span>Hello, brave new world!</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson4">
<article class="span12">
<header>
<h2>Lesson 4</h2>
<h4>Subroutines</h4>
</header>
<h5>Introduction to subroutines</h5>
<p>Subroutines are functions. They are reusable pieces of code that can be called by your program to perform various repeatable tasks. Subroutines are declared using labels just like we've used before (eg. _start:) however we don't use the JMP instruction to get to them - instead we use a new instruction <span class="instruction-name">CALL</span>. We also don't use the JMP instruction to return to our program after we have run the function. To return to our program from a subroutine we use the instruction RET instead.</p>
<h5>Why don't we JMP to subroutines?</h5>
<p>The great thing about writing a subroutine is that we can reuse it. If we want to be able to use the subroutine from anywhere in the code we would have to write some logic to determine where in the code we had jumped from and where we should jump back to. This would litter our code with unwanted labels. If we use <span class="instruction-name">CALL</span> and RET however, assembly handles this problem for us using something called the stack.</p>
<h5>Introduction to the stack</h5>
<p>The stack is a special type of memory. It's the same type of memory that we've used before however it's special in how it is used by our program. The stack is what is called <strong>Last In First Out</strong> memory (LIFO). You can think of the stack like a stack of plates in your kitchen. The last plate you put on the stack is also the first plate you will take off the stack next time you use a plate.</p>
<p>The stack in assembly is not storing plates though, it's storing values. You can store a lot of things on the stack such as variables, addresses or other programs. We need to use the stack when we call subroutines to temporarily store values that will be restored later.</p>
<p>Any register that your function needs to use should have its current value put on the stack for safe keeping using the PUSH instruction. Then after the function has finished its logic, these registers can have their original values restored using the POP instruction. This means that any values in the registers will be the same before and after you've called your function. If we take care of this in our subroutine we can call functions without worrying about what changes they're making to our registers.</p>
<p>The <span class="instruction-name">CALL</span> and RET instructions also use the stack. When you <span class="instruction-name">CALL</span> a subroutine, the address you called it from in your program is pushed onto the stack. This address is then popped off the stack by RET and the program jumps back to that place in your code. This is why you should always JMP to labels but you should <span class="instruction-name">CALL</span> functions.</p>
<div class="snippet">
<span class="filename">helloworld-len.asm</span>
<pre class="brush: asm;">
; Hello World Program (Subroutines)
; Compile with: nasm -f elf helloworld-len.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-len.o -o helloworld-len
; Run with: ./helloworld-len
;
; Prints msg to STDOUT; the length is calculated by calling the strlen subroutine.
SECTION .data
msg db 'Hello, brave new world!', 0Ah
SECTION .text
global _start
_start:
mov eax, msg ; EAX = address of our message string (the argument strlen reads)
call strlen ; call our function to calculate the length of the string
mov edx, eax ; our function leaves the result in EAX - copy it to EDX for SYS_WRITE
mov ecx, msg ; this is all the same as before
mov ebx, 1
mov eax, 4
int 80h
mov ebx, 0
mov eax, 1
int 80h
;------------------------------------------
; strlen - calculate the length of a string
; In:  EAX = address of the string
; Out: EAX = number of bytes before the first zero byte
; EBX is used as scratch and restored before returning
strlen: ; this is our first function declaration
push ebx ; push the value in EBX onto the stack to preserve it while we use EBX in this function
mov ebx, eax ; EBX = start address (EAX and EBX now hold the same address)
nextchar: ; this is the same as lesson3
cmp byte [eax], 0
jz finished
inc eax
jmp nextchar
finished:
sub eax, ebx ; EAX = end address - start address = length in bytes
pop ebx ; pop the value on the stack back into EBX
ret ; return to where the function was called
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-len.asm</span>
<span>~$ ld -m elf_i386 helloworld-len.o -o helloworld-len</span>
<span>~$ ./helloworld-len</span>
<span>Hello, brave new world!</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson5">
<article class="span12">
<header>
<h2>Lesson 5</h2>
<h4>External include files</h4>
</header>
<p>External include files allow us to move code from our program and put it into separate files. This technique is useful for writing clean, easy to maintain programs. Reusable bits of code can be written as subroutines and stored in separate files called libraries. When you need a piece of logic you can include the file in your program and use it as if they are part of the same file.</p>
<p>In this lesson we will move our string length calculating subroutine into an external file. We will also turn our string printing logic and program exit logic into subroutines and move them into this external file. Once it's completed our actual program will be clean and easier to read.</p>
<p>We can then declare another message variable and call our print function twice in order to demonstrate how we can reuse code.</p>
<p>
<span class="label label-info">Note:</span>
I won't be showing the code in functions.asm after this lesson unless it changes. It will just be included if needed.
</p>
<div class="snippet">
<span class="filename">functions.asm</span>
<pre class="brush: asm;">
;------------------------------------------
; int slen(String message)
; String length calculation function
; In:  EAX = address of the string
; Out: EAX = number of bytes before the first zero byte
; EBX is saved on entry and restored before returning
slen:
push ebx ; preserve the caller's EBX
mov ebx, eax ; EBX = start address of the string
nextchar:
cmp byte [eax], 0 ; stop at the first zero byte
jz finished
inc eax ; otherwise move on to the next byte
jmp nextchar
finished:
sub eax, ebx ; EAX = end address - start address = length in bytes
pop ebx ; restore the caller's EBX
ret
;------------------------------------------
; void sprint(String message)
; String printing function
; In:  EAX = address of the string to print
; Writes the string to STDOUT with SYS_WRITE, using slen for the byte count
; EDX, ECX and EBX are restored before returning; EAX is not
sprint:
push edx ; preserve the registers we load with SYS_WRITE arguments
push ecx
push ebx
push eax ; keep the string address - slen replaces EAX with the length
call slen
mov edx, eax ; EDX = number of bytes to write
pop eax ; EAX = string address again
mov ecx, eax ; ECX = address of the string
mov ebx, 1 ; EBX = 1 - write to STDOUT
mov eax, 4 ; EAX = 4 selects SYS_WRITE
int 80h
pop ebx ; restore the caller's registers in reverse push order
pop ecx
pop edx
ret
;------------------------------------------
; void quit()
; Exit program and restore resources
; Calls SYS_EXIT with exit status 0
quit:
mov ebx, 0 ; EBX = exit status 0
mov eax, 1 ; EAX = 1 selects SYS_EXIT
int 80h
ret ; NOTE(review): presumably never reached, since SYS_EXIT ends the process
</pre>
</div>
<div class="snippet">
<span class="filename">helloworld-inc.asm</span>
<pre class="brush: asm;">
; Hello World Program (External file include)
; Compile with: nasm -f elf helloworld-inc.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-inc.o -o helloworld-inc
; Run with: ./helloworld-inc
%include 'functions.asm' ; include our external file
SECTION .data
msg1 db 'Hello, brave new world!', 0Ah ; our first message string
msg2 db 'This is how we recycle in NASM.', 0Ah ; our second message string
SECTION .text
global _start
_start:
mov eax, msg1 ; move the address of our first message string into EAX
call sprint ; call our string printing function
mov eax, msg2 ; move the address of our second message string into EAX
call sprint ; call our string printing function
call quit ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-inc.asm</span>
<span>~$ ld -m elf_i386 helloworld-inc.o -o helloworld-inc</span>
<span>~$ ./helloworld-inc</span>
<span>Hello, brave new world!</span>
<span>This is how we recycle in NASM.</span>
<span>This is how we recycle in NASM.</span>
</div>
</div>
</div>
<p>
<span class="label label-important">Error:</span>
Our second message is outputted twice. This is fixed in the next lesson.
</p>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson6">
<article class="span12">
<header>
<h2>Lesson 6</h2>
<h4>NULL terminating bytes</h4>
</header>
<p>Ok so why did our second message print twice when we only called our sprint function on msg2 once? Well actually it did only print once. You can see what I mean if you comment out our second call to sprint. The output will be both of our message strings.</p>
<p>But how is this possible?</p>
<p>What is happening is we weren't properly terminating our strings. In assembly, variables are stored one after another in memory so the last byte of our msg1 variable is right next to the first byte of our msg2 variable. We know our string length calculation is looking for a zero byte so unless our msg2 variable starts with a zero byte it keeps counting as if it's the same string (and as far as assembly is concerned it is the same string). So we need to put a zero byte or 0h after our strings to let assembly know where to stop counting.</p>
<p>
<span class="label label-info">Note:</span>
In programming 0h denotes a null byte and a null byte after a string tells assembly where it ends in memory.
</p>
<div class="snippet">
<span class="filename">helloworld-inc.asm</span>
<pre class="brush: asm;">
; Hello World Program (NULL terminating bytes)
; Compile with: nasm -f elf helloworld-inc.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-inc.o -o helloworld-inc
; Run with: ./helloworld-inc
%include 'functions.asm'
SECTION .data
msg1 db 'Hello, brave new world!', 0Ah, 0h ; NOTE the null terminating byte
msg2 db 'This is how we recycle in NASM.', 0Ah, 0h ; NOTE the null terminating byte
SECTION .text
global _start
_start:
mov eax, msg1
call sprint
mov eax, msg2
call sprint
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-inc.asm</span>
<span>~$ ld -m elf_i386 helloworld-inc.o -o helloworld-inc</span>
<span>~$ ./helloworld-inc</span>
<span>Hello, brave new world!</span>
<span>This is how we recycle in NASM.</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson7">
<article class="span12">
<header>
<h2>Lesson 7</h2>
<h4>Linefeeds</h4>
</header>
<p>Linefeeds are essential to console programs like our 'hello world' program. They become even more important once we start building programs that require user input. But linefeeds can be a pain to maintain. Sometimes you will want to include them in your strings and sometimes you will want to remove them. If we continue to hard code them in our variables by adding 0Ah after our declared message text, it will become a problem. If there's a place in the code that we don't want to print out the linefeed for that variable we will need to write some extra logic to remove it from the string at runtime.</p>
<p>It would be better for the maintainability of our program if we write a subroutine that will print out our message and then print a linefeed afterwards. That way we can just call this subroutine when we need the linefeed and call our current sprint subroutine when we don't.</p>
<p>A call to sys_write requires we pass a pointer to an address in memory of the string we want to print so we can't just pass a linefeed character (0Ah) to our print function. We also don't want to create another variable just to hold a linefeed character so we will instead use the stack.</p>
<p>The way it works is by moving a linefeed character into EAX. We then push EAX onto the stack and get the address pointed to by the Extended Stack Pointer. ESP is another register. When you push items onto the stack, ESP is decremented to point to the address in memory of the last item and so it can be used to access that item directly from the stack. Since ESP points to an address in memory of a character, sys_write will be able to use it to print.</p>
<p>
<span class="label label-info">Note:</span>
I've highlighted the new code in functions.asm below.
</p>
<div class="snippet">
<span class="filename">functions.asm</span>
<pre class="brush: asm; highlight: [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57]">
;------------------------------------------
; int slen(String message)
; String length calculation function
slen:
push ebx
mov ebx, eax
nextchar:
cmp byte [eax], 0
jz finished
inc eax
jmp nextchar
finished:
sub eax, ebx
pop ebx
ret
;------------------------------------------
; void sprint(String message)
; String printing function
sprint:
push edx
push ecx
push ebx
push eax
call slen
mov edx, eax
pop eax
mov ecx, eax
mov ebx, 1
mov eax, 4
int 80h
pop ebx
pop ecx
pop edx
ret
;------------------------------------------
; void sprintLF(String message)
; String printing with line feed function
sprintLF:
push eax ; push eax onto the stack first to preserve it - sprint leaves the result of its system call in eax
call sprint ; print the message itself (eax still holds the address of the string)
mov eax, 0Ah ; move 0Ah into eax - 0Ah is the ascii character for a linefeed
push eax ; push the linefeed onto the stack so we can get the address
mov eax, esp ; move the address of the current stack pointer into eax for sprint
call sprint ; call our sprint function
pop eax ; remove our linefeed character from the stack
pop eax ; restore the original value of eax before our function was called
ret ; return to our program
;------------------------------------------
; void quit()
; Exit program and restore resources
quit:
mov ebx, 0
mov eax, 1
int 80h
ret
</pre>
</div>
<div class="snippet">
<span class="filename">helloworld-lf.asm</span>
<pre class="brush: asm;">
; Hello World Program (Print with line feed)
; Compile with: nasm -f elf helloworld-lf.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-lf.o -o helloworld-lf
; Run with: ./helloworld-lf
%include 'functions.asm'
SECTION .data
msg1 db 'Hello, brave new world!', 0h ; NOTE we have removed the line feed character 0Ah
msg2 db 'This is how we recycle in NASM.', 0h ; NOTE we have removed the line feed character 0Ah
SECTION .text
global _start
_start:
mov eax, msg1
call sprintLF ; NOTE we are calling our new print with linefeed function
mov eax, msg2
call sprintLF ; NOTE we are calling our new print with linefeed function
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-lf.asm</span>
<span>~$ ld -m elf_i386 helloworld-lf.o -o helloworld-lf</span>
<span>~$ ./helloworld-lf</span>
<span>Hello, brave new world!</span>
<span>This is how we recycle in NASM.</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson8">
<article class="span12">
<header>
<h2>Lesson 8</h2>
<h4>Passing arguments</h4>
</header>
<p>Passing arguments to your program from the command line is as easy as popping them off the stack in NASM. When we run our program, any passed arguments are loaded onto the stack in reverse order. The name of the program is then loaded onto the stack and lastly the total number of arguments is loaded onto the stack. The last two stack items for a NASM compiled program are always the name of the program and the number of passed arguments.</p>
<p>So all we have to do to use them is pop the number of arguments off the stack first, then iterate once for each argument and perform our logic. In our program that means calling our print function.</p>
<p>
<span class="label label-info">Note:</span>
We are using the ECX register as our counter for the loop. Although it's a general-purpose register its original intention was to be used as a counter.
</p>
<div class="snippet">
<span class="filename">helloworld-args.asm</span>
<pre class="brush: asm;">
; Hello World Program (Passing arguments from the command line)
; Compile with: nasm -f elf helloworld-args.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-args.o -o helloworld-args
; Run with: ./helloworld-args
%include 'functions.asm'
SECTION .text
global _start
_start:
pop ecx ; first value on the stack is the number of arguments
nextArg:
cmp ecx, 0h ; check to see if we have any arguments left
jz noMoreArgs ; if zero flag is set jump to noMoreArgs label (jumping over the end of the loop)
pop eax ; pop the next argument off the stack
call sprintLF ; call our print with linefeed function
dec ecx ; decrease ecx (number of arguments left) by 1
jmp nextArg ; jump to nextArg label
noMoreArgs:
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-args.asm</span>
<span>~$ ld -m elf_i386 helloworld-args.o -o helloworld-args</span>
<span>~$ ./helloworld-args "This is one argument" "This is another" 101</span>
<span>./helloworld-args</span>
<span>This is one argument</span>
<span>This is another</span>
<span>101</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson9">
<article class="span12">
<header>
<h2>Lesson 9</h2>
<h4>User input</h4>
</header>
<h5>Introduction to the .bss section</h5>
<p>So far we've used the .text and .data section so now it's time to introduce the .bss section. BSS stands for Block Started by Symbol. It is an area in our program that is used to reserve space in memory for uninitialised variables. We will use it to reserve some space in memory to hold our user input since we don't know how many bytes we'll need to store.</p>
<p>The syntax to declare variables is as follows:</p>
<div class="snippet">
<span class="filename">.bss section example</span>
<pre class="brush: asm;">
SECTION .bss
variableName1: RESB 1 ; reserve space for 1 byte
variableName2: RESW 1 ; reserve space for 1 word
variableName3: RESD 1 ; reserve space for 1 double word
variableName4: RESQ 1 ; reserve space for 1 double precision float (quad word)
variableName5: REST 1 ; reserve space for 1 extended precision float
</pre>
</div>
<h5>Writing our program</h5>
<p>We will be using the system call sys_read to receive and process input from the user. This function is assigned OPCODE 3 in the Linux System Call Table. Just like sys_write this function also takes 3 arguments which will be loaded into EDX, ECX and EBX before requesting a software interrupt that will call the function.</p>
<p>The arguments passed are as follows:
<ul>
<li> EDX will be loaded with the maximum length (in bytes) of the space in memory.</li>
<li> ECX will be loaded with the address of our variable created in the .bss section.</li>
<li> EBX will be loaded with the file we want to read from – in this case STDIN.</li>
</ul>
As always the datatype and meaning of the arguments passed can be found in the function's definition.
</p>
<p>When sys_read detects a linefeed, control returns to the program and the user's input is located at the memory address you passed in ECX.</p>
<div class="snippet">
<span class="filename">helloworld-input.asm</span>
<pre class="brush: asm;">
; Hello World Program (Getting input)
; Compile with: nasm -f elf helloworld-input.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-input.o -o helloworld-input
; Run with: ./helloworld-input
%include 'functions.asm'
SECTION .data
msg1 db 'Please enter your name: ', 0h ; message string asking user for input
msg2 db 'Hello, ', 0h ; message string to use after user has entered their name
SECTION .bss
sinput: resb 255 ; reserve a 255 byte space in memory for the user's input string
SECTION .text
global _start
_start:
mov eax, msg1
call sprint
mov edx, 254 ; number of bytes to read - one less than the buffer size so the last byte stays zero and sprint always finds a null terminator
mov ecx, sinput ; reserved space to store our input (known as a buffer)
mov ebx, 0 ; read from the STDIN file
mov eax, 3 ; invoke SYS_READ (kernel opcode 3)
int 80h
mov eax, msg2
call sprint
mov eax, sinput ; move our buffer into eax (Note: input contains a linefeed)
call sprint ; call our print function
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-input.asm</span>
<span>~$ ld -m elf_i386 helloworld-input.o -o helloworld-input</span>
<span>~$ ./helloworld-input</span>
<span>Please enter your name: Daniel Givney</span>
<span>Hello, Daniel Givney</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson10">
<article class="span12">
<header>
<h2>Lesson 10</h2>
<h4>Count to 10</h4>
</header>
<h5>Firstly, some background</h5>
<p>Counting by numbers is not as straight forward as you would think in assembly. Firstly we need to pass sys_write an address in memory so we can't just load our register with a number and call our print function. Secondly, numbers and strings are very different things in assembly. Strings are represented by what are called ASCII values. ASCII stands for <strong>American Standard Code for Information Interchange</strong>. A good reference for ASCII <a href="http://www.asciitable.com/">can be found here</a>. ASCII was created as a way to standardise the representation of strings across all computers.</p>
<p>Remember, we can't print a number - we have to print a string. In order to count to 10 we will need to convert our numbers from standard integers to their ASCII string representations. Have a look at the ASCII values table and notice that the string representation for the number '1' is actually '49' in ASCII. In fact, adding 48 to our numbers is all we have to do to convert them from integers to their ASCII string representations.</p>
<h5>Writing our program</h5>
<p>What we will do with our program is count from 1 to 10 using the ECX register. We will then add 48 to our counter to convert it from a number to its ASCII string representation. We will then push this value to the stack and call our print function passing ESP as the memory address to print from. Once we have finished counting to 10 we will exit our counting loop and call our quit function.</p>
<div class="snippet">
<span class="filename">helloworld-10.asm</span>
<pre class="brush: asm;">
; Hello World Program (Count to 10)
; Compile with: nasm -f elf helloworld-10.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-10.o -o helloworld-10
; Run with: ./helloworld-10
%include 'functions.asm'
SECTION .text
global _start
_start:
mov ecx, 0 ; ecx is initialised to zero.
nextNumber:
inc ecx ; increment ecx
mov eax, ecx ; copy the value of our counter into eax
add eax, 48 ; add 48 to our number to convert from integer to ascii for printing
push eax ; push eax to the stack
mov eax, esp ; get the address of the character on the stack
call sprintLF ; call our print function
pop eax ; clean up the stack so we don't have unneeded bytes taking up space
cmp ecx, 10 ; have we reached 10 yet? compare our counter with decimal 10
jne nextNumber ; jump if not equal and keep counting
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-10.asm</span>
<span>~$ ld -m elf_i386 helloworld-10.o -o helloworld-10</span>
<span>~$ ./helloworld-10</span>
<span>1</span>
<span>2</span>
<span>3</span>
<span>4</span>
<span>5</span>
<span>6</span>
<span>7</span>
<span>8</span>
<span>9</span>
<span>:</span>
</div>
</div>
</div>
<p>
<span class="label label-important">Error:</span>
Our number 10 prints a colon (:) character instead. What's going on?
</p>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson11">
<article class="span12">
<header>
<h2>Lesson 11</h2>
<h4>Count to 10 (itoa)</h4>
</header>
<p>So why did our program in Lesson 10 print out a colon character instead of the number 10? Well let's have a look at our ASCII table. We can see that the colon character has an ASCII value of 58. We were adding 48 to our integers to convert them to their ASCII string representations so instead of passing sys_write the value '58' to print ten we actually need to pass the ASCII value for the number 1 followed by the ASCII value for the number 0. Passing sys_write '4948' is the correct string representation for the number '10'. So we can't just simply add 48 to our numbers to convert them, we first have to divide them by 10 because each place value needs to be converted individually.</p>
<p>We will write 2 new subroutines in this lesson 'iprint' and 'iprintLF'. These functions will be used when we want to print ASCII string representations of numbers. We achieve this by passing the number in EAX. We then initialise a counter in ECX. We will repeatedly divide the number by 10 and each time convert the remainder to a string by adding 48. We will then push this onto the stack for later use. Once we can no longer divide the number by 10 we will enter our second loop. In this print loop we will print the now converted string representations from the stack and pop them off. Popping them off the stack moves ESP forward to the next item on the stack. Each time we print a value we will decrease our counter ECX. Once all numbers have been converted and printed we will return to our program.</p>
<h5>How does the divide instruction work?</h5>
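<p>The DIV and IDIV instructions work by dividing the 64 bit value held in the register pair EDX:EAX by the value passed to the instruction, which is why EDX must be emptied (or sign-extended) first when the number we want to divide is only in EAX. The quotient part of the value is left in EAX and the remainder part is put into EDX (Originally called the data register).</p>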
<p>For example.</p>
<div class="snippet">
<span class="filename">IDIV instruction example</span>
<pre class="brush: asm;">
mov eax, 10 ; move 10 into eax
mov esi, 10 ; move 10 into esi
mov edx, 0 ; empty edx - idiv divides the 64 bit value in edx:eax, so edx must not hold leftover data
idiv esi ; divide eax by esi (eax will equal 1 and edx will equal 0)
mov edx, 0 ; empty edx again - it holds the remainder of the previous divide
idiv esi ; divide eax by esi again (eax will equal 0 and edx will equal 1)
</pre>
</div>
<h5>If we are only storing the remainder won't we have problems?</h5>
<p>No, because these are integers, when you divide a number by an even bigger number the quotient in EAX is 0 and the remainder is the number itself. This is because the number divides zero times leaving the original value as the remainder in EDX. How good is that?</p>
<p>
<span class="label label-info">Note:</span>
Only the new functions iprint and iprintLF have comments.
</p>
<div class="snippet">
<span class="filename">functions.asm</span>
<pre class="brush: asm;">
;------------------------------------------
; void iprint(Integer number)
; Integer printing function (itoa)
; In:  eax = number to print (treated as unsigned, because edx is emptied before every divide)
; eax, ecx, edx and esi are all saved and restored
iprint:
push eax ; preserve eax on the stack to be restored after function runs
push ecx ; preserve ecx on the stack to be restored after function runs
push edx ; preserve edx on the stack to be restored after function runs
push esi ; preserve esi on the stack to be restored after function runs
mov ecx, 0 ; counter of how many bytes we need to print in the end
divideLoop:
inc ecx ; count each byte to print - number of characters
mov edx, 0 ; empty edx - the divide below uses edx:eax as the number being divided
mov esi, 10 ; mov 10 into esi
idiv esi ; divide eax by esi
add edx, 48 ; convert edx to its ascii representation - edx holds the remainder after a divide instruction
push edx ; push edx (string representation of an integer) onto the stack - the upper bytes pushed are zero so sprint sees a one character string
cmp eax, 0 ; can the integer be divided anymore?
jnz divideLoop ; jump if not zero to the label divideLoop
printLoop:
dec ecx ; count down each byte that we put on the stack
mov eax, esp ; mov the stack pointer into eax for printing
call sprint ; call our string print function
pop eax ; remove last character from the stack to move esp forward
cmp ecx, 0 ; have we printed all bytes we pushed onto the stack?
jnz printLoop ; jump if not zero to the label printLoop
pop esi ; restore esi from the value we pushed onto the stack at the start
pop edx ; restore edx from the value we pushed onto the stack at the start
pop ecx ; restore ecx from the value we pushed onto the stack at the start
pop eax ; restore eax from the value we pushed onto the stack at the start
ret
;------------------------------------------
; void iprintLF(Integer number)
; Integer printing function with linefeed (itoa)
iprintLF:
call iprint ; call our integer printing function
push eax ; push eax onto the stack to preserve it while we use the eax register in this function
mov eax, 0Ah ; move 0Ah into eax - 0Ah is the ascii character for a linefeed
push eax ; push the linefeed onto the stack so we can get the address
mov eax, esp ; move the address of the current stack pointer into eax for sprint
call sprint ; call our sprint function
pop eax ; remove our linefeed character from the stack
pop eax ; restore the original value of eax before our function was called
ret
;------------------------------------------
; int slen(String message)
; String length calculation function
slen:
push ebx
mov ebx, eax
nextchar:
cmp byte [eax], 0
jz finished
inc eax
jmp nextchar
finished:
sub eax, ebx
pop ebx
ret
;------------------------------------------
; void sprint(String message)
; String printing function
sprint:
push edx
push ecx
push ebx
push eax
call slen
mov edx, eax
pop eax
mov ecx, eax
mov ebx, 1
mov eax, 4
int 80h
pop ebx
pop ecx
pop edx
ret
;------------------------------------------
; void sprintLF(String message)
; String printing with line feed function
; eax is saved before the first call to sprint because sprint does not restore it
sprintLF:
push eax ; preserve the caller's eax (sprint leaves its system call result there)
call sprint ; eax still holds the address of the message
mov eax, 0AH
push eax
mov eax, esp
call sprint
pop eax
pop eax ; eax is the caller's original value again
ret
;------------------------------------------
; void quit()
; Exit program and restore resources
quit:
mov ebx, 0
mov eax, 1
int 80h
ret
</pre>
</div>
<div class="snippet">
<span class="filename">helloworld-itoa.asm</span>
<pre class="brush: asm;">
; Hello World Program (Count to 10 itoa)
; Compile with: nasm -f elf helloworld-itoa.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 helloworld-itoa.o -o helloworld-itoa
; Run with: ./helloworld-itoa
%include 'functions.asm'
SECTION .text
global _start
_start:
mov ecx, 0
nextNumber:
inc ecx
mov eax, ecx
call iprintLF ; NOTE call our new integer printing function (itoa)
cmp ecx, 10
jne nextNumber
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf helloworld-itoa.asm</span>
<span>~$ ld -m elf_i386 helloworld-itoa.o -o helloworld-itoa</span>
<span>~$ ./helloworld-itoa</span>
<span>1</span>
<span>2</span>
<span>3</span>
<span>4</span>
<span>5</span>
<span>6</span>
<span>7</span>
<span>8</span>
<span>9</span>
<span>10</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson12">
<article class="span12">
<header>
<h2>Lesson 12</h2>
<h4>Calculator - addition</h4>
</header>
<p>In this program we will be adding the registers EAX and EBX together and we'll leave our answer in EAX. Firstly we use the MOV instruction to load EAX with an integer (in this case 90). We then MOV an integer into EBX (in this case 9). Now all we need to do is use the ADD instruction to perform our addition. EBX and EAX will be added together leaving our answer in the left most register in this instruction (in our case EAX). Then all we need to do is call our integer printing function to complete the program.</p>
<div class="snippet">
<span class="filename">calculator-addition.asm</span>
<pre class="brush: asm;">
; Calculator (Addition)
; Compile with: nasm -f elf calculator-addition.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 calculator-addition.o -o calculator-addition
; Run with: ./calculator-addition
%include 'functions.asm'
SECTION .text
global _start
_start:
mov eax, 90 ; move our first number into eax
mov ebx, 9 ; move our second number into ebx
add eax, ebx ; add ebx to eax
call iprintLF ; call our integer print with linefeed function
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf calculator-addition.asm</span>
<span>~$ ld -m elf_i386 calculator-addition.o -o calculator-addition</span>
<span>~$ ./calculator-addition</span>
<span>99</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson13">
<article class="span12">
<header>
<h2>Lesson 13</h2>
<h4>Calculator - subtraction</h4>
</header>
<p>In this program we will be subtracting the value in the register EBX from the value in the register EAX. Firstly we load EAX and EBX with integers in the same way as Lesson 12. The only difference is we will be using the SUB instruction to perform our subtraction logic, leaving our answer in the left most register of this instruction (in our case EAX). Then all we need to do is call our integer printing function to complete the program.</p>
<div class="snippet">
<span class="filename">calculator-subtraction.asm</span>
<pre class="brush: asm;">
; Calculator (Subtraction)
; Compile with: nasm -f elf calculator-subtraction.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 calculator-subtraction.o -o calculator-subtraction
; Run with: ./calculator-subtraction
%include 'functions.asm'
SECTION .text
global _start
_start:
mov eax, 90 ; move our first number into eax
mov ebx, 9 ; move our second number into ebx
sub eax, ebx ; subtract ebx from eax
call iprintLF ; call our integer print with linefeed function
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf calculator-subtraction.asm</span>
<span>~$ ld -m elf_i386 calculator-subtraction.o -o calculator-subtraction</span>
<span>~$ ./calculator-subtraction</span>
<span>81</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson14">
<article class="span12">
<header>
<h2>Lesson 14</h2>
<h4>Calculator - multiplication</h4>
</header>
<p>In this program we will be multiplying the value in EBX by the value present in EAX. Firstly we load EAX and EBX with integers in the same way as Lesson 12. This time though we will be calling the MUL instruction to perform our multiplication logic. The MUL instruction is different from many instructions in NASM, in that it only accepts one further argument. The MUL instruction always multiplies EAX by whatever value is passed after it. The answer is left in EAX (if the answer is too big to fit in 32 bits, the upper bits are placed in EDX).</p>
<div class="snippet">
<span class="filename">calculator-multiplication.asm</span>
<pre class="brush: asm;">
; Calculator (Multiplication)
; Compile with: nasm -f elf calculator-multiplication.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 calculator-multiplication.o -o calculator-multiplication
; Run with: ./calculator-multiplication
%include 'functions.asm'
SECTION .text
global _start
_start:
mov eax, 90 ; move our first number into eax
mov ebx, 9 ; move our second number into ebx
mul ebx ; multiply eax by ebx
call iprintLF ; call our integer print with linefeed function
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf calculator-multiplication.asm</span>
<span>~$ ld -m elf_i386 calculator-multiplication.o -o calculator-multiplication</span>
<span>~$ ./calculator-multiplication</span>
<span>810</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson15">
<article class="span12">
<header>
<h2>Lesson 15</h2>
<h4>Calculator - division</h4>
</header>
<p>In this program we will be dividing the value in EAX by the value present in EBX. We've used division before in our integer print subroutine. Our program requires a few extra strings in order to print out the correct answer but otherwise there's nothing complicated going on.</p>
<p>Firstly we load EAX and EBX with integers in the same way as Lesson 12. Division logic is performed using the DIV instruction. The DIV instruction always divides EAX by the value passed after it. It will leave the quotient part of the answer in EAX and put the remainder part in EDX (the original data register). We then MOV and call our strings and integers to print out the correct answer.</p>
<div class="snippet">
<span class="filename">calculator-division.asm</span>
<pre class="brush: asm;">
; Calculator (Division)
; Compile with: nasm -f elf calculator-division.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 calculator-division.o -o calculator-division
; Run with: ./calculator-division
%include 'functions.asm'
SECTION .data
msg1 db ' remainder ', 0h ; a message string to correctly output result (NOTE the null terminating byte that sprint relies on)
SECTION .text
global _start
_start:
mov eax, 90 ; move our first number into eax
mov ebx, 9 ; move our second number into ebx
mov edx, 0 ; empty edx - div divides the 64 bit value in edx:eax, so edx must not hold leftover data
div ebx ; divide eax by ebx
call iprint ; call our integer print function on the quotient
mov eax, msg1 ; move our message string into eax
call sprint ; call our string print function
mov eax, edx ; move our remainder into eax
call iprintLF ; call our integer printing with linefeed function
call quit
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf calculator-division.asm</span>
<span>~$ ld -m elf_i386 calculator-division.o -o calculator-division</span>
<span>~$ ./calculator-division</span>
<span>10 remainder 0</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson16">
<article class="span12">
<header>
<h2>Lesson 16</h2>
<h4>Calculator (atoi)</h4>
</header>
<p>Our program will take several command line arguments and add them together printing out the result in the terminal.</p>
<h5>Writing our program</h5>
<p>Our program begins by using the POP instruction to get the number of passed arguments off the stack. This value is stored in ECX (originally known as the counter register). It will then POP the next value off the stack containing the program name and remove it from the number of arguments stored in ECX. It will then loop through the rest of the arguments popping each one off the stack and performing our addition logic. As we know, arguments passed via the command line are received by our program as strings. Before we can add the arguments together we will need to convert them to integers otherwise our result will not be correct. We do this by calling our Ascii to Integer function (atoi). This function will convert the ascii value into an integer and place the result in EAX. We can then add this value to EDX (originally known as the data register) where we will store the result of our additions. If the value passed to atoi is not an ascii representation of an integer our function will return zero instead. When all arguments have been converted and added together we will print out the result and call our quit function.</p>
<h5>How does the atoi function work</h5>
<p>Converting an ascii string into an integer value is not a trivial task. We know how to convert an integer to an ascii string so the process should essentially work in reverse. Firstly we take the address of the string and move it into ESI (originally known as the source register). We will then move along the string byte by byte (think of each byte as being a single digit or decimal placeholder). For each digit we will check if its value is between 48-57 (ascii values for the digits 0-9).</p>
<p>Once we have performed this check and determined that the byte can be converted to an integer we will perform the following logic. We will subtract 48 from the value – converting the ascii value to its decimal equivalent. We will then add this value to EAX (the general purpose register that will store our result). We will then multiply EAX by 10 as each byte represents a decimal placeholder and continue the loop.</p>
<p>When all bytes have been converted we need to do one last thing before we return the result. The last digit of any number represents a single unit (not a multiple of 10) so we have multiplied our result one too many times. We simply divide it by 10 once to correct this and then return. If no integer arguments were passed, however, we skip this divide instruction.</p>
<h5>What is the BL register</h5>
<p>You may have noticed that the atoi function references the BL register. So far in these tutorials we have been exclusively using 32bit registers. These 32bit general purpose registers contain segments of memory that can also be referenced. These segments are available in 16bits and 8bits. We wanted a single byte (8bits) because a byte is the size of memory that is required to store a single ascii character. If we used a larger memory size we would have copied 8bits of data into 32bits of space leaving us with 'rubbish' bits - because only the first 8bits would be meaningful for our calculation.</p>
<p>The EBX register is 32bits. EBX's 16 bit segment is referenced as BX. BX contains the 8bit segments BL and BH (Lower and Higher bits). We wanted the first 8bits (lower bits) of EBX and so we referenced that storage area using BL.</p>
<p>Almost every assembly language tutorial begins with a history of the registers, their names and their sizes. These tutorials however were written to provide a foundation in NASM by first writing code and then understanding the theory. The full story about the size of registers, their history and importance are beyond the scope of this tutorial but we will return to that story in later tutorials.</p>
<p>
<span class="label label-info">Note:</span>
Only the new function in this file 'atoi' is shown below.
</p>
<div class="snippet">
<span class="filename">functions.asm</span>
<pre class="brush: asm;">
;------------------------------------------
; int atoi(String number)
; Ascii to integer function (atoi)
; In:  eax = address of the ascii string to convert
; Out: eax = value of the decimal digits at the start of the string (0 if it does not start with a digit)
; ebx, ecx, edx and esi are pushed on entry and popped again before returning
atoi:
push ebx ; preserve ebx on the stack to be restored after function runs
push ecx ; preserve ecx on the stack to be restored after function runs
push edx ; preserve edx on the stack to be restored after function runs
push esi ; preserve esi on the stack to be restored after function runs
mov esi, eax ; move pointer in eax into esi (our number to convert)
mov eax, 0 ; initialise eax with decimal value 0 (eax accumulates the result)
mov ecx, 0 ; initialise ecx with decimal value 0 (ecx counts the digits converted and indexes the string)
.multiplyLoop:
xor ebx, ebx ; resets both lower and upper bytes of ebx to be 0
mov bl, [esi+ecx] ; move a single byte (the next character) into the lowest byte of ebx
cmp bl, 48 ; compare the character against ascii value 48 (char value 0)
jl .finished ; jump if less than to label finished (not a digit, stop converting)
cmp bl, 57 ; compare the character against ascii value 57 (char value 9)
jg .finished ; jump if greater than to label finished (not a digit, stop converting)
sub bl, 48 ; convert the ascii character to the decimal value it represents
add eax, ebx ; add ebx to our integer value in eax
mov ebx, 10 ; move decimal value 10 into ebx
mul ebx ; multiply eax by ebx to get place value (mul writes the high half of the product to edx, hence the push edx above)
inc ecx ; increment ecx (our counter register)
jmp .multiplyLoop ; continue multiply loop
.finished:
cmp ecx, 0 ; compare ecx register's value against decimal 0 (our counter register)
je .restore ; jump if equal to 0 (no integer arguments were passed to atoi), eax is still 0
mov ebx, 10 ; move decimal value 10 into ebx
div ebx ; divide edx:eax by 10 to undo the last multiply (edx:eax still hold the product from the final mul)
.restore:
pop esi ; restore esi from the value we pushed onto the stack at the start
pop edx ; restore edx from the value we pushed onto the stack at the start
pop ecx ; restore ecx from the value we pushed onto the stack at the start
pop ebx ; restore ebx from the value we pushed onto the stack at the start
ret
</pre>
</div>
<div class="snippet">
<span class="filename">calculator-atoi.asm</span>
<pre class="brush: asm;">
; Calculator (ATOI)
; Adds together every command line argument and prints the total.
; Compile with: nasm -f elf calculator-atoi.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 calculator-atoi.o -o calculator-atoi
; Run with: ./calculator-atoi 20 1000 317
%include 'functions.asm'
SECTION .text
global _start
_start:
pop ecx ; ecx = number of arguments (top of the stack at program start)
pop edx ; next on the stack is the program name - popped only to get it out of the way
dec ecx ; the program name is not one of our numbers so leave it out of the count
xor edx, edx ; edx holds the running total, start it at zero
nextArg:
test ecx, ecx ; are there any arguments still waiting on the stack?
jz noMoreArgs ; none left - leave the loop and print the total
pop eax ; eax = address of the next argument string
call atoi ; eax = integer value of that string
add edx, eax ; add it to the running total
dec ecx ; one fewer argument left to process
jmp nextArg ; go round again
noMoreArgs:
mov eax, edx ; our printing function takes the value to print in eax
call iprintLF ; print the total followed by a linefeed
call quit ; exit the program
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf calculator-atoi.asm</span>
<span>~$ ld -m elf_i386 calculator-atoi.o -o calculator-atoi</span>
<span>~$ ./calculator-atoi 20 1000 317</span>
<span>1337</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson17">
<article class="span12">
<header>
<h2>Lesson 17</h2>
<h4>Namespace</h4>
</header>
<p>Namespace is a necessary construct in any software project that involves a codebase that is larger than a few simple functions. Namespace provides scope to your identifiers and allows you to reuse naming conventions to make your code more readable and maintainable. In assembly language where subroutines are identified by global labels, namespace can be achieved by using local labels.</p>
<p>Up until the last few tutorials we have been using global labels exclusively. This means that blocks of logic that essentially perform the same task needed a label with a unique identifier. A good example would be our "finished" labels. These were global in scope meaning when we needed to break out of a loop in one function we could jump to a "finished" label. But if we needed to break out of a loop in a different function we would need to name this same task something else maybe calling it "done" or "continue". Being able to reuse the label "finished" would mean that someone reading the code would know that these blocks of logic perform almost the same task.</p>
<p>Local labels are prepended with a "." at the beginning of their name for example ".finished". You may have noticed them appearing as our code base in functions.asm grew. A local label is given the namespace of the first global label above it. You can jump to a local label by using the JMP instruction and the compiler will calculate which local label you are referencing by determining in what scope (based on the above global labels) the instruction was called.</p>
<p>
<span class="label label-info">Note:</span>
The file <a href="https://github.com/DGivney/assemblytutorials/blob/master/code/lesson17/functions.asm" target="_blank">functions.asm</a> was modified adding namespaces in all the subroutines. This is particularly important in the "slen" subroutine which contains a "finished" global label.
</p>
<div class="snippet">
<span class="filename">namespace.asm</span>
<pre class="brush: asm;">
; Namespace
; Demonstrates that the local label ".finished" can be reused under different
; global labels, alongside a separate global label "finished".
; Compile with: nasm -f elf namespace.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 namespace.o -o namespace
; Run with: ./namespace
%include 'functions.asm'
SECTION .data
msg1 db 'Jumping to finished label.', 0h ; a message string
msg2 db 'Inside subroutine number: ', 0h ; a message string
msg3 db 'Inside subroutine "finished".', 0h ; a message string
SECTION .text
global _start
_start:
subroutineOne:
mov eax, msg1 ; move the address of msg1 into eax
call sprintLF ; call our string printing with linefeed function
jmp .finished ; jump to the local label under the subroutineOne scope
.finished:
mov eax, msg2 ; move the address of msg2 into eax
call sprint ; call our string printing function
mov eax, 1 ; move the value one into eax (for subroutine number one)
call iprintLF ; call our integer printing with linefeed function
subroutineTwo:
mov eax, msg1 ; move the address of msg1 into eax
call sprintLF ; call our string printing with linefeed function
jmp .finished ; jump to the local label under the subroutineTwo scope
.finished:
mov eax, msg2 ; move the address of msg2 into eax
call sprint ; call our string printing function
mov eax, 2 ; move the value two into eax (for subroutine number two)
call iprintLF ; call our integer printing with linefeed function
mov eax, msg1 ; move the address of msg1 into eax
call sprintLF ; call our string printing with linefeed function
jmp finished ; jump to the global label finished (no leading "." so it is not scoped to subroutineTwo)
finished:
mov eax, msg3 ; move the address of msg3 into eax
call sprintLF ; call our string printing with linefeed function
call quit ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf namespace.asm</span>
<span>~$ ld -m elf_i386 namespace.o -o namespace</span>
<span>~$ ./namespace</span>
<span>Jumping to finished label.</span>
<span>Inside subroutine number: 1</span>
<span>Jumping to finished label.</span>
<span>Inside subroutine number: 2</span>
<span>Jumping to finished label.</span>
<span>Inside subroutine "finished".</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson18">
<article class="span12">
<header>
<h2>Lesson 18</h2>
<h4>Fizz Buzz</h4>
</header>
<h5>Firstly, some background</h5>
<p>FizzBuzz is a group word game played in schools to teach children division. Players take turns to count aloud integers from 1 to 100 replacing any number divisible by 3 with the word "fizz" and any number divisible by 5 with the word "buzz". Numbers that are both divisible by 3 and 5 are replaced by the word "fizzbuzz". This children's game has also become a de facto interview screening question for computer programming jobs as it's thought to easily discover candidates that can't construct a simple logic gate.</p>
<h5>Writing our program</h5>
<p>There are a number of code solutions to this simple game and some languages offer very trivial and elegant solutions. Depending on how you choose to solve it, the solution almost always involves an if statement and possibly an else statement depending on whether you choose to exploit the mathematical property that anything divisible by 5 &amp; 3 would also be divisible by 3 * 5. Being that this is an assembly language tutorial we will provide a solution that involves a structure of two cascading if statements to print the words "fizz" and/or "buzz" and an else statement in case these fail, to print the integer as an ascii value. Each iteration of our loop will then print a line feed. Once we reach 100 we call our program exit function.</p>
<div class="snippet">
<span class="filename">fizzbuzz.asm</span>
<pre class="brush: asm;">
; Fizzbuzz
; Prints the numbers 1 to 100, replacing multiples of 3 with Fizz and multiples of 5 with Buzz.
; Register use: ecx = counter, edi = remainder of counter / 3, esi = remainder of counter / 5
; The code reads ecx and edi again after calling sprint, so it relies on sprint leaving them unchanged.
; Compile with: nasm -f elf fizzbuzz.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 fizzbuzz.o -o fizzbuzz
; Run with: ./fizzbuzz
%include 'functions.asm'
SECTION .data
fizz db 'Fizz', 0h ; a message string
buzz db 'Buzz', 0h ; a message string
SECTION .text
global _start
_start:
mov esi, 0 ; initialise our checkBuzz boolean variable (zero means the counter divides by 5)
mov edi, 0 ; initialise our checkFizz boolean variable (zero means the counter divides by 3)
mov ecx, 0 ; initialise our counter variable
nextNumber:
inc ecx ; increment our counter variable
.checkFizz:
mov edx, 0 ; clear the edx register - div divides edx:eax and leaves our remainder in edx
mov eax, ecx ; move the value of our counter into eax for division
mov ebx, 3 ; move our number to divide by into ebx (in this case the value is 3)
div ebx ; divide eax by ebx
mov edi, edx ; move our remainder into edi (our checkFizz boolean variable)
cmp edi, 0 ; compare if the remainder is zero (meaning the counter divides by 3)
jne .checkBuzz ; if the remainder is not equal to zero jump to local label checkBuzz
mov eax, fizz ; else move the address of our fizz string into eax for printing
call sprint ; call our string printing function
.checkBuzz:
mov edx, 0 ; clear the edx register - div divides edx:eax and leaves our remainder in edx
mov eax, ecx ; move the value of our counter into eax for division
mov ebx, 5 ; move our number to divide by into ebx (in this case the value is 5)
div ebx ; divide eax by ebx
mov esi, edx ; move our remainder into esi (our checkBuzz boolean variable)
cmp esi, 0 ; compare if the remainder is zero (meaning the counter divides by 5)
jne .checkInt ; if the remainder is not equal to zero jump to local label checkInt
mov eax, buzz ; else move the address of our buzz string into eax for printing
call sprint ; call our string printing function
.checkInt:
cmp edi, 0 ; edi contains the remainder after the division in checkFizz
je .continue ; if equal (counter divides by 3) skip printing the integer
cmp esi, 0 ; esi contains the remainder after the division in checkBuzz
je .continue ; if equal (counter divides by 5) skip printing the integer
mov eax, ecx ; else move the value in ecx (our counter) into eax for printing
call iprint ; call our integer printing function
.continue:
mov eax, 0Ah ; move an ascii linefeed character into eax
push eax ; push the linefeed value onto the stack so that it has an address in memory
mov eax, esp ; get the stack pointer (address on the stack of our linefeed char)
call sprint ; call our string printing function to print a line feed
pop eax ; remove the linefeed from the stack again so the stack stays balanced
cmp ecx, 100 ; compare if our counter is equal to 100
jne nextNumber ; if not equal jump to the start of the loop
call quit ; else call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf fizzbuzz.asm</span>
<span>~$ ld -m elf_i386 fizzbuzz.o -o fizzbuzz</span>
<span>~$ ./fizzbuzz</span>
<span>1</span>
<span>2</span>
<span>Fizz</span>
<span>4</span>
<span>Buzz</span>
<span>Fizz</span>
<span>7</span>
<span>8</span>
<span>Fizz</span>
<span>Buzz</span>
<span>11</span>
<span>Fizz</span>
<span>13</span>
<span>14</span>
<span>FizzBuzz</span>
<span>16</span>
<span>...</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson19">
<article class="span12">
<header>
<h2>Lesson 19</h2>
<h4>Execute Command</h4>
</header>
<h5>Firstly, some background</h5>
<p>The EXEC family of functions replaces the currently running process with a new process that executes the command you specified when calling it. We will be using the SYS_EXECVE function in this lesson to replace our program's running process with a new process that will execute the linux program /bin/echo to print out “Hello World!”.</p>
<h5>Naming convention</h5>
<p>The naming convention used for this family of functions is <strong>exec</strong> (execute) followed by one or more of the following letters.</p>
<ul>
<li><span class="instruction-name">e</span> - An array of pointers to environment variables is explicitly passed to the new process image.</li>
<li><span class="instruction-name">l</span> - Command-line arguments are passed individually to the function.</li>
<li><span class="instruction-name">p</span> - Uses the PATH environment variable to find the file named in the path argument to be executed.</li>
<li><span class="instruction-name">v</span> - Command-line arguments are passed to the function as an array of pointers.</li>
</ul>
<h5>Writing our program</h5>
<p>The <span class="instruction-name">V</span> &amp; <span class="instruction-name">E</span> at the end of our function name mean we will need to pass our arguments in the following format: The first argument is a string containing the command to execute, then an array of arguments to pass to that command and then another array of environment variables that the new process will use. As we are calling a simple command we won't pass any special environment variables to the new process and instead will pass an empty array that contains only the terminating 0h (null).</p>
<p>Both the command arguments and the environment arguments need to be passed as an array of pointers (addresses to memory). That's why we define our strings first and then define a simple null-terminated struct (array) of the variable names. This is then passed to SYS_EXECVE. We call the function and the process is replaced by our command and output is returned to the terminal.</p>
<div class="snippet">
<span class="filename">execute.asm</span>
<pre class="brush: asm;">
; Execute
; Replaces this process with /bin/echo, handing it a single argument.
; Compile with: nasm -f elf execute.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 execute.o -o execute
; Run with: ./execute
%include 'functions.asm'
SECTION .data
command db '/bin/echo', 0h ; command to execute
arg1 db 'Hello World!', 0h ; the text we want the command to print
arguments dd command ; start of the argument array - the first entry is the command itself
dd arg1 ; arguments to pass to commandline (in this case just one)
dd 0h ; end the struct
environment dd 0h ; arguments to pass as environment variables (in this case none) end the struct
SECTION .text
global _start
_start:
mov edx, environment ; address of environment variables
mov ecx, arguments ; address of the arguments to pass to the commandline
mov ebx, command ; address of the file to execute
mov eax, 11 ; invoke SYS_EXECVE (kernel opcode 11)
int 80h ; call the kernel - on success our process is replaced and execution never continues past here
call quit ; call our quit function (only reached if SYS_EXECVE failed)
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf execute.asm</span>
<span>~$ ld -m elf_i386 execute.o -o execute</span>
<span>~$ ./execute</span>
<span>Hello World!</span>
</div>
</div>
</div>
<p>
<span class="label label-info">Note:</span>
Here are a couple other commands to try.
</p>
<div class="snippet">
<span class="filename">execute.asm</span>
<pre class="brush: asm; first-line: 8;">
SECTION .data
command db '/bin/ls', 0h ; command to execute
arg1 db '-l', 0h ; argument to pass to the command (-l asks ls for its long listing format)
</pre>
</div>
<div class="snippet">
<span class="filename">execute.asm</span>
<pre class="brush: asm; first-line: 8;">
SECTION .data
command db '/bin/sleep', 0h ; command to execute
arg1 db '5', 0h ; argument to pass to the command (number of seconds to sleep)
</pre>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson20">
<article class="span12">
<header>
<h2>Lesson 20</h2>
<h4>Process Forking</h4>
</header>
<h5>Firstly, some background</h5>
<p>In this lesson we will use SYS_FORK to create a new process that duplicates our current process. SYS_FORK takes no arguments - you just call fork and the new process is created. Both processes run concurrently. We can test the return value (in eax) to test whether we are currently in the parent or child process. The parent process returns a non-negative, non-zero integer. In the child process EAX is zero. This can be used to branch your logic between the parent and child.</p>
<p>In our program we exploit this fact to print out different messages in each process.</p>
<p>
<span class="label label-info">Note:</span>
Each process is responsible for safely exiting.
</p>
<div class="snippet">
<span class="filename">fork.asm</span>
<pre class="brush: asm;">
; Fork
; Splits into two processes and prints a different message from each one.
; Compile with: nasm -f elf fork.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 fork.o -o fork
; Run with: ./fork
%include 'functions.asm'
SECTION .data
childMsg db 'This is the child process', 0h ; printed by the child
parentMsg db 'This is the parent process', 0h ; printed by the parent
SECTION .text
global _start
_start:
mov eax, 2 ; SYS_FORK is kernel opcode 2
int 80h ; ask the kernel to duplicate this process
test eax, eax ; the return value is zero only inside the child
jnz parent ; anything other than zero means we are the parent
child:
mov eax, childMsg ; eax = address of the child's message
call sprintLF ; print it followed by a linefeed
call quit ; the child exits here
parent:
mov eax, parentMsg ; eax = address of the parent's message
call sprintLF ; print it followed by a linefeed
call quit ; the parent exits here
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf fork.asm</span>
<span>~$ ld -m elf_i386 fork.o -o fork</span>
<span>~$ ./fork</span>
<span>This is the parent process</span>
<span>This is the child process</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson21">
<article class="span12">
<header>
<h2>Lesson 21</h2>
<h4>Telling the time</h4>
</header>
<p>Generating a unix timestamp in NASM is easy with the SYS_TIME function of the linux kernel. Simply pass OPCODE 13 to the kernel with no arguments and you are returned the <a href="https://en.wikipedia.org/wiki/Unix_epoch">Unix Epoch</a> in the EAX register.</p>
<p>That is the number of seconds that have elapsed since January 1st 1970 UTC.</p>
<div class="snippet">
<span class="filename">time.asm</span>
<pre class="brush: asm;">
; Time
; Prints the number of seconds since the Unix epoch as returned by SYS_TIME.
; Compile with: nasm -f elf time.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 time.o -o time
; Run with: ./time
%include 'functions.asm'
SECTION .data
msg db 'Seconds since Jan 01 1970: ', 0h ; a message string
SECTION .text
global _start
_start:
mov eax, msg ; move our message string into eax for printing
call sprint ; call our string printing function
mov ebx, 0 ; SYS_TIME reads an optional result pointer from ebx - pass null so the time is only returned in eax
mov eax, 13 ; invoke SYS_TIME (kernel opcode 13)
int 80h ; call the kernel
call iprintLF ; call our integer printing function with linefeed
call quit ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf time.asm</span>
<span>~$ ld -m elf_i386 time.o -o time</span>
<span>~$ ./time</span>
<span>Seconds since Jan 01 1970: 1374995660</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson22">
<article class="span12">
<header>
<h2>Lesson 22</h2>
<h4>File Handling - Create</h4>
</header>
<h5>Firstly, some background</h5>
<p>File Handling in Linux is achieved through a small number of system calls related to creating, updating and deleting files. These functions require a <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> which is a unique, non-negative integer that identifies the file on the system.</p>
<h5>Writing our program</h5>
<p>We begin the tutorial by creating a file using sys_creat. We will then build upon our program in each of the following file handling lessons, adding code as we go. Eventually we will have a full program that can create, update, open, close and delete files.</p>
<p>sys_creat expects 2 arguments - the file permissions in ECX and the filename in EBX. The sys_creat opcode is then loaded into EAX and the kernel is called to create the file. The file descriptor of the created file is returned in EAX. This file descriptor can then be used for all other file handling functions.</p>
<div class="snippet">
<span class="filename">create.asm</span>
<pre class="brush: asm;">
; Create
; Creates the file readme.txt in the current folder and exits.
; Compile with: nasm -f elf create.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 create.o -o create
; Run with: ./create
%include 'functions.asm'
SECTION .data
filename db 'readme.txt', 0h ; name of the file that will be created
SECTION .text
global _start
_start:
mov eax, 8 ; SYS_CREAT is kernel opcode 8
mov ebx, filename ; ebx = address of the filename string
mov ecx, 0777o ; ecx = permissions (octal 777: read, write and execute for everyone)
int 80h ; ask the kernel to create the file
call quit ; exit the program
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf create.asm</span>
<span>~$ ld -m elf_i386 create.o -o create</span>
<span>~$ ./create</span>
</div>
</div>
</div>
<p>
<span class="label label-info">Note:</span>
The file 'readme.txt' will now have been created in the folder.
</p>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson23">
<article class="span12">
<header>
<h2>Lesson 23</h2>
<h4>File Handling - Write</h4>
</header>
<p>Building upon the previous lesson we will now use sys_write to write content to a newly created file.</p>
<p>sys_write expects 3 arguments - the number of bytes to write in EDX, the contents string to write in ECX and the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. The sys_write opcode is then loaded into EAX and the kernel is called to write the content to the file. In this lesson we will first call sys_creat to get a file descriptor which we will then load into EBX.</p>
<div class="snippet">
<span class="filename">write.asm</span>
<pre class="brush: asm;">
; Write
; Creates readme.txt and writes a short string into it.
; Compile with: nasm -f elf write.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 write.o -o write
; Run with: ./write
%include 'functions.asm'
SECTION .data
filename db 'readme.txt', 0h ; name of the file that will be created
contents db 'Hello world!', 0h ; text that will be written into the file
SECTION .text
global _start
_start:
mov eax, 8 ; SYS_CREAT, exactly as in lesson 22
mov ebx, filename
mov ecx, 0777o
int 80h ; eax now holds the file descriptor of the new file
mov ebx, eax ; copy the file descriptor into ebx before eax is reused for the opcode
mov eax, 4 ; SYS_WRITE is kernel opcode 4
mov ecx, contents ; ecx = address of the text to write
mov edx, 12 ; edx = how many bytes to write (the 12 characters of 'Hello world!')
int 80h ; ask the kernel to write to the file
call quit ; exit the program
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf write.asm</span>
<span>~$ ld -m elf_i386 write.o -o write</span>
<span>~$ ./write</span>
</div>
</div>
</div>
<p>
<span class="label label-info">Note:</span>
Open the newly created file 'readme.txt' in this folder and you will see the content 'Hello world!'.
</p>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson24">
<article class="span12">
<header>
<h2>Lesson 24</h2>
<h4>File Handling - Open</h4>
</header>
<p>Building upon the previous lesson we will now use sys_open to obtain the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> of the newly created file. This file descriptor can then be used for all other file handling functions.</p>
<p>sys_open expects 2 arguments - the access mode (table below) in ECX and the filename in EBX. The sys_open opcode is then loaded into EAX and the kernel is called to open the file and return the file descriptor.</p>
<p>sys_open additionally accepts zero or more file creation flags and file status flags in EDX. <a href="http://man7.org/linux/man-pages/man2/open.2.html" target="_blank">Click here for more information about the access mode, file creation flags and file status flags</a>.</p>
<div class="row-fluid">
<div class="span6">
<table class="table">
<thead>
<tr>
<th></th>
<th>Description</th>
<th>Value</th>
</tr>
</thead>
<tbody>
<tr>
<td><b>O_RDONLY</b></td>
<td>open file in read only mode</td>
<td>0</td>
</tr>
<tr>
<td><b>O_WRONLY</b></td>
<td>open file in write only mode</td>
<td>1</td>
</tr>
<tr>
<td><b>O_RDWR</b></td>
<td>open file in read and write mode</td>
<td>2</td>
</tr>
</tbody>
</table>
</div>
</div>
<p>
<span class="label label-info">Note:</span>
sys_open returns the file descriptor in EAX. On linux this will be a unique, non-negative integer which we will print using our integer printing function.
</p>
<div class="snippet">
<span class="filename">open.asm</span>
<pre class="brush: asm;">
; Open
; Creates and writes readme.txt, then opens it read only and prints the file descriptor.
; Compile with: nasm -f elf open.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 open.o -o open
; Run with: ./open
%include 'functions.asm'
SECTION .data
filename db 'readme.txt', 0h ; name of the file that will be created
contents db 'Hello world!', 0h ; text that will be written into the file
SECTION .text
global _start
_start:
mov eax, 8 ; SYS_CREAT - create the file as in lesson 22
mov ebx, filename
mov ecx, 0777o
int 80h
mov ebx, eax ; SYS_WRITE - write the contents as in lesson 23
mov eax, 4
mov ecx, contents
mov edx, 12
int 80h
mov eax, 5 ; SYS_OPEN is kernel opcode 5
mov ebx, filename ; ebx = address of the name of the file we created above
mov ecx, 0 ; ecx = access mode, 0 is O_RDONLY (read only)
int 80h ; ask the kernel to open the file, the file descriptor comes back in eax
call iprintLF ; print the descriptor (the one from SYS_CREAT was never closed, so this is a second, different number)
call quit ; exit the program
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf open.asm</span>
<span>~$ ld -m elf_i386 open.o -o open</span>
<span>~$ ./open</span>
<span>4</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson25">
<article class="span12">
<header>
<h2>Lesson 25</h2>
<h4>File Handling - Read</h4>
</header>
<p>Building upon the previous lesson we will now use sys_read to read the content of a newly created and opened file. We will store this string in a variable.</p>
<p>sys_read expects 3 arguments - the number of bytes to read in EDX, the memory address of our variable in ECX and the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. We will use the previous lesson's sys_open code to obtain the file descriptor which we will then load into EBX. The sys_read opcode is then loaded into EAX and the kernel is called to read the file contents into our variable, which is then printed to the screen.</p>
<p>
<span class="label label-info">Note:</span>
We will reserve 255 bytes in the .bss section to store the contents of the file. <a href="#lesson9">See Lesson 9 for more information on the .bss section.</a>
</p>
<div class="snippet">
<span class="filename">read.asm</span>
<pre class="brush: asm;">
; Read
; Creates and writes readme.txt, opens it again and reads the contents back into a buffer for printing.
; Compile with: nasm -f elf read.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 read.o -o read
; Run with: ./read
%include 'functions.asm'
SECTION .data
filename db 'readme.txt', 0h ; the filename to create
contents db 'Hello world!', 0h ; the contents to write
SECTION .bss
fileContents resb 255 ; variable to store file contents
SECTION .text
global _start
_start:
mov ecx, 0777o ; Create file from lesson 22
mov ebx, filename
mov eax, 8
int 80h
mov edx, 12 ; Write contents to file from lesson 23
mov ecx, contents
mov ebx, eax
mov eax, 4
int 80h
mov ecx, 0 ; Open file from lesson 24
mov ebx, filename
mov eax, 5
int 80h
mov edx, 12 ; number of bytes to read - one for each letter of the file contents
mov ecx, fileContents ; move the memory address of our file contents variable into ecx
mov ebx, eax ; move the opened file descriptor into EBX
mov eax, 3 ; invoke SYS_READ (kernel opcode 3)
int 80h ; call the kernel
mov eax, fileContents ; move the memory address of our file contents variable into eax for printing
call sprintLF ; call our string printing with linefeed function
call quit ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf read.asm</span>
<span>~$ ld -m elf_i386 read.o -o read</span>
<span>~$ ./read</span>
<span>Hello world!</span>
</div>
</div>
</div>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson26">
<article class="span12">
<header>
<h2>Lesson 26</h2>
<h4>File Handling - Close</h4>
</header>
<p>Building upon the previous lesson we will now use sys_close to properly close an open file.</p>
<p>sys_close expects 1 argument - the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. We will use the previous lessons' code to obtain the file descriptor which we will then load into EBX. The sys_close opcode is then loaded into EAX and the kernel is called to close the file and remove the active file descriptor.</p>
<div class="snippet">
<span class="filename">close.asm</span>
<pre class="brush: asm;">
; Close
; Creates, writes, opens and reads readme.txt, then closes the descriptor obtained from SYS_OPEN.
; Compile with: nasm -f elf close.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 close.o -o close
; Run with: ./close
%include 'functions.asm'
SECTION .data
filename db 'readme.txt', 0h ; the filename to create
contents db 'Hello world!', 0h ; the contents to write
SECTION .bss
fileContents resb 255 ; variable to store file contents
SECTION .text
global _start
_start:
mov ecx, 0777o ; Create file from lesson 22
mov ebx, filename
mov eax, 8
int 80h
mov edx, 12 ; Write contents to file from lesson 23
mov ecx, contents
mov ebx, eax
mov eax, 4
int 80h
mov ecx, 0 ; Open file from lesson 24
mov ebx, filename
mov eax, 5
int 80h
mov edx, 12 ; Read file from lesson 25
mov ecx, fileContents
mov ebx, eax ; ebx = file descriptor returned by SYS_OPEN
mov eax, 3
int 80h
mov eax, fileContents
call sprintLF
mov ebx, ebx ; not needed but used to demonstrate that SYS_CLOSE takes a file descriptor from EBX (assumes sprintLF left ebx unchanged)
mov eax, 6 ; invoke SYS_CLOSE (kernel opcode 6)
int 80h ; call the kernel
call quit ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf close.asm</span>
<span>~$ ld -m elf_i386 close.o -o close</span>
<span>~$ ./close</span>
<span>Hello world!</span>
</div>
</div>
</div>
<p>
<span class="label label-info">Note:</span>
We have properly closed the file and removed the active file descriptor.
</p>
</article>
</div>
<hr>
<div class="row-fluid" id="lesson27">
<article class="span12">
<header>
<h2>Lesson 27</h2>
<h4>File Handling - Seek</h4>
</header>
<p>In this lesson we will open a file and update the file contents at the end of the file using sys_lseek.</p>
<p>Using sys_lseek you can move the cursor within the file by an offset in bytes. The below example will move the cursor to the end of the file, then pass 0 bytes as the offset (so we append to the end of the file and not beyond) before writing a string in that position. Try different values in ECX and EDX to write the content to different positions within the opened file.</p>
<p>sys_lseek expects 3 arguments - the whence argument (table below) in EDX, the offset in bytes in ECX, and the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. The sys_lseek opcode is then loaded into EAX and we call the kernel to move the file pointer to the correct offset. We then use sys_write to update the content at that position.</p>
<div class="row-fluid">
<div class="span6">
<table class="table">
<thead>
<tr>
<th></th>
<th>Description</th>
<th>Value</th>
</tr>
</thead>
<tbody>
<tr>
<td><b>SEEK_SET</b></td>
<td>beginning of the file</td>
<td>0</td>
</tr>
<tr>
<td><b>SEEK_CUR</b></td>
<td>current file offset</td>
<td>1</td>
</tr>
<tr>
<td><b>SEEK_END</b></td>
<td>end of the file</td>
<td>2</td>
</tr>
</tbody>
</table>
</div>
</div>
<br>
<p>
<span class="label label-info">Note:</span>
A file 'readme.txt' has been included in the code folder for this lesson. This file will be updated after running the program.
</p>
<div class="snippet">
<span class="filename">seek.asm</span>
<pre class="brush: asm;">
; Seek
; Opens the existing readme.txt write-only, moves the file offset to the end
; of the file and appends the contents string there.
; Compile with: nasm -f elf seek.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 seek.o -o seek
; Run with: ./seek

%include 'functions.asm'

; kernel opcodes (32-bit int 80h interface)
SYS_WRITE       equ 4
SYS_OPEN        equ 5
SYS_LSEEK       equ 19

O_WRONLY        equ 1                   ; write-only access mode
SEEK_END        equ 2                   ; whence: offset is relative to the end of the file
CONTENTS_LEN    equ 9                   ; one byte per character of '-updated-'

SECTION .data
filename        db 'readme.txt', 0h     ; the file to open
contents        db '-updated-', 0h      ; the text appended at the end of the file

SECTION .text
global  _start

_start:
    ; open(filename, O_WRONLY)
    mov     eax, SYS_OPEN
    mov     ebx, filename
    mov     ecx, O_WRONLY
    int     80h                         ; EAX = file descriptor

    ; lseek(fd, 0, SEEK_END)
    mov     ebx, eax                    ; descriptor from SYS_OPEN
    mov     ecx, 0                      ; offset of 0 bytes from the end
    mov     edx, SEEK_END
    mov     eax, SYS_LSEEK
    int     80h

    ; write(fd, contents, CONTENTS_LEN) - EBX still holds the descriptor
    mov     ecx, contents
    mov     edx, CONTENTS_LEN
    mov     eax, SYS_WRITE
    int     80h

    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf seek.asm</span>
<span>~$ ld -m elf_i386 seek.o -o seek</span>
<span>~$ ./seek</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson28">
<article class="span12">
<header>
<h2>Lesson 28</h2>
<h4>File Handling - Delete</h4>
</header>
<p>Deleting a file on linux is achieved by calling sys_unlink.</p>
<p>sys_unlink expects 1 argument - the filename in EBX. The sys_unlink opcode is then loaded into EAX and the kernel is called to delete the file.</p>
<p>
<span class="label label-info">Note:</span>
A file 'readme.txt' has been included in the code folder for this lesson. This file will be deleted after running the program.
</p>
<div class="snippet">
<span class="filename">unlink.asm</span>
<pre class="brush: asm;">
; Unlink
; Deletes readme.txt from the current directory with SYS_UNLINK.
; Compile with: nasm -f elf unlink.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 unlink.o -o unlink
; Run with: ./unlink

%include 'functions.asm'

SYS_UNLINK      equ 10                  ; kernel opcode (32-bit int 80h interface)

SECTION .data
filename        db 'readme.txt', 0h     ; NUL-terminated name of the file to delete

SECTION .text
global  _start

_start:
    ; unlink(filename)
    mov     eax, SYS_UNLINK
    mov     ebx, filename
    int     80h

    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf unlink.asm</span>
<span>~$ ld -m elf_i386 unlink.o -o unlink</span>
<span>~$ ./unlink</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson29">
<article class="span12">
<header>
<h2>Lesson 29</h2>
<h4>Sockets - Create</h4>
</header>
<h5>Firstly, some background</h5>
<p>Socket Programming in Linux is achieved through the use of the SYS_SOCKETCALL kernel function. The SYS_SOCKETCALL function is somewhat unique in that it encapsulates a number of different subroutines, all related to socket operations, within the one function. By passing different integer values in EBX we can change the behaviour of this function to create, listen, send, receive, close and more. <a href="https://gist.github.com/DGivney/7196bd7a9f21a12c9397bdcf9ae040d2" target="_blank">Click here</a> to view the full commented source code of the completed program.</p>
<h5>Writing our program</h5>
<p>We begin the tutorial by first initializing some of our registers which we will use later to store important values. We will then create a socket using SYS_SOCKETCALL's first subroutine which is called 'socket'. We will then build upon our program in each of the following socket programming lessons, adding code as we go. Eventually we will have a full program that can create, bind, listen, accept, read, write and close sockets.</p>
<p>SYS_SOCKETCALL's subroutine 'socket' expects 2 arguments - a pointer to an array of arguments in ECX and the integer value 1 in EBX. The SYS_SOCKETCALL opcode is then loaded into EAX and the kernel is called to create the socket. Because everything in linux is a file, we receive back the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> of the created socket in EAX. This file descriptor can then be used for performing other socket programming functions.</p>
<p>
<span class="label label-info">Note:</span>
XORing a register by itself is an efficient way of ensuring the register is initialised with the integer value zero and doesn't contain an unexpected value that could corrupt your program.
</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Creates a TCP/IPv4 socket through SYS_SOCKETCALL and prints the value the
; kernel returns in EAX.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket

%include 'functions.asm'

SYS_SOCKETCALL  equ 102                 ; kernel opcode (32-bit int 80h interface)
SOCKOP_SOCKET   equ 1                   ; SYS_SOCKETCALL subroutine number for 'socket'

PF_INET         equ 2
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; start from known-zero registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; Build the argument array on the stack, last argument first, so that
    ; ESP points at { PF_INET, SOCK_STREAM, IPPROTO_TCP }.
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte PF_INET
    mov     ecx, esp                    ; ECX = address of the argument array
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h                         ; EAX = new descriptor, or a negative errno value on failure

    call    iprintLF                    ; print the integer left in EAX

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
<span>3</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson30">
<article class="span12">
<header>
<h2>Lesson 30</h2>
<h4>Sockets - Bind</h4>
</header>
<p>Building on the previous lesson we will now associate the created socket with a local IP address and port which will allow us to connect to it. We do this by calling the second subroutine of SYS_SOCKETCALL which is called 'bind'.</p>
<p>We begin by storing the file descriptor we received in lesson 29 into EDI. EDI was originally called the Destination Index and is traditionally used in copy routines to store the location of a target file.</p>
<p>SYS_SOCKETCALL's subroutine 'bind' expects 2 arguments - a pointer to an array of arguments in ECX and the integer value 2 in EBX. The SYS_SOCKETCALL opcode is then loaded into EAX and the kernel is called to bind the socket.</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Creates a TCP/IPv4 socket and binds it to 0.0.0.0 port 9001.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket

%include 'functions.asm'

SYS_SOCKETCALL  equ 102                 ; kernel opcode (32-bit int 80h interface)
SOCKOP_SOCKET   equ 1                   ; SYS_SOCKETCALL subroutine numbers
SOCKOP_BIND     equ 2

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
INADDR_ANY      equ 0x00000000          ; 0.0.0.0
PORT_9001       equ 0x2923              ; 9001 (0x2329) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to bind

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; initialize some registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; create socket from lesson 29
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_bind:
    mov     edi, eax                    ; EDI = socket descriptor (a negative errno value if socket failed)

    ; Lay out the address on the stack: family, port, IP address
    ; (pushed in reverse so they appear in that order in memory).
    push    dword INADDR_ANY
    push    word PORT_9001
    push    word AF_INET
    mov     ecx, esp                    ; ECX = address of the structure just built

    ; Argument array for bind: { descriptor, address pointer, address length }
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp                    ; ECX = address of the argument array
    mov     ebx, SOCKOP_BIND
    mov     eax, SYS_SOCKETCALL
    int     80h

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson31">
<article class="span12">
<header>
<h2>Lesson 31</h2>
<h4>Sockets - Listen</h4>
</header>
<p>In the previous lessons we created a socket and used the 'bind' subroutine to associate it with a local IP address and port. In this lesson we will use the 'listen' subroutine of SYS_SOCKETCALL to tell our socket to listen for incoming TCP requests. This will allow us to read and write to anyone who connects to our socket.</p>
<p>SYS_SOCKETCALL's subroutine 'listen' expects 2 arguments - a pointer to an array of arguments in ECX and the integer value 4 in EBX. The SYS_SOCKETCALL opcode is then loaded into EAX and the kernel is called. If successful the socket will begin listening for incoming requests.</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Creates a TCP/IPv4 socket, binds it to 0.0.0.0 port 9001 and puts it into
; the listening state.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket

%include 'functions.asm'

SYS_SOCKETCALL  equ 102                 ; kernel opcode (32-bit int 80h interface)
SOCKOP_SOCKET   equ 1                   ; SYS_SOCKETCALL subroutine numbers
SOCKOP_BIND     equ 2
SOCKOP_LISTEN   equ 4

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
INADDR_ANY      equ 0x00000000          ; 0.0.0.0
PORT_9001       equ 0x2923              ; 9001 (0x2329) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to bind
BACKLOG         equ 1                   ; max queue length argument for listen

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; initialize some registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; create socket from lesson 29
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_bind:
    ; bind socket from lesson 30
    mov     edi, eax                    ; EDI = socket descriptor
    push    dword INADDR_ANY
    push    word PORT_9001
    push    word AF_INET
    mov     ecx, esp
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_BIND
    mov     eax, SYS_SOCKETCALL
    int     80h

_listen:
    ; Argument array for listen: { descriptor, backlog }
    push    byte BACKLOG
    push    edi
    mov     ecx, esp                    ; ECX = address of the argument array
    mov     ebx, SOCKOP_LISTEN
    mov     eax, SYS_SOCKETCALL
    int     80h

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson32">
<article class="span12">
<header>
<h2>Lesson 32</h2>
<h4>Sockets - Accept</h4>
</header>
<p>In the previous lessons we created a socket and used the 'bind' subroutine to associate it with a local IP address and port. We then used the 'listen' subroutine of SYS_SOCKETCALL to tell our socket to listen for incoming TCP requests. Now we will use the 'accept' subroutine of SYS_SOCKETCALL to tell our socket to accept those incoming requests. Our socket will then be ready to read and write to remote connections.</p>
<p>SYS_SOCKETCALL's subroutine 'accept' expects 2 arguments - a pointer to an array of arguments in ECX and the integer value 5 in EBX. The SYS_SOCKETCALL opcode is then loaded into EAX and the kernel is called. The 'accept' subroutine will create another file descriptor, this time identifying the incoming socket connection. We will use this file descriptor to read and write to the incoming connection in later lessons.</p>
<p>
<span class="label label-info">Note:</span>
Run the program and use the command <kbd>sudo netstat -plnt</kbd> in another terminal to view the socket listening on port 9001.
</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Creates a TCP/IPv4 socket, binds it to 0.0.0.0 port 9001, listens on it and
; accepts one incoming connection before exiting.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket

%include 'functions.asm'

SYS_SOCKETCALL  equ 102                 ; kernel opcode (32-bit int 80h interface)
SOCKOP_SOCKET   equ 1                   ; SYS_SOCKETCALL subroutine numbers
SOCKOP_BIND     equ 2
SOCKOP_LISTEN   equ 4
SOCKOP_ACCEPT   equ 5

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
INADDR_ANY      equ 0x00000000          ; 0.0.0.0
PORT_9001       equ 0x2923              ; 9001 (0x2329) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to bind
BACKLOG         equ 1                   ; max queue length argument for listen

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; initialize some registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; create socket from lesson 29
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_bind:
    ; bind socket from lesson 30
    mov     edi, eax                    ; EDI = listening socket descriptor
    push    dword INADDR_ANY
    push    word PORT_9001
    push    word AF_INET
    mov     ecx, esp
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_BIND
    mov     eax, SYS_SOCKETCALL
    int     80h

_listen:
    ; listen socket from lesson 31
    push    byte BACKLOG
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_LISTEN
    mov     eax, SYS_SOCKETCALL
    int     80h

_accept:
    ; Argument array for accept: { descriptor, address pointer, address length pointer }
    ; Both pointers are passed as 0.
    push    byte 0                      ; address length argument
    push    byte 0                      ; address argument
    push    edi                         ; listening socket descriptor
    mov     ecx, esp                    ; ECX = address of the argument array
    mov     ebx, SOCKOP_ACCEPT
    mov     eax, SYS_SOCKETCALL
    int     80h                         ; EAX = descriptor of the accepted connection

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson33">
<article class="span12">
<header>
<h2>Lesson 33</h2>
<h4>Sockets - Read</h4>
</header>
<p>When an incoming connection is accepted by our socket, a new file descriptor identifying the incoming socket connection is returned in EAX. In this lesson we will use this file descriptor to read the incoming request headers from the connection.</p>
<p>We begin by storing the file descriptor we received in lesson 32 into ESI. ESI was originally called the Source Index and is traditionally used in copy routines to store the location of the source data.</p>
<p>We will use the kernel function sys_read to read from the incoming socket connection. As we have done in previous lessons, we will create a variable to store the contents being read from the file descriptor. Our socket will be using the HTTP protocol to communicate. Parsing HTTP request headers to determine the length of the incoming message and accepted response formats is beyond the scope of this tutorial. We will instead just read up to the first 255 bytes and print that to standard output.</p>
<p>Once the incoming connection has been accepted, it is very common for webservers to spawn a child process to manage the read/write communication. The parent process is then free to return to the listening/accept state and accept any new incoming requests in parallel. We will implement this design pattern below using SYS_FORK and the JMP instruction prior to reading the request headers in the child process.</p>
<p>To generate valid request headers we will use the commandline tool <kbd>curl</kbd> to connect to our listening socket. But you can also use a standard web browser to connect in the same way.</p>
<p>sys_read expects 3 arguments - the number of bytes to read in EDX, the memory address of our variable in ECX and the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. The sys_read opcode is then loaded into EAX and the kernel is called to read the contents into our variable which is then printed to the screen.</p>
<p>
<span class="label label-info">Note:</span>
We will reserve 255 bytes in the .bss section to store the contents being read from the file descriptor. <a href="#lesson9">See Lesson 9 for more information on the .bss section.</a>
</p>
<p>
<span class="label label-info">Note:</span>
Run the program and use the command <kbd>curl http://localhost:9001</kbd> in another terminal to view the request headers being read by our program.
</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Forking TCP server on 0.0.0.0 port 9001. The parent loops on accept; each
; accepted connection is handed to a child process that reads up to 255 bytes
; of the request, prints them and exits.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket
;
; Register roles: EDI = listening socket descriptor
;                 ESI = accepted connection descriptor

%include 'functions.asm'

; kernel opcodes (32-bit int 80h interface)
SYS_FORK        equ 2
SYS_READ        equ 3
SYS_CLOSE       equ 6
SYS_SOCKETCALL  equ 102

; SYS_SOCKETCALL subroutine numbers
SOCKOP_SOCKET   equ 1
SOCKOP_BIND     equ 2
SOCKOP_LISTEN   equ 4
SOCKOP_ACCEPT   equ 5

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
INADDR_ANY      equ 0x00000000          ; 0.0.0.0
PORT_9001       equ 0x2923              ; 9001 (0x2329) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to bind
BACKLOG         equ 1                   ; max queue length argument for listen
BUFFER_SIZE     equ 255

SECTION .bss
buffer          resb BUFFER_SIZE        ; variable to store request headers

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; initialize some registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; create socket from lesson 29
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_bind:
    ; bind socket from lesson 30
    mov     edi, eax
    push    dword INADDR_ANY
    push    word PORT_9001
    push    word AF_INET
    mov     ecx, esp
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_BIND
    mov     eax, SYS_SOCKETCALL
    int     80h

_listen:
    ; listen socket from lesson 31
    push    byte BACKLOG
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_LISTEN
    mov     eax, SYS_SOCKETCALL
    int     80h

_accept:
    ; accept socket from lesson 32
    push    byte 0
    push    byte 0
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_ACCEPT
    mov     eax, SYS_SOCKETCALL
    int     80h
    add     esp, 12                     ; drop the 3 arguments: this label is a loop target, so
                                        ; leaving them would grow the stack on every connection

_fork:
    mov     esi, eax                    ; ESI = accepted connection (negative errno value if accept failed)
    mov     eax, SYS_FORK
    int     80h
    cmp     eax, 0                      ; SYS_FORK returns zero in the child process
    jz      _read                       ; child: go and serve the connection

    ; Parent: the child holds its own copy of the descriptor, so release
    ; ours. Without this the parent leaks one descriptor per connection and
    ; the client never sees the connection close when the child exits.
    mov     ebx, esi
    mov     eax, SYS_CLOSE
    int     80h
    jmp     _accept                     ; parent: wait for the next connection

_read:
    ; NOTE(review): a full 255-byte read fills the whole buffer, leaving no
    ; zero byte inside it for the string printing function to stop at.
    mov     edx, BUFFER_SIZE            ; we only read the first 255 bytes for simplicity
    mov     ecx, buffer
    mov     ebx, esi                    ; accepted connection descriptor
    mov     eax, SYS_READ
    int     80h

    mov     eax, buffer
    call    sprintLF                    ; print the request headers

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
<span>GET / HTTP/1.1</span>
<span>Host: localhost:9001</span>
<span>User-Agent: curl/x.xx.x</span>
<span>Accept: */*</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson34">
<article class="span12">
<header>
<h2>Lesson 34</h2>
<h4>Sockets - Write</h4>
</header>
<p>When an incoming connection is accepted by our socket, a new file descriptor identifying the incoming socket connection is returned in EAX. In this lesson we will use this file descriptor to send our response to the connection.</p>
<p>We will use the kernel function sys_write to write to the incoming socket connection. As our socket will be communicating using the HTTP protocol, we will need to send some compulsory headers in order to allow HTTP speaking clients to connect. We will send these following the formatting rules set out in the <a href="https://tools.ietf.org/html/rfc2616?spm=5176.doc32013.2.3.Aimyd7#section-4.2" target="_blank">RFC Standard</a>.</p>
<p>sys_write expects 3 arguments - the number of bytes to write in EDX, the response string to write in ECX and the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. The sys_write opcode is then loaded into EAX and the kernel is called to send our response back through our socket to the incoming connection.</p>
<p>
<span class="label label-info">Note:</span>
We will create a variable in the .data section to store the response we will write to the file descriptor. <a href="#lesson1">See Lesson 1 for more information on the .data section.</a>
</p>
<p>
<span class="label label-info">Note:</span>
Run the program and use the command <kbd>curl http://localhost:9001</kbd> in another terminal to view the response sent via our socket. Or connect to the same address using any standard web browser.
</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Forking TCP server on 0.0.0.0 port 9001. The parent loops on accept; each
; accepted connection is handed to a child process that reads up to 255 bytes
; of the request, prints them, sends a fixed HTTP response and exits.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket
;
; Register roles: EDI = listening socket descriptor
;                 ESI = accepted connection descriptor

%include 'functions.asm'

; kernel opcodes (32-bit int 80h interface)
SYS_FORK        equ 2
SYS_READ        equ 3
SYS_WRITE       equ 4
SYS_CLOSE       equ 6
SYS_SOCKETCALL  equ 102

; SYS_SOCKETCALL subroutine numbers
SOCKOP_SOCKET   equ 1
SOCKOP_BIND     equ 2
SOCKOP_LISTEN   equ 4
SOCKOP_ACCEPT   equ 5

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
INADDR_ANY      equ 0x00000000          ; 0.0.0.0
PORT_9001       equ 0x2923              ; 9001 (0x2329) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to bind
BACKLOG         equ 1                   ; max queue length argument for listen
BUFFER_SIZE     equ 255

SECTION .data
; our response string; Content-Length counts 'Hello World!' plus the final CR LF
response        db 'HTTP/1.1 200 OK', 0Dh, 0Ah, 'Content-Type: text/html', 0Dh, 0Ah, 'Content-Length: 14', 0Dh, 0Ah, 0Dh, 0Ah, 'Hello World!', 0Dh, 0Ah
responseLen     equ $ - response        ; bytes to send (78), terminator excluded
                db 0h                   ; terminator, kept so the data layout is unchanged

SECTION .bss
buffer          resb BUFFER_SIZE        ; variable to store request headers

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; initialize some registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; create socket from lesson 29
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_bind:
    ; bind socket from lesson 30
    mov     edi, eax
    push    dword INADDR_ANY
    push    word PORT_9001
    push    word AF_INET
    mov     ecx, esp
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_BIND
    mov     eax, SYS_SOCKETCALL
    int     80h

_listen:
    ; listen socket from lesson 31
    push    byte BACKLOG
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_LISTEN
    mov     eax, SYS_SOCKETCALL
    int     80h

_accept:
    ; accept socket from lesson 32
    push    byte 0
    push    byte 0
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_ACCEPT
    mov     eax, SYS_SOCKETCALL
    int     80h
    add     esp, 12                     ; drop the 3 arguments: this label is a loop target, so
                                        ; leaving them would grow the stack on every connection

_fork:
    ; fork socket from lesson 33
    mov     esi, eax                    ; ESI = accepted connection descriptor
    mov     eax, SYS_FORK
    int     80h
    cmp     eax, 0                      ; SYS_FORK returns zero in the child process
    jz      _read                       ; child: go and serve the connection

    ; Parent: the child holds its own copy of the descriptor, so release
    ; ours. Without this the parent leaks one descriptor per connection.
    mov     ebx, esi
    mov     eax, SYS_CLOSE
    int     80h
    jmp     _accept                     ; parent: wait for the next connection

_read:
    ; read socket from lesson 33
    mov     edx, BUFFER_SIZE
    mov     ecx, buffer
    mov     ebx, esi
    mov     eax, SYS_READ
    int     80h

    mov     eax, buffer
    call    sprintLF                    ; print the request headers

_write:
    mov     edx, responseLen            ; length in bytes to write
    mov     ecx, response               ; address of our response variable
    mov     ebx, esi                    ; accepted connection descriptor
    mov     eax, SYS_WRITE
    int     80h

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
</div>
</div>
<br>
<span class="filename">New terminal window</span>
<div class="output">
<div class="inner">
<span>~$ curl http://localhost:9001</span>
<span>Hello World!</span>
</div>
</div>
</div>
</article>
</div>
<div class="row-fluid" id="lesson35">
<article class="span12">
<header>
<h2>Lesson 35</h2>
<h4>Sockets - Close</h4>
</header>
<p>In this lesson we will use sys_close to properly close the active socket connection in the child process after our response has been sent. This will free up some resources that can be used to accept new incoming connections.</p>
<p>sys_close expects 1 argument - the <a href="https://en.wikipedia.org/wiki/File_descriptor" target="_blank">file descriptor</a> in EBX. The sys_close opcode is then loaded into EAX and the kernel is called to close the socket and remove the active file descriptor.</p>
<p>
<span class="label label-info">Note:</span>
Run the program and use the command <kbd>curl http://localhost:9001</kbd> in another terminal or connect to the same address using any standard web browser.
</p>
<div class="snippet">
<span class="filename">socket.asm</span>
<pre class="brush: asm;">
; Socket
; Forking TCP server on 0.0.0.0 port 9001. The parent loops on accept; each
; accepted connection is handed to a child process that reads up to 255 bytes
; of the request, prints them, sends a fixed HTTP response, closes the
; connection and exits. The parent closes its own copy of every accepted
; descriptor as well.
; Compile with: nasm -f elf socket.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 socket.o -o socket
; Run with: ./socket
;
; Register roles: EDI = listening socket descriptor
;                 ESI = accepted connection descriptor

%include 'functions.asm'

; kernel opcodes (32-bit int 80h interface)
SYS_FORK        equ 2
SYS_READ        equ 3
SYS_WRITE       equ 4
SYS_CLOSE       equ 6
SYS_SOCKETCALL  equ 102

; SYS_SOCKETCALL subroutine numbers
SOCKOP_SOCKET   equ 1
SOCKOP_BIND     equ 2
SOCKOP_LISTEN   equ 4
SOCKOP_ACCEPT   equ 5

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
INADDR_ANY      equ 0x00000000          ; 0.0.0.0
PORT_9001       equ 0x2923              ; 9001 (0x2329) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to bind
BACKLOG         equ 1                   ; max queue length argument for listen
BUFFER_SIZE     equ 255

SECTION .data
; our response string; Content-Length counts 'Hello World!' plus the final CR LF
response        db 'HTTP/1.1 200 OK', 0Dh, 0Ah, 'Content-Type: text/html', 0Dh, 0Ah, 'Content-Length: 14', 0Dh, 0Ah, 0Dh, 0Ah, 'Hello World!', 0Dh, 0Ah
responseLen     equ $ - response        ; bytes to send (78), terminator excluded
                db 0h                   ; terminator, kept so the data layout is unchanged

SECTION .bss
buffer          resb BUFFER_SIZE        ; variable to store request headers

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; initialize some registers
    xor     ebx, ebx
    xor     edi, edi
    xor     esi, esi

_socket:
    ; create socket from lesson 29
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_bind:
    ; bind socket from lesson 30
    mov     edi, eax
    push    dword INADDR_ANY
    push    word PORT_9001
    push    word AF_INET
    mov     ecx, esp
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_BIND
    mov     eax, SYS_SOCKETCALL
    int     80h

_listen:
    ; listen socket from lesson 31
    push    byte BACKLOG
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_LISTEN
    mov     eax, SYS_SOCKETCALL
    int     80h

_accept:
    ; accept socket from lesson 32
    push    byte 0
    push    byte 0
    push    edi
    mov     ecx, esp
    mov     ebx, SOCKOP_ACCEPT
    mov     eax, SYS_SOCKETCALL
    int     80h
    add     esp, 12                     ; drop the 3 arguments: this label is a loop target, so
                                        ; leaving them would grow the stack on every connection

_fork:
    ; fork socket from lesson 33
    mov     esi, eax                    ; ESI = accepted connection descriptor
    mov     eax, SYS_FORK
    int     80h
    cmp     eax, 0                      ; SYS_FORK returns zero in the child process
    jz      _read                       ; child: go and serve the connection

    ; Parent: closing the descriptor in the child does not close the parent's
    ; copy, so release it here too. Otherwise one descriptor leaks per connection.
    mov     ebx, esi
    mov     eax, SYS_CLOSE
    int     80h
    jmp     _accept                     ; parent: wait for the next connection

_read:
    ; read socket from lesson 33
    mov     edx, BUFFER_SIZE
    mov     ecx, buffer
    mov     ebx, esi
    mov     eax, SYS_READ
    int     80h

    mov     eax, buffer
    call    sprintLF                    ; print the request headers

_write:
    ; write socket from lesson 34
    mov     edx, responseLen
    mov     ecx, response
    mov     ebx, esi
    mov     eax, SYS_WRITE
    int     80h

_close:
    mov     ebx, esi                    ; accepted connection descriptor
    mov     eax, SYS_CLOSE              ; invoke SYS_CLOSE (kernel opcode 6)
    int     80h                         ; call the kernel

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf socket.asm</span>
<span>~$ ld -m elf_i386 socket.o -o socket</span>
<span>~$ ./socket</span>
</div>
</div>
<br>
<span class="filename">New terminal window</span>
<div class="output">
<div class="inner">
<span>~$ curl http://localhost:9001</span>
<span>Hello World!</span>
</div>
</div>
</div>
<p>
<span class="label label-info">Note:</span>
We have properly closed the socket connections and removed their active file descriptors.
</p>
</article>
</div>
<div class="row-fluid" id="lesson36">
<article class="span12">
<header>
<h2>Lesson 36</h2>
<h4>Download a Webpage</h4>
</header>
<p>In the previous lessons we have been learning how to use the many subroutines of the SYS_SOCKETCALL kernel function to create, manage and transfer data through Linux sockets. We will continue that theme in this lesson by using the 'connect' subroutine of SYS_SOCKETCALL to connect to a remote webserver and download a webpage.</p>
<p>These are the steps we need to follow to connect a socket to a remote server:
<ul>
<li> Call SYS_SOCKETCALL's subroutine 'socket' to create an active socket that we will use to send outbound requests.</li>
<li> Call SYS_SOCKETCALL's subroutine 'connect' to connect our socket with a socket on the remote webserver.</li>
<li> Use SYS_WRITE to send a HTTP formatted request through our socket to the remote webserver.</li>
<li> Use SYS_READ to receive the HTTP formatted response from the webserver.</li>
</ul>
We will then use our string printing function to print the response to our terminal.
</p>
<h5>What is a HTTP Request</h5>
<p>The HTTP specification has evolved through a number of standard versions including <a href="https://tools.ietf.org/html/rfc1945" target="_blank">1.0 in RFC1945</a>, <a href="https://tools.ietf.org/html/rfc2068" target="_blank">1.1 in RFC2068</a> and <a href="https://tools.ietf.org/html/rfc7540" target="_blank">2.0 in RFC7540</a>. Version 1.1 is still the most common today.</p>
<p>A HTTP/1.1 request is comprised of 3 sections:
<ol>
<li> A line containing the <i>request method</i>, <i>request url</i>, and <i>http version</i></li>
<li> An optional section of <i><a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers">request headers</a></i></li>
<li> An <i>empty line</i> that tells the remote server you have finished sending the request and you will begin waiting for the response.</li>
</ol>
</p>
<p>A typical HTTP request for the root document on this server would look like this:</p>
<div class="snippet">
<pre class="brush: asm;">
GET / HTTP/1.1 ; A line containing the request method, url and version
Host: asmtutor.com ; A section of request headers
; A required empty line
</pre>
</div>
<h5>Writing our program</h5>
<p>This tutorial starts out like the previous ones by calling SYS_SOCKETCALL's subroutine 'socket' to initially create our socket. However, instead of calling 'bind' on this socket we will call 'connect' with an IP Address and Port Number to connect our socket to a remote webserver. We will then use the SYS_WRITE and SYS_READ kernel methods to transfer data between the two sockets by sending a HTTP request and reading the HTTP response.</p>
<p>SYS_SOCKETCALL's subroutine 'connect' expects 2 arguments - a pointer to an array of arguments in ECX and the integer value 3 in EBX. The SYS_SOCKETCALL opcode is then loaded into EAX and the kernel is called to connect to the socket.</p>
<p>
<span class="label label-info">Note:</span>
In Linux we can use the following command <kbd>./crawler > index.html</kbd> to save the output of our program to a file instead.
</p>
<div class="snippet">
<span class="filename">crawler.asm</span>
<pre class="brush: asm;">
; Crawler
; Connects a TCP socket to 139.162.39.66 port 80, sends a minimal HTTP/1.1 GET
; request and prints the response one byte at a time until the read returns
; end-of-file or an error.
; Compile with: nasm -f elf crawler.asm
; Link with (64 bit systems require elf_i386 option): ld -m elf_i386 crawler.o -o crawler
; Run with: ./crawler
;
; Register roles: EDI = socket descriptor

%include 'functions.asm'

; kernel opcodes (32-bit int 80h interface)
SYS_READ        equ 3
SYS_WRITE       equ 4
SYS_CLOSE       equ 6
SYS_SOCKETCALL  equ 102

; SYS_SOCKETCALL subroutine numbers
SOCKOP_SOCKET   equ 1
SOCKOP_CONNECT  equ 3

AF_INET         equ 2                   ; same value is used as PF_INET below
SOCK_STREAM     equ 1
IPPROTO_TCP     equ 6
REMOTE_ADDR     equ 0x4227a28b          ; 139.162.39.66 (bytes 8b a2 27 42 in memory)
REMOTE_PORT     equ 0x5000              ; 80 (0x0050) with its bytes swapped for network order
SOCKADDR_LEN    equ 16                  ; address length handed to connect

SECTION .data
; our request string: request line, Host header and the required empty line
request         db 'GET / HTTP/1.1', 0Dh, 0Ah, 'Host: 139.162.39.66:80', 0Dh, 0Ah, 0Dh, 0Ah
requestLen      equ $ - request         ; 42 bytes; the old hard-coded 43 also sent the NUL below
                db 0h                   ; terminator, kept so the data layout is unchanged

SECTION .bss
buffer          resb 2                  ; one response byte plus a zero byte so it prints as a string

SECTION .text
global  _start

_start:
    xor     eax, eax                    ; init eax 0
    xor     ebx, ebx                    ; init ebx 0
    xor     edi, edi                    ; init edi 0

_socket:
    ; Argument array for socket: { PF_INET, SOCK_STREAM, IPPROTO_TCP }
    push    byte IPPROTO_TCP
    push    byte SOCK_STREAM
    push    byte AF_INET
    mov     ecx, esp                    ; ECX = address of the argument array
    mov     ebx, SOCKOP_SOCKET
    mov     eax, SYS_SOCKETCALL
    int     80h

_connect:
    mov     edi, eax                    ; EDI = socket descriptor (a negative errno value if socket failed)

    ; Lay out the remote address on the stack: family, port, IP address
    push    dword REMOTE_ADDR
    push    word REMOTE_PORT
    push    word AF_INET
    mov     ecx, esp                    ; ECX = address of the structure just built

    ; Argument array for connect: { descriptor, address pointer, address length }
    push    byte SOCKADDR_LEN
    push    ecx
    push    edi
    mov     ecx, esp                    ; ECX = address of the argument array
    mov     ebx, SOCKOP_CONNECT
    mov     eax, SYS_SOCKETCALL
    int     80h

_write:
    mov     edx, requestLen             ; send the request only, not its terminator
    mov     ecx, request
    mov     ebx, edi
    mov     eax, SYS_WRITE
    int     80h

_read:
    mov     edx, 1                      ; we read 1 byte at a time
    mov     ecx, buffer
    mov     ebx, edi
    mov     eax, SYS_READ
    int     80h

    cmp     eax, 0
    jle     _close                      ; 0 = end of stream; negative = error, which previously
                                        ; re-printed the last byte in an endless loop

    mov     eax, buffer
    call    sprint                      ; print the byte just read
    jmp     _read

_close:
    mov     ebx, edi                    ; connected socket descriptor
    mov     eax, SYS_CLOSE
    int     80h

_exit:
    call    quit                        ; call our quit function
</pre>
<div class="output">
<div class="inner">
<span>~$ nasm -f elf crawler.asm</span>
<span>~$ ld -m elf_i386 crawler.o -o crawler</span>
<span>~$ ./crawler</span>
<span>HTTP/1.1 200 OK</span>
<span>Content-Type: text/html</span>
<span> </span>
<span><!DOCTYPE html></span>
<span><html lang="en"></span>
<span>...</span>
<span></html></span>
</div>
</div>
</div>
</article>
</div>
<a href="#" class="btn-float hidden-phone hidden-tablet">
<i class="fas fa-arrow-up"></i>
</a>
</div>
</div>
<hr>
<footer>
<p>Learn assembly language at <a href="https://asmtutor.com">https://asmtutor.com</a></p>
</footer>
</div>
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.0/jquery.min.js"></script>
<script src="./assets/bootstrap/js/bootstrap.min.js"></script>
<script src="./assets/script.js"></script>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-41213327-1', 'asmtutor.com');
ga('send', 'pageview');
</script>
</body>
</html>
|