Kalyan Chakravorty Blog: December 2005

Thursday, December 29, 2005

Merge two presorted lists

Refer: Mathbits.com

//Function to merge two pre-sorted arrays
void merge_sort(apvector &arrayA, apvector &arrayB, apvector &arrayC)
{
int indexA = 0; // initialize the Array Indices
int indexB = 0;
int indexC = 0;

while((indexA < arrayA.length( )) && (indexB < arrayB.length( ))
{
if (arrayA[indexA] < arrayB[indexB])
{
arrayC[indexC] = arrayA[indexA];
indexA++; //increase the index
}
else
{
arrayC[indexC] = arrayB[indexB];
indexB++; //increase the index
}
indexC++; //move to the next position in the new array
}
// Push remaining elements to end of new array when 1 feeder array is empty
while (indexA < arrayA.length( ))
{
arrayC[indexC] = arrayA[indexA];
indexA++;
indexC++;
}
while (indexB < arrayB.length( ))
{
arrayC[indexC] = arrayB[indexB];
indexB++;
indexC++;
}
return;
}

Wednesday, December 28, 2005

Recursion

1.
Sum of the elements of the linked list:
int sum_list(struct list_node *l)
{
if(l == NULL)
return 0;
return l.data + sum_list(l.next);
}

Friday, December 23, 2005

string libraries

Stolen from http://fxr.watson.org/fxr/source/lib/string.c?v=linux-2.4.22

1 /*
2 * linux/lib/string.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 /*
8 * stupid library routines.. The optimized versions should generally be found
9 * as inline code in
10 *
11 * These are buggy as well..
12 *
13 * * Fri Jun 25 1999, Ingo Oeser
14 * - Added strsep() which will replace strtok() soon (because strsep() is
15 * reentrant and should be faster). Use only strsep() in new code, please.
16 */
17
18 #include
19 #include
20 #include
21
22 #ifndef __HAVE_ARCH_STRNICMP
23 /**
24 * strnicmp - Case insensitive, length-limited string comparison
25 * @s1: One string
26 * @s2: The other string
27 * @len: the maximum number of characters to compare
28 */
29 int strnicmp(const char *s1, const char *s2, size_t len)
30 {
31 /* Yes, Virginia, it had better be unsigned */
32 unsigned char c1, c2;
33
34 c1 = 0; c2 = 0;
35 if (len) {
36 do {
37 c1 = *s1; c2 = *s2;
38 s1++; s2++;
39 if (!c1)
40 break;
41 if (!c2)
42 break;
43 if (c1 == c2)
44 continue;
45 c1 = tolower(c1);
46 c2 = tolower(c2);
47 if (c1 != c2)
48 break;
49 } while (--len);
50 }
51 return (int)c1 - (int)c2;
52 }
53 #endif
54
55 char * ___strtok;
56
57 #ifndef __HAVE_ARCH_STRCPY
58 /**
59 * strcpy - Copy a %NUL terminated string
60 * @dest: Where to copy the string to
61 * @src: Where to copy the string from
62 */
63 char * strcpy(char * dest,const char *src)
64 {
65 char *tmp = dest;
66
67 while ((*dest++ = *src++) != '\0')
68 /* nothing */;
69 return tmp;
70 }
71 #endif
72
73 #ifndef __HAVE_ARCH_STRNCPY
74 /**
75 * strncpy - Copy a length-limited, %NUL-terminated string
76 * @dest: Where to copy the string to
77 * @src: Where to copy the string from
78 * @count: The maximum number of bytes to copy
79 *
80 * Note that unlike userspace strncpy, this does not %NUL-pad the buffer.
81 * However, the result is not %NUL-terminated if the source exceeds
82 * @count bytes.
83 */
84 char * strncpy(char * dest,const char *src,size_t count)
85 {
86 char *tmp = dest;
87
88 while (count-- && (*dest++ = *src++) != '\0')
89 /* nothing */;
90
91 return tmp;
92 }
93 #endif
94
95 #ifndef __HAVE_ARCH_STRCAT
96 /**
97 * strcat - Append one %NUL-terminated string to another
98 * @dest: The string to be appended to
99 * @src: The string to append to it
100 */
101 char * strcat(char * dest, const char * src)
102 {
103 char *tmp = dest;
104
105 while (*dest)
106 dest++;
107 while ((*dest++ = *src++) != '\0')
108 ;
109
110 return tmp;
111 }
112 #endif
113
114 #ifndef __HAVE_ARCH_STRNCAT
115 /**
116 * strncat - Append a length-limited, %NUL-terminated string to another
117 * @dest: The string to be appended to
118 * @src: The string to append to it
119 * @count: The maximum numbers of bytes to copy
120 *
121 * Note that in contrast to strncpy, strncat ensures the result is
122 * terminated.
123 */
124 char * strncat(char *dest, const char *src, size_t count)
125 {
126 char *tmp = dest;
127
128 if (count) {
129 while (*dest)
130 dest++;
131 while ((*dest++ = *src++)) {
132 if (--count == 0) {
133 *dest = '\0';
134 break;
135 }
136 }
137 }
138
139 return tmp;
140 }
141 #endif
142
143 #ifndef __HAVE_ARCH_STRCMP
144 /**
145 * strcmp - Compare two strings
146 * @cs: One string
147 * @ct: Another string
148 */
149 int strcmp(const char * cs,const char * ct)
150 {
151 register signed char __res;
152
153 while (1) {
154 if ((__res = *cs - *ct++) != 0 || !*cs++)
155 break;
156 }
157
158 return __res;
159 }
160 #endif
161
162 #ifndef __HAVE_ARCH_STRNCMP
163 /**
164 * strncmp - Compare two length-limited strings
165 * @cs: One string
166 * @ct: Another string
167 * @count: The maximum number of bytes to compare
168 */
169 int strncmp(const char * cs,const char * ct,size_t count)
170 {
171 register signed char __res = 0;
172
173 while (count) {
174 if ((__res = *cs - *ct++) != 0 || !*cs++)
175 break;
176 count--;
177 }
178
179 return __res;
180 }
181 #endif
182
183 #ifndef __HAVE_ARCH_STRCHR
184 /**
185 * strchr - Find the first occurrence of a character in a string
186 * @s: The string to be searched
187 * @c: The character to search for
188 */
189 char * strchr(const char * s, int c)
190 {
191 for(; *s != (char) c; ++s)
192 if (*s == '\0')
193 return NULL;
194 return (char *) s;
195 }
196 #endif
197
198 #ifndef __HAVE_ARCH_STRRCHR
199 /**
200 * strrchr - Find the last occurrence of a character in a string
201 * @s: The string to be searched
202 * @c: The character to search for
203 */
204 char * strrchr(const char * s, int c)
205 {
206 const char *p = s + strlen(s);
207 do {
208 if (*p == (char)c)
209 return (char *)p;
210 } while (--p >= s);
211 return NULL;
212 }
213 #endif
214
215 #ifndef __HAVE_ARCH_STRLEN
216 /**
217 * strlen - Find the length of a string
218 * @s: The string to be sized
219 */
220 size_t strlen(const char * s)
221 {
222 const char *sc;
223
224 for (sc = s; *sc != '\0'; ++sc)
225 /* nothing */;
226 return sc - s;
227 }
228 #endif
229
230 #ifndef __HAVE_ARCH_STRNLEN
231 /**
232 * strnlen - Find the length of a length-limited string
233 * @s: The string to be sized
234 * @count: The maximum number of bytes to search
235 */
236 size_t strnlen(const char * s, size_t count)
237 {
238 const char *sc;
239
240 for (sc = s; count-- && *sc != '\0'; ++sc)
241 /* nothing */;
242 return sc - s;
243 }
244 #endif
245
246 #ifndef __HAVE_ARCH_STRSPN
247 /**
248 * strspn - Calculate the length of the initial substring of @s which only
249 * contain letters in @accept
250 * @s: The string to be searched
251 * @accept: The string to search for
252 */
253 size_t strspn(const char *s, const char *accept)
254 {
255 const char *p;
256 const char *a;
257 size_t count = 0;
258
259 for (p = s; *p != '\0'; ++p) {
260 for (a = accept; *a != '\0'; ++a) {
261 if (*p == *a)
262 break;
263 }
264 if (*a == '\0')
265 return count;
266 ++count;
267 }
268
269 return count;
270 }
271 #endif
272
273 #ifndef __HAVE_ARCH_STRPBRK
274 /**
275 * strpbrk - Find the first occurrence of a set of characters
276 * @cs: The string to be searched
277 * @ct: The characters to search for
278 */
279 char * strpbrk(const char * cs,const char * ct)
280 {
281 const char *sc1,*sc2;
282
283 for( sc1 = cs; *sc1 != '\0'; ++sc1) {
284 for( sc2 = ct; *sc2 != '\0'; ++sc2) {
285 if (*sc1 == *sc2)
286 return (char *) sc1;
287 }
288 }
289 return NULL;
290 }
291 #endif
292
293 #ifndef __HAVE_ARCH_STRTOK
294 /**
295 * strtok - Split a string into tokens
296 * @s: The string to be searched
297 * @ct: The characters to search for
298 *
299 * WARNING: strtok is deprecated, use strsep instead.
300 */
301 char * strtok(char * s,const char * ct)
302 {
303 char *sbegin, *send;
304
305 sbegin = s ? s : ___strtok;
306 if (!sbegin) {
307 return NULL;
308 }
309 sbegin += strspn(sbegin,ct);
310 if (*sbegin == '\0') {
311 ___strtok = NULL;
312 return( NULL );
313 }
314 send = strpbrk( sbegin, ct);
315 if (send && *send != '\0')
316 *send++ = '\0';
317 ___strtok = send;
318 return (sbegin);
319 }
320 #endif
321
322 #ifndef __HAVE_ARCH_STRSEP
323 /**
324 * strsep - Split a string into tokens
325 * @s: The string to be searched
326 * @ct: The characters to search for
327 *
328 * strsep() updates @s to point after the token, ready for the next call.
329 *
330 * It returns empty tokens, too, behaving exactly like the libc function
331 * of that name. In fact, it was stolen from glibc2 and de-fancy-fied.
332 * Same semantics, slimmer shape. ;)
333 */
334 char * strsep(char **s, const char *ct)
335 {
336 char *sbegin = *s, *end;
337
338 if (sbegin == NULL)
339 return NULL;
340
341 end = strpbrk(sbegin, ct);
342 if (end)
343 *end++ = '\0';
344 *s = end;
345
346 return sbegin;
347 }
348 #endif
349
350 #ifndef __HAVE_ARCH_MEMSET
351 /**
352 * memset - Fill a region of memory with the given value
353 * @s: Pointer to the start of the area.
354 * @c: The byte to fill the area with
355 * @count: The size of the area.
356 *
357 * Do not use memset() to access IO space, use memset_io() instead.
358 */
359 void * memset(void * s,int c,size_t count)
360 {
361 char *xs = (char *) s;
362
363 while (count--)
364 *xs++ = c;
365
366 return s;
367 }
368 #endif
369
370 #ifndef __HAVE_ARCH_BCOPY
371 /**
372 * bcopy - Copy one area of memory to another
373 * @src: Where to copy from
374 * @dest: Where to copy to
375 * @count: The size of the area.
376 *
377 * Note that this is the same as memcpy(), with the arguments reversed.
378 * memcpy() is the standard, bcopy() is a legacy BSD function.
379 *
380 * You should not use this function to access IO space, use memcpy_toio()
381 * or memcpy_fromio() instead.
382 */
383 char * bcopy(const char * src, char * dest, int count)
384 {
385 char *tmp = dest;
386
387 while (count--)
388 *tmp++ = *src++;
389
390 return dest;
391 }
392 #endif
393
394 #ifndef __HAVE_ARCH_MEMCPY
395 /**
396 * memcpy - Copy one area of memory to another
397 * @dest: Where to copy to
398 * @src: Where to copy from
399 * @count: The size of the area.
400 *
401 * You should not use this function to access IO space, use memcpy_toio()
402 * or memcpy_fromio() instead.
403 */
404 void * memcpy(void * dest,const void *src,size_t count)
405 {
406 char *tmp = (char *) dest, *s = (char *) src;
407
408 while (count--)
409 *tmp++ = *s++;
410
411 return dest;
412 }
413 #endif
414
415 #ifndef __HAVE_ARCH_MEMMOVE
416 /**
417 * memmove - Copy one area of memory to another
418 * @dest: Where to copy to
419 * @src: Where to copy from
420 * @count: The size of the area.
421 *
422 * Unlike memcpy(), memmove() copes with overlapping areas.
423 */
424 void * memmove(void * dest,const void *src,size_t count)
425 {
426 char *tmp, *s;
427
428 if (dest <= src) {
429 tmp = (char *) dest;
430 s = (char *) src;
431 while (count--)
432 *tmp++ = *s++;
433 }
434 else {
435 tmp = (char *) dest + count;
436 s = (char *) src + count;
437 while (count--)
438 *--tmp = *--s;
439 }
440
441 return dest;
442 }
443 #endif
444
445 #ifndef __HAVE_ARCH_MEMCMP
446 /**
447 * memcmp - Compare two areas of memory
448 * @cs: One area of memory
449 * @ct: Another area of memory
450 * @count: The size of the area.
451 */
452 int memcmp(const void * cs,const void * ct,size_t count)
453 {
454 const unsigned char *su1, *su2;
455 int res = 0;
456
457 for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
458 if ((res = *su1 - *su2) != 0)
459 break;
460 return res;
461 }
462 #endif
463
464 #ifndef __HAVE_ARCH_MEMSCAN
465 /**
466 * memscan - Find a character in an area of memory.
467 * @addr: The memory area
468 * @c: The byte to search for
469 * @size: The size of the area.
470 *
471 * returns the address of the first occurrence of @c, or 1 byte past
472 * the area if @c is not found
473 */
474 void * memscan(void * addr, int c, size_t size)
475 {
476 unsigned char * p = (unsigned char *) addr;
477
478 while (size) {
479 if (*p == c)
480 return (void *) p;
481 p++;
482 size--;
483 }
484 return (void *) p;
485 }
486 #endif
487
488 #ifndef __HAVE_ARCH_STRSTR
489 /**
490 * strstr - Find the first substring in a %NUL terminated string
491 * @s1: The string to be searched
492 * @s2: The string to search for
493 */
494 char * strstr(const char * s1,const char * s2)
495 {
496 int l1, l2;
497
498 l2 = strlen(s2);
499 if (!l2)
500 return (char *) s1;
501 l1 = strlen(s1);
502 while (l1 >= l2) {
503 l1--;
504 if (!memcmp(s1,s2,l2))
505 return (char *) s1;
506 s1++;
507 }
508 return NULL;
509 }
510 #endif
511
512 #ifndef __HAVE_ARCH_MEMCHR
513 /**
514 * memchr - Find a character in an area of memory.
515 * @s: The memory area
516 * @c: The byte to search for
517 * @n: The size of the area.
518 *
519 * returns the address of the first occurrence of @c, or %NULL
520 * if @c is not found
521 */
522 void *memchr(const void *s, int c, size_t n)
523 {
524 const unsigned char *p = s;
525 while (n-- != 0) {
526 if ((unsigned char)c == *p++) {
527 return (void *)(p-1);
528 }
529 }
530 return NULL;
531 }
532
533 #endif
534

Thursday, December 22, 2005

Reversing a linked list

link reverse(link x)
{ link t, y = x, r = NULL;
while (y != NULL)
{ t = y->next; y->next = r; r = y; y = t; }
return r;
}

Deleting from a linked list

Deleting from a Linked List

1. Linked List example:

Here, we will discuss deleting an element with a specific value from a linked list.

Here are some constraints for our example:

1. The list holds char's.
2. Deletion can occur anywhere in the list.
3. The delete operation will not return the element that we delete, it only need remove it from the list.

Now, recall the basic structure of a singly-linked list:

link
|
v
--------- --------- ---------
| a | --+---> | b | --+---> | c | 0 |
--------- --------- ---------

In other words, there is a node for each element, where nodes consist of a part in which the element is stored and a link to the next node. The last node links to nothing (i.e., there are no nodes after it). Also, there is a link to the beginning of the list.

What do these links correspond to in C++?

Answer: Pointers!

We'll consider our elements and nodes to have the following types:

typedef char ItemType;

class Node {
public:
...
ItemType getData();
Node *getNext();
void setNext(Node *);
private:
ItemType data;
Node *next; // pointer to node after me
};

And, the actual linked list will be a type called List:

class List {
public:
...
bool search(ItemType);
private:
Node *link; // pointer to beginning (1st node) of linked list
};

When the list is empty, what value will the pointer to the beginning have?

Answer: There are no nodes to point to, so it will be NULL!

Finally, if we want an instance of the List class, we can create one as:

List list;

2. Different cases:

There are a few steps to deleting a specific element from the list:

1. Find the node with the element (if it exists).
2. Remove that node.
3. Reconnect the linked list.
4. Update the link to the beginning (if necessary).

The order in which we perform these steps will depend on how we implement the deletion operation.

Note: For simplicity, if there are two elements with the value we want, we'll just remove the first one.

Finding the node in question is a matter of traversing the list and looking at each node's element.

Reconnecting the list once a node is to be removed is more interesting. Let's consider at least 3 cases:

* Removing a node from the beginning.
* Removing a node from the middle.
* Removing a node from the end.
Removing from the beginning
When removing the node at the beginning of the list, there is no relinking of nodes to be performed, since the first node has no preceding node. For example, removing node with a:

link
|
v
--------- --------- ---------
| a | --+---> | b | --+---> | c | 0 |
--------- --------- ---------

However, we must fix the pointer to the beginning of the list:

link
|
+-------------+
|
v
--------- --------- ---------
| a | --+---> | b | --+---> | c | 0 |
--------- --------- ---------

Removing from the middle
Removing a node from the middle requires that the preceding node skips over the node being removed. For example, removing the node with b:

link
|
v
--------- --------- ---------
| a | --+--+ | b | --+---> | c | 0 |
--------- | --------- ---------
| ^
+----------------+

This means that we need some way to refer to the node before the one we want to remove.
Removing from the end
Removing a node from the end requires that the preceding node becomes the new end of the list (i.e., points to nothing after it). For example, removing the node with c:

link
|
v
--------- --------- ---------
| a | --+---> | b | 0 | | c | 0 |
--------- --------- ---------

Note that the last two cases (middle and end) can be combined by saying that "the node preceding the one to be removed must point where the one to be removed does."

IMPORTANT NOTE: When there is only one node, and it is the one to be removed, are we in the case of removing at the beginning or end?! We should make sure that one of these cases handles removing this single node ELSE we should create a new case.

What we've learned from these pictures is that when we find the node we want to remove, we need some kind of reference to the node preceding it OR some way to backtrack to that node.
Deletion member functions
We'll write List member functions to delete an element from a list in 2 ways: iteratively and recursively (which will require a helper function):

class List {
public:
...
bool deleteIter(ItemType);
bool deleteRecur(ItemType);
private:
Node *link;
Node *delRec(Node *, ItemType); // helper function
};

The deletion functions will return a bool representing whether the element was deleted (i.e., was present in the list).

3. Iterative implementation:

Let's look at how to delete iteratively (i.e., with a loop) via a member function:

bool deleteIter(ItemType);

We'll need to be able to change the pointer to the list, link, in the case that the first node in the list is removed. Since deleteIter() is a member function of List, and link is one of the data member of that class, we can access it in deleteIter() when necessary.

We will iterate (i.e., loop) through the list to find the item to delete. It's easy to see how to start a pointer at the beginning and move it from one node to the next:

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^
|
currP

In other words:

* Start at the beginning:

currP = link

* Advance to the next node when necessary:

currP = currP->getNext()

by calling the member function to get the next node.

* Stop when there are no more nodes, i.e., make sure that:

currP != NULL

It's easy to combine these into a for loop:

for (currP = link; currP != NULL; currP = currP->getNext()) {
...

However, when we find the one to remove, we'll also need a pointer to the previous node:

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^ ^
| |
prevP currP

Thus, we also need to maintain a previous pointer at each step of the loop:

for (currP = link;
currP != NULL;
prevP = currP, currP = currP->getNext()) {

(Notice that there are 2 increments separated by a comma.)

To complete the function, we'll have to:

* Have some way to indicate when there is no previous node.
Remember that removing the first node is a special case because...
o It requires no relinking of nodes.
o We have to update the pointer to the beginning of the list.
* Relink the list (i.e., skip over the node to be deleted).
* Remove the node.

One implementation of this function might be:

bool List::deleteIter(ItemType item)
{
Node *currP, *prevP;

/* For 1st node, indicate there is no previous. */
prevP = NULL;

/*
* Visit each node, maintaining a pointer to
* the previous node we just visited.
*/
for (currP = link;
currP != NULL;
prevP = currP, currP = currP->getNext()) {

if (currP->getData() == item) { /* Found it. */
if (prevP == NULL) {
/* Fix beginning pointer. */
link = currP->getNext();
} else {
/*
* Fix previous node's next to
* skip over the removed node.
*/
prevP->setNext(currP->getNext());
}

delete currP; /* Deallocate the node. */
return true; /* Done searching. */
}
}

return false; /* Not in the list. */
}

We do handle the situation of removing the first node, i.e., when there is no previous. See how we use a previous pointer value of NULL to indicate this and do the right thing.

Does this function handle deletions from empty lists correctly?

Finally, this function can be called as:

list.deleteIter(item);

4. Recursive implementation:

Another way to implement a deletion function is with recursion (since recursion gives us backtracking).

The recursive version will need to receive a pointer to the node it is examining, and thus, the recursion will have to be done by a helper function:

Node *delRec(Node *, ItemType);

which will be private in class List. Users of the list will call the public member function deleteRecur(), which will mainly just call delRec() to do all the work and will return a true/false value depending on whether the item we want to delete is in the list or not.

bool deleteRecur(ItemType);

Remember that for a recursive function (i.e., a function that calls itself), we need to decide:

* Base case(s): when to stop recursing.
* Recursive part(s): how function will call itself to solve the problem.

The base cases are fairly easy, since we need to stop recursing when:

* We find the matching value.
* We search the whole list and don't find a matching value.

To determine what should be the recursive part of the function, we should consider 2 tasks required of this function:

1. Finding the one to remove.

How will we search the list for the element? Well, the list will be passed as a pointer to a node (recall the prototype--here we've named the pointer currP):

Node *delRec(Node *currP, ItemType item);

So, if we decide that the node pointed to by currP is not the right node, then we just recurse to examine the next node:

delRec(currP->getNext(), item);

and so on, until all nodes have been examined.

2. Relink the list.

When we remove a node, the previous node has to skip over the removed node. A way to achieve this with recursion is through pointer reassignment.

Consider the following, where the current call to delRec() gets a pointer to the node with x:

1. delRec(pointer-to-node-with-x, 'y');

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^
|
currP (in 1.)

Since x is NOT the thing to be removed, another recursive call is made. This next recursive call is going to access the node with y:

1. delRec(pointer-to-node-with-x, 'y');
2. delRec(pointer-to-node-with-y, 'y');

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^ ^
| |
currP (in 1.) currP (in 2.)

Remember that when the recursive call on the node with y (call 2) is done, we'll be back to the call on the node with x (i.e., we backtrack to call 1):

1. delRec(pointer-to-node-with-x, 'y');
2. call 2 done, back to 1.

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^
|
currP (in 1.)

Now since y is the thing to be removed...

If there was some way that this function call on the node with y (call 2) could tell us the node with y was removed and that it pointed to the node with z, then we could fix the node with x, having it skip over y (to z).

Well, it can!! Remember that the recursive function returns a pointer to a node!

Thus, what we'll do is have each recursive call return a pointer to the node to which the previous node should point. Thus, when the node is left alone, we just return a pointer to itself (i.e., "Don't skip over me!"). When the node is removed, we return a pointer to the next node (i.e., "Skip over me!").

So, we use the return value of the recursive call as:

currP->setNext(delRec(currP->getNext(), item));

When the next node is not removed, this pointer reassignment is equivalent to if we had done:

currP->next = currP->next;

i.e., no change. However, if the next node is the one to be removed, it would be like we had done:

currP->next = currP->next->next;

and thus, skips over the removed node.

Note: This also works for removing the first node (i.e., it fixes the pointer to the beginning of the list), if the initial call to the function is:

link = delRec(link, item);

Again, to complete the function, all the things we'll have to do are:

* Determine when we hit the end of the list.
* Determine when we've found the right node.
* Deallocate the correct node.
* Return what the link in the previous call should point to.

Finally, our functions look like:

bool List::deleteRecur(ItemType item)
{
if (!search(item))
return false;
link = delRec(link, item);
return true;
}

Node *List::delRec(Node *currP, ItemType item)
{
/* See if we are at end of list. */
if (currP == NULL)
return NULL;

/*
* Check to see if current node is one
* to be deleted.
*/
if (currP->getData() == item) {
Node *tempNextP;

/* Save the next pointer in the node. */
tempNextP = currP->getNext();

/* Deallocate the node. */
delete currP;

/*
* Return the NEW pointer to where we
* were called from. I.e., the pointer
* the previous call will use to "skip
* over" the removed node.
*/
return tempNextP;
}

/*
* Check the rest of the list, fixing the next
* pointer in case the next node is the one
* removed.
*/
currP->setNext(delRec(currP->getNext(), item));

/*
* Return the pointer to where we were called
* from. Since we did not remove this node it
* will be the same.
*/
return currP;
}

This recursive implementation even works when we try to delete from an empty list (i.e., there are no nodes).

A call to delete recursively will thus look like:

list.deleteRecur(item);

BU CAS CS - Deleting from a Linked List
This page created by Jisook Youn .
Material adapted for C++ from Deleting from a Linked List (C version by Robert I. Pitts ).

Wednesday, December 21, 2005

I.I.D

Independent Identically distrbuted random variables

Monday, December 19, 2005

Inner and Outer Joins

Sunday, December 18, 2005

Select Clause

The select distinct clause on a grouped query is redundant since the groups are already distinct.

-Kalyan

Wednesday, December 14, 2005

Clinton HandShake

So to my technical blog, I thought of adding something which is very non technical but significant enough to get an entry to my blog.

I happen to get up in the morning, pretty late than my usual time at 11 a.m and saw the mail that has become a part of ablution these days :). And see this mail from my dept head that X president Bill Clinton is visiting our college, I have read about him and I always found him to be an interesting personality and to add to that I shook hands with them.

Monday, December 12, 2005

Newton Method for Square Root

#include
#include
#include
int main()
{
const double SMALLFRACTION = 1.0E-8;
double x;
double r;
cout << "Enter number : ";
cin >> x;
r = x / 2.0;
while(fabs(x - r*r) > SMALLFRACTION*x)
{
cout << r << endl;
r = 0.5 *(x / r + r);
}
cout << "Number was : " << x << ", root is "
<< r << endl;
return EXIT_SUCCESS;
}

Guess Quotient Average
1 2/1 (2+1)/2 = 1.5
1.5 2/1.5 =1.33 (1.33+1.5)/2 = 1.4167
1.4167 2/1/4167= 1.4118 (1.4167+1.41118)/2 = 1.4142

Normalization:Could not have been better

Stolen from Mike Hillyer
--------------------------

When users ask for advice about their database applications, one of the first things I try to help them with is the normalization of their table structure. Normalization is the process of removing redundant data from your tables in order to improve storage efficiency, data integrity and scalability. This improvement is balanced against an increase in complexity and potential performance losses from the joining of the normalized tables at query-time.

Mike's Bookshop

Let's say you were looking to start an online bookstore. You would need to track certain information about the books available to your site viewers, such as:

Title
Author
Author Biography
ISBN
Price
Subject

Number of Pages
Publisher
Description
Review
Reviewer Name

Let's start by adding a couple of books written by Luke Welling and Laura Thomson. Because this book has two authors, we are going to need to accommodate both in our table. Lets take a look at a typical approach I encounter:

Title	Author1	Author2	ISBN	Subject	Pages	Publisher
PHP and MySQL Web Development	Luke Welling	Laura Thomson	0672317842	PHP, MySQL	867	Sams
MySQL Tutorial	Luke Welling	Laura Thomson	0672325845	MySQL	300	Sams

Let's take a look at some issues involved in this table design:

First, this table is not very efficient with storage. Lets imagine for a second that Luke and Laura were extremely busy writers and managed to produce 500 books for our database. The combination of their two names is 25 characters long, and since we will repeat their two names in 500 rows we are wasting 25 × 500 = 12,500 bytes of storage space unnecessarily.

Second, this design does not protect data integrity. Lets once again imagine that Luke and Laura have written 500 books. Someone has had to type their names into the database 500 times, and it is very likely that one of their names will be misspelled at least once (i.e.. Thompson instead of Thomson). Our data is now corrupt, and anyone searching for book by author name will find some of the results missing. The same thing could happen with publisher name. Sams publishes hundreds of titles and if the publisher's name were misspelled even once the list of books by publisher would be missing titles.

Third, this table does not scale well. First of all, we have limited ourselves to only two authors, yet some books are written by over a dozen people. This kind of limitation is often exhibited in personal info tables where the typical design includes Phone1, Phone2, and Phone3 columns. In both cases we are limiting future growth of our data. Another limitation to scalability is that fact that with all our data in one table our one table file will grow faster and we will have more trouble with file size limitations of our underlying operating system.

First Normal Form

The normalization process involves getting our data to conform to three progressive normal forms, and a higher level of normalization cannot be achieved until the previous levels have been achieved (there are actually five normal forms, but the last two are mainly academic and will not be discussed). The First Normal Form (or 1NF) involves removal of redundant data from horizontal rows. We want to ensure that there is no duplication of data in a given row, and that every column stores the least amount of information possible (making the field atomic).

In our table above we have two violations of First Normal Form:. First, we have more than one author field, and our subject field contains more than one piece of information. With more than one value in a single field, it would be very difficult to search for all books on a given subject. In addition, with two author fields we have two fields to search in order to look for a book by a specific author

We could get around these problems by modifying our table to have only one author field, with two entries for a book with two authors, as in the following example:

Title	Author	ISBN	Subject	Pages	Publisher
PHP and MySQL Web Development	Luke Welling	0672317842	MySQL	867	Sams
PHP and MySQL Web Development	Laura Thomson	0672317842	PHP	867	Sams
MySQL Tutorial	Laura Thomson	0672325845	MySQL	300	Sams
MySQL Tutorial	Luke Welling	0672325845	MySQL	300	Sams

While this approach has no redundant columns and the subject column has only one piece of information, we do have a problem that we now have two rows for a single book. Also, to ensure that we can do a search of author and subject (i.e. books on PHP by Luke Welling), we would need four rows to ensure that we had each combination of author and subject. Additionally, we would be violating the Second Normal Form, which will be described below.

A better solution to our problem would be to separate the redundant data into separate tables, with the tables related to each other. In this case we will create an Author table and a Subject table to store our information, removing that information from the Book table:

Author Table:

Author_ID	Last Name	First Name	Bio
1	Welling	Luke	Author of books on PHP and MySQL
2	Thomson	Laura	Another author of books on PHP and MySQL

Subject Table:

Subject_ID	Subject
1	PHP
2	MySQL

Book Table:

ISBN	Title	Pages	Publisher
0672317842	PHP and MySQL Web Development	867	Sams
0672325845	MySQL Tutorial	300	Sams

While creating the Author table, I also split author name down into separate first name and last name columns, in order to follow the requirement for storing as little information as possible in a given column. Each table has a primary key value which can be used for joining tables together when querying the data. A primary key value must be unique with in the table (no two books can have the same ISBN number), and a primary key is also an INDEX, which speeds up data retrieval based on the primary key.

Defining Relationships

As you can see, while our data is now split up, relationships between the table have not been defined. There are various types of relationships that can exist between two tables:

One to (Zero or) One
One to (Zero or) Many
Many to Many

The relationship between the Book table and the Author table is a many-to-many relationship: A book can have more than one author, and an author can write more than one book. To represent a many-to-many relationship in a relational database we need a third table to serve as a link between the two:

Book_Author Table:

ISBN	Author_ID
0672317842	1
0672317842	2
0672325845	1
0672325845	2

Similarly, the Subject table also has a many-to-many relationship with the Book table, as a book can cover multiple subjects, and a subject can be explained by multiple books:

Book_Subject Table:

ISBN	Subject_ID
0672317842	1
0672317842	2
0672325845	2

The one-to-many relationship will be covered in the next section. As you can see, we now have established the relationships between the Book, Author, and Subject tables. A book can have an unlimited number of authors, and can refer to an unlimited number of subjects. We can also easily search for books by a given author or referring to a given subject.

In the linking tables above the values stored refer to primary key values from the Book, Author, and Subject tables. Columns in a table that refer to primary keys from another table are known as foreign keys, and serve the purpose of defining data relationships. In database systems which support referential integrity, such as the InnoDB storage engine for MySQL, defining a column as a foreign key will also allow to database software to enforce the relationships you define. For example, with foreign keys defined, the InnoDB storage engine will not allow you to insert a row into the Book_Subject table unless the book and subject in question already exist in the book and subject tables. Such systems will also prevent the deletion of books from the book table that have 'child' entries in the book_subject or book_author tables.

Second Normal Form

Where the First Normal Form deals with redundancy of data across a horizontal row, Second Normal Form (or 2NF) deals with redundancy of data in vertical columns. As stated earlier, the normal forms are progressive, so to achieve Second Normal Form, your tables must already be in First Normal Form. Lets take another look at our new Book table:

Book Table:

ISBN	Title	Pages	Publisher
0672317842	PHP and MySQL Web Development	867	Sams
0672325845	MySQL Tutorial	300	Sams

This table is in First Normal Form; we do not repeat the same data in any two columns, and each column holds only the minimum amount of data. However, in the Publisher column we have the same publisher repeating in each row. This is a violation of Second Normal Form. As I stated above, this leaves the chance for spelling errors to occur. The user needs to type in the publisher name each time and such an approach is also inefficient in regards to storage usage, and the more data a table has the more IO requests are needed to scan the table, resulting in slower queries.

To normalize this table to Second Normal Form, we will once again break data off to a separate table. In this case we will move publisher information to a separate table as follows:

Publisher Table:

Publisher_ID	Publisher Name
1	Sams

In this case we have a one-to-many relationship between the book table and the publisher. A given book has only one publisher (for our purposes), and a publisher will publish many books. When we have a one-to-many relationship, we place a foreign key in the many, pointing to the primary key of the one. Here is the new Book table:

Book Table:

ISBN	Title	Pages	Publisher_ID
0672317842	PHP and MySQL Web Development	867	1
0672325845	MySQL Tutorial	300	1

Since the Book table is the 'many' portion of our one-to-many relationship, we have placed the primary key value of the 'one' portion in the Publisher_ID column as a foreign key.

The other requirement for Second Normal Form is that you cannot have any data in a table with a composite key that does not relate to all portions of the composite key. A composite key is a primary key that incorporates more than one column, as with our Book_Author table:

Let's say we wanted to add in tracking of reviews and started with the following table:

Review Table:

ISBN	Reviewer_ID	Review_Text	Reviewer_Name
0672325845	1	What a great book! I learned a lot!	Mike Hillyer

In this table the combination of Reviewer_ID and ISBN form the primary key (a composite key), ensuring that no reviewer writes more than one review for the same book. In this case the reviewer name is only related to the Reviewer_ID, and not the ISBN, and is therefore in violation of Second Normal Form.

In this case we solve the problem by having a separate table for the data that relates to only one part of the key. If the portion of the primary key that the field relates to is a foreign key to another table, move the data there (as in this situation where the Reviewer_ID will be a separate table):

Reviewer Table:

Reviewer_ID	Reviewer_Name	Reviewer_House	Reviewer_Street	Reviewer_City	Reviewer_Province	Reviewer_PostalCode
1	Mike Hillyer	123	Main Street	Calgary	Alberta	T1S-2N2

Review Table:

ISBN	Reviewer_ID	Review_Text
0672325845	1	What a great book! I learned a lot!

Third Normal Form

I have a confession to make; I do not often use Third Normal Form. In Third Normal Form we are looking for data in our tables that is not fully dependant on the primary key, but dependant on another value in the table. In the reviewer table above the Reviewer_Street and Reviewer_City fields are really dependant on the Reviewer_PostalCode and not the Reviewer_ID. To bring this table into compliance with Third Normal Form, we would need a table based on Postal Code:

PostalCode Table:

Postal_Code	Street	City
T1S-2N2	Main Street	Calgary

Now of course this new table violates Second Normal Form as the Street and City will be vertically redundant, and must be broken off to separate Street and City tables, and Province will need to be in it's own table which the City table will refer to as a foreign key.

The point of the last example is that Normalization is a tradeoff. In fact, there are two additional normal forms that are generally recognized, but are mainly academic. Additional normalization results in more complex JOIN queries, and this can cause a decrease in performance. In some cases performance can even be increased by de-normalizing table data, but de-normalization is beyond the scope of this article.

Conclusion

It is generally a good idea to make sure that your data at least conforms to Second Normal Form. Our goal is to avoid data redundancy to prevent corruption and make the best possible use of storage. Make sure that the same value is not stored in more than one place. With data in multiple locations we have to perform multiple updates when data needs to be changed, and corruption can begin to creep into the database.

Third Normal Form removed even more data redundancy, but at the cost of simplicity and performance. In our example above, do we really expect the City and Street names to change very regularly? In this situation Third Normal Form still prevents misspelling of City and Street names, but it is up to you to find a balance between Normalization and Speed/Simplicity. In an upcoming article we will look at how to form queries that join our normalized data together.

Sunday, December 04, 2005

Difference between char* and char[]

From K& R

char amessage[] = "Hello Word"; // This is an array
char*pmessage = "Hello World"; // This is a pointer

This is immediately visible in strtok function in C, which breaks the sentence into words

if I take something in this direction
char*origstring = "abc,def,ghi,jkl";
char* sep = ",";

char*delimstring = strtok(origstring,sep);
This would result in an error

Could be overcomed by the following solution
char origstring[] = "abc,def,ghi,jkl";
char* sep = ",";

char*delimstring = strtok(origstring,sep);

-Kalyan

Thursday, December 01, 2005

What is continous and discrete data?

So i had this basic doubt today of what is continous and discrete data :) , funny but I googled it to find

Stolen from isixsigma.com

Discrete Data :
Discrete data is information that can be categorized into a classification. Discrete data is based on counts. Only a finite number of values is possible, and the values cannot be subdivided meaningfully. For example, the number of parts damaged in shipment.

Attribute data (aka Discrete data) is data that can't be broken down into a smaller unit and add additional meaning. It is typically things counted in whole numbers. There is no such thing as 'half a defect.' Population data is attribute because you are generally counting people and putting them into various catagories (i.e. you are counting their 'attributes'). I know, you were about to ask about the '2.4 kids' statistic when they talk about average house holds. But that actually illustrates my point. Who ever heard of .4 of a person. It doesn't really add addition 'meaning' to the description.

Continous Data:
ontinuous data is information that can be measured on a continuum or scale. Continuous data can have almost any numeric value and can be meaningfully subdivided into finer and finer increments, depending upon the precision of the measurement system.

As opposed to discrete data like good or bad, off or on, etc., continuous data can be recorded at many different points (length, size, width, time, temperature, cost, etc.).

Continuous data is data that can be measured and broken down into smaller parts and still have meaning. Money, temperature and time are continous.Volume (like volume of water or air) and size are continuous data.

Let's say you are measuring the size of a marble. To be within specification, the marble must be at least 25mm but no bigger than 27mm. If you measure and simply count the number of marbles that are out of spec (good vs bad) you are collecting attribute data. However, if you are actually measuring each marble and recording the size (i.e 25.2mm, 26.1mm, 27.5mm, etc) that's continuous data, and you actually get more information about what you're measuring from continuous data than from attribute data.

Data can be continuous in the geometry or continuous in the range of values. The range of values for a particular data item has a minimum and a maximum value. Continuous data can be any value in between.

It is the data that can be measured on a scale.

-Kalyan

Unary Operators in C

Unary operators in C associate from right to left