Thursday, December 29, 2005

Merge two presorted lists


//Function to merge two pre-sorted arrays
void merge_sort(apvector &arrayA, apvector &arrayB, apvector &arrayC)
int indexA = 0; // initialize the Array Indices
int indexB = 0;
int indexC = 0;

while((indexA < arrayA.length( )) && (indexB < arrayB.length( ))
if (arrayA[indexA] < arrayB[indexB])
arrayC[indexC] = arrayA[indexA];
indexA++; //increase the index
arrayC[indexC] = arrayB[indexB];
indexB++; //increase the index
indexC++; //move to the next position in the new array
// Push remaining elements to end of new array when 1 feeder array is empty
while (indexA < arrayA.length( ))
arrayC[indexC] = arrayA[indexA];
while (indexB < arrayB.length( ))
arrayC[indexC] = arrayB[indexB];

Wednesday, December 28, 2005


Sum of the elements of the linked list:
int sum_list(struct list_node *l)
if(l == NULL)
return 0;
return + sum_list(;

Friday, December 23, 2005

string libraries

Stolen from

1 /*
2 * linux/lib/string.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * stupid library routines.. The optimized versions should generally be found
9 * as inline code in
10 *
11 * These are buggy as well..
12 *
13 * * Fri Jun 25 1999, Ingo Oeser
14 * - Added strsep() which will replace strtok() soon (because strsep() is
15 * reentrant and should be faster). Use only strsep() in new code, please.
16 */
18 #include
19 #include
20 #include
23 /**
24 * strnicmp - Case insensitive, length-limited string comparison
25 * @s1: One string
26 * @s2: The other string
27 * @len: the maximum number of characters to compare
28 */
29 int strnicmp(const char *s1, const char *s2, size_t len)
30 {
31 /* Yes, Virginia, it had better be unsigned */
32 unsigned char c1, c2;
34 c1 = 0; c2 = 0;
35 if (len) {
36 do {
37 c1 = *s1; c2 = *s2;
38 s1++; s2++;
39 if (!c1)
40 break;
41 if (!c2)
42 break;
43 if (c1 == c2)
44 continue;
45 c1 = tolower(c1);
46 c2 = tolower(c2);
47 if (c1 != c2)
48 break;
49 } while (--len);
50 }
51 return (int)c1 - (int)c2;
52 }
53 #endif
55 char * ___strtok;
57 #ifndef __HAVE_ARCH_STRCPY
58 /**
59 * strcpy - Copy a %NUL terminated string
60 * @dest: Where to copy the string to
61 * @src: Where to copy the string from
62 */
63 char * strcpy(char * dest,const char *src)
64 {
65 char *tmp = dest;
67 while ((*dest++ = *src++) != '\0')
68 /* nothing */;
69 return tmp;
70 }
71 #endif
73 #ifndef __HAVE_ARCH_STRNCPY
74 /**
75 * strncpy - Copy a length-limited, %NUL-terminated string
76 * @dest: Where to copy the string to
77 * @src: Where to copy the string from
78 * @count: The maximum number of bytes to copy
79 *
80 * Note that unlike userspace strncpy, this does not %NUL-pad the buffer.
81 * However, the result is not %NUL-terminated if the source exceeds
82 * @count bytes.
83 */
84 char * strncpy(char * dest,const char *src,size_t count)
85 {
86 char *tmp = dest;
88 while (count-- && (*dest++ = *src++) != '\0')
89 /* nothing */;
91 return tmp;
92 }
93 #endif
95 #ifndef __HAVE_ARCH_STRCAT
96 /**
97 * strcat - Append one %NUL-terminated string to another
98 * @dest: The string to be appended to
99 * @src: The string to append to it
100 */
101 char * strcat(char * dest, const char * src)
102 {
103 char *tmp = dest;
105 while (*dest)
106 dest++;
107 while ((*dest++ = *src++) != '\0')
108 ;
110 return tmp;
111 }
112 #endif
114 #ifndef __HAVE_ARCH_STRNCAT
115 /**
116 * strncat - Append a length-limited, %NUL-terminated string to another
117 * @dest: The string to be appended to
118 * @src: The string to append to it
119 * @count: The maximum numbers of bytes to copy
120 *
121 * Note that in contrast to strncpy, strncat ensures the result is
122 * terminated.
123 */
124 char * strncat(char *dest, const char *src, size_t count)
125 {
126 char *tmp = dest;
128 if (count) {
129 while (*dest)
130 dest++;
131 while ((*dest++ = *src++)) {
132 if (--count == 0) {
133 *dest = '\0';
134 break;
135 }
136 }
137 }
139 return tmp;
140 }
141 #endif
143 #ifndef __HAVE_ARCH_STRCMP
144 /**
145 * strcmp - Compare two strings
146 * @cs: One string
147 * @ct: Another string
148 */
149 int strcmp(const char * cs,const char * ct)
150 {
151 register signed char __res;
153 while (1) {
154 if ((__res = *cs - *ct++) != 0 || !*cs++)
155 break;
156 }
158 return __res;
159 }
160 #endif
162 #ifndef __HAVE_ARCH_STRNCMP
163 /**
164 * strncmp - Compare two length-limited strings
165 * @cs: One string
166 * @ct: Another string
167 * @count: The maximum number of bytes to compare
168 */
169 int strncmp(const char * cs,const char * ct,size_t count)
170 {
171 register signed char __res = 0;
173 while (count) {
174 if ((__res = *cs - *ct++) != 0 || !*cs++)
175 break;
176 count--;
177 }
179 return __res;
180 }
181 #endif
183 #ifndef __HAVE_ARCH_STRCHR
184 /**
185 * strchr - Find the first occurrence of a character in a string
186 * @s: The string to be searched
187 * @c: The character to search for
188 */
189 char * strchr(const char * s, int c)
190 {
191 for(; *s != (char) c; ++s)
192 if (*s == '\0')
193 return NULL;
194 return (char *) s;
195 }
196 #endif
198 #ifndef __HAVE_ARCH_STRRCHR
199 /**
200 * strrchr - Find the last occurrence of a character in a string
201 * @s: The string to be searched
202 * @c: The character to search for
203 */
204 char * strrchr(const char * s, int c)
205 {
206 const char *p = s + strlen(s);
207 do {
208 if (*p == (char)c)
209 return (char *)p;
210 } while (--p >= s);
211 return NULL;
212 }
213 #endif
215 #ifndef __HAVE_ARCH_STRLEN
216 /**
217 * strlen - Find the length of a string
218 * @s: The string to be sized
219 */
220 size_t strlen(const char * s)
221 {
222 const char *sc;
224 for (sc = s; *sc != '\0'; ++sc)
225 /* nothing */;
226 return sc - s;
227 }
228 #endif
230 #ifndef __HAVE_ARCH_STRNLEN
231 /**
232 * strnlen - Find the length of a length-limited string
233 * @s: The string to be sized
234 * @count: The maximum number of bytes to search
235 */
236 size_t strnlen(const char * s, size_t count)
237 {
238 const char *sc;
240 for (sc = s; count-- && *sc != '\0'; ++sc)
241 /* nothing */;
242 return sc - s;
243 }
244 #endif
246 #ifndef __HAVE_ARCH_STRSPN
247 /**
248 * strspn - Calculate the length of the initial substring of @s which only
249 * contain letters in @accept
250 * @s: The string to be searched
251 * @accept: The string to search for
252 */
253 size_t strspn(const char *s, const char *accept)
254 {
255 const char *p;
256 const char *a;
257 size_t count = 0;
259 for (p = s; *p != '\0'; ++p) {
260 for (a = accept; *a != '\0'; ++a) {
261 if (*p == *a)
262 break;
263 }
264 if (*a == '\0')
265 return count;
266 ++count;
267 }
269 return count;
270 }
271 #endif
273 #ifndef __HAVE_ARCH_STRPBRK
274 /**
275 * strpbrk - Find the first occurrence of a set of characters
276 * @cs: The string to be searched
277 * @ct: The characters to search for
278 */
279 char * strpbrk(const char * cs,const char * ct)
280 {
281 const char *sc1,*sc2;
283 for( sc1 = cs; *sc1 != '\0'; ++sc1) {
284 for( sc2 = ct; *sc2 != '\0'; ++sc2) {
285 if (*sc1 == *sc2)
286 return (char *) sc1;
287 }
288 }
289 return NULL;
290 }
291 #endif
293 #ifndef __HAVE_ARCH_STRTOK
294 /**
295 * strtok - Split a string into tokens
296 * @s: The string to be searched
297 * @ct: The characters to search for
298 *
299 * WARNING: strtok is deprecated, use strsep instead.
300 */
301 char * strtok(char * s,const char * ct)
302 {
303 char *sbegin, *send;
305 sbegin = s ? s : ___strtok;
306 if (!sbegin) {
307 return NULL;
308 }
309 sbegin += strspn(sbegin,ct);
310 if (*sbegin == '\0') {
311 ___strtok = NULL;
312 return( NULL );
313 }
314 send = strpbrk( sbegin, ct);
315 if (send && *send != '\0')
316 *send++ = '\0';
317 ___strtok = send;
318 return (sbegin);
319 }
320 #endif
322 #ifndef __HAVE_ARCH_STRSEP
323 /**
324 * strsep - Split a string into tokens
325 * @s: The string to be searched
326 * @ct: The characters to search for
327 *
328 * strsep() updates @s to point after the token, ready for the next call.
329 *
330 * It returns empty tokens, too, behaving exactly like the libc function
331 * of that name. In fact, it was stolen from glibc2 and de-fancy-fied.
332 * Same semantics, slimmer shape. ;)
333 */
334 char * strsep(char **s, const char *ct)
335 {
336 char *sbegin = *s, *end;
338 if (sbegin == NULL)
339 return NULL;
341 end = strpbrk(sbegin, ct);
342 if (end)
343 *end++ = '\0';
344 *s = end;
346 return sbegin;
347 }
348 #endif
350 #ifndef __HAVE_ARCH_MEMSET
351 /**
352 * memset - Fill a region of memory with the given value
353 * @s: Pointer to the start of the area.
354 * @c: The byte to fill the area with
355 * @count: The size of the area.
356 *
357 * Do not use memset() to access IO space, use memset_io() instead.
358 */
359 void * memset(void * s,int c,size_t count)
360 {
361 char *xs = (char *) s;
363 while (count--)
364 *xs++ = c;
366 return s;
367 }
368 #endif
370 #ifndef __HAVE_ARCH_BCOPY
371 /**
372 * bcopy - Copy one area of memory to another
373 * @src: Where to copy from
374 * @dest: Where to copy to
375 * @count: The size of the area.
376 *
377 * Note that this is the same as memcpy(), with the arguments reversed.
378 * memcpy() is the standard, bcopy() is a legacy BSD function.
379 *
380 * You should not use this function to access IO space, use memcpy_toio()
381 * or memcpy_fromio() instead.
382 */
383 char * bcopy(const char * src, char * dest, int count)
384 {
385 char *tmp = dest;
387 while (count--)
388 *tmp++ = *src++;
390 return dest;
391 }
392 #endif
394 #ifndef __HAVE_ARCH_MEMCPY
395 /**
396 * memcpy - Copy one area of memory to another
397 * @dest: Where to copy to
398 * @src: Where to copy from
399 * @count: The size of the area.
400 *
401 * You should not use this function to access IO space, use memcpy_toio()
402 * or memcpy_fromio() instead.
403 */
404 void * memcpy(void * dest,const void *src,size_t count)
405 {
406 char *tmp = (char *) dest, *s = (char *) src;
408 while (count--)
409 *tmp++ = *s++;
411 return dest;
412 }
413 #endif
415 #ifndef __HAVE_ARCH_MEMMOVE
416 /**
417 * memmove - Copy one area of memory to another
418 * @dest: Where to copy to
419 * @src: Where to copy from
420 * @count: The size of the area.
421 *
422 * Unlike memcpy(), memmove() copes with overlapping areas.
423 */
424 void * memmove(void * dest,const void *src,size_t count)
425 {
426 char *tmp, *s;
428 if (dest <= src) {
429 tmp = (char *) dest;
430 s = (char *) src;
431 while (count--)
432 *tmp++ = *s++;
433 }
434 else {
435 tmp = (char *) dest + count;
436 s = (char *) src + count;
437 while (count--)
438 *--tmp = *--s;
439 }
441 return dest;
442 }
443 #endif
445 #ifndef __HAVE_ARCH_MEMCMP
446 /**
447 * memcmp - Compare two areas of memory
448 * @cs: One area of memory
449 * @ct: Another area of memory
450 * @count: The size of the area.
451 */
452 int memcmp(const void * cs,const void * ct,size_t count)
453 {
454 const unsigned char *su1, *su2;
455 int res = 0;
457 for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
458 if ((res = *su1 - *su2) != 0)
459 break;
460 return res;
461 }
462 #endif
464 #ifndef __HAVE_ARCH_MEMSCAN
465 /**
466 * memscan - Find a character in an area of memory.
467 * @addr: The memory area
468 * @c: The byte to search for
469 * @size: The size of the area.
470 *
471 * returns the address of the first occurrence of @c, or 1 byte past
472 * the area if @c is not found
473 */
474 void * memscan(void * addr, int c, size_t size)
475 {
476 unsigned char * p = (unsigned char *) addr;
478 while (size) {
479 if (*p == c)
480 return (void *) p;
481 p++;
482 size--;
483 }
484 return (void *) p;
485 }
486 #endif
488 #ifndef __HAVE_ARCH_STRSTR
489 /**
490 * strstr - Find the first substring in a %NUL terminated string
491 * @s1: The string to be searched
492 * @s2: The string to search for
493 */
494 char * strstr(const char * s1,const char * s2)
495 {
496 int l1, l2;
498 l2 = strlen(s2);
499 if (!l2)
500 return (char *) s1;
501 l1 = strlen(s1);
502 while (l1 >= l2) {
503 l1--;
504 if (!memcmp(s1,s2,l2))
505 return (char *) s1;
506 s1++;
507 }
508 return NULL;
509 }
510 #endif
512 #ifndef __HAVE_ARCH_MEMCHR
513 /**
514 * memchr - Find a character in an area of memory.
515 * @s: The memory area
516 * @c: The byte to search for
517 * @n: The size of the area.
518 *
519 * returns the address of the first occurrence of @c, or %NULL
520 * if @c is not found
521 */
522 void *memchr(const void *s, int c, size_t n)
523 {
524 const unsigned char *p = s;
525 while (n-- != 0) {
526 if ((unsigned char)c == *p++) {
527 return (void *)(p-1);
528 }
529 }
530 return NULL;
531 }
533 #endif

Thursday, December 22, 2005

Reversing a linked list

link reverse(link x)
{ link t, y = x, r = NULL;
while (y != NULL)
{ t = y->next; y->next = r; r = y; y = t; }
return r;

Deleting from a linked list

Deleting from a Linked List

1. Linked List example:

Here, we will discuss deleting an element with a specific value from a linked list.

Here are some constraints for our example:

1. The list holds char's.
2. Deletion can occur anywhere in the list.
3. The delete operation will not return the element that we delete, it only need remove it from the list.

Now, recall the basic structure of a singly-linked list:

--------- --------- ---------
| a | --+---> | b | --+---> | c | 0 |
--------- --------- ---------

In other words, there is a node for each element, where nodes consist of a part in which the element is stored and a link to the next node. The last node links to nothing (i.e., there are no nodes after it). Also, there is a link to the beginning of the list.

What do these links correspond to in C++?

Answer: Pointers!

We'll consider our elements and nodes to have the following types:

typedef char ItemType;

class Node {
ItemType getData();
Node *getNext();
void setNext(Node *);
ItemType data;
Node *next; // pointer to node after me

And, the actual linked list will be a type called List:

class List {
bool search(ItemType);
Node *link; // pointer to beginning (1st node) of linked list

When the list is empty, what value will the pointer to the beginning have?

Answer: There are no nodes to point to, so it will be NULL!

Finally, if we want an instance of the List class, we can create one as:

List list;

2. Different cases:

There are a few steps to deleting a specific element from the list:

1. Find the node with the element (if it exists).
2. Remove that node.
3. Reconnect the linked list.
4. Update the link to the beginning (if necessary).

The order in which we perform these steps will depend on how we implement the deletion operation.

Note: For simplicity, if there are two elements with the value we want, we'll just remove the first one.

Finding the node in question is a matter of traversing the list and looking at each node's element.

Reconnecting the list once a node is to be removed is more interesting. Let's consider at least 3 cases:

* Removing a node from the beginning.
* Removing a node from the middle.
* Removing a node from the end.
Removing from the beginning
When removing the node at the beginning of the list, there is no relinking of nodes to be performed, since the first node has no preceding node. For example, removing node with a:

--------- --------- ---------
| a | --+---> | b | --+---> | c | 0 |
--------- --------- ---------

However, we must fix the pointer to the beginning of the list:

--------- --------- ---------
| a | --+---> | b | --+---> | c | 0 |
--------- --------- ---------

Removing from the middle
Removing a node from the middle requires that the preceding node skips over the node being removed. For example, removing the node with b:

--------- --------- ---------
| a | --+--+ | b | --+---> | c | 0 |
--------- | --------- ---------
| ^

This means that we need some way to refer to the node before the one we want to remove.
Removing from the end
Removing a node from the end requires that the preceding node becomes the new end of the list (i.e., points to nothing after it). For example, removing the node with c:

--------- --------- ---------
| a | --+---> | b | 0 | | c | 0 |
--------- --------- ---------

Note that the last two cases (middle and end) can be combined by saying that "the node preceding the one to be removed must point where the one to be removed does."

IMPORTANT NOTE: When there is only one node, and it is the one to be removed, are we in the case of removing at the beginning or end?! We should make sure that one of these cases handles removing this single node ELSE we should create a new case.

What we've learned from these pictures is that when we find the node we want to remove, we need some kind of reference to the node preceding it OR some way to backtrack to that node.
Deletion member functions
We'll write List member functions to delete an element from a list in 2 ways: iteratively and recursively (which will require a helper function):

class List {
bool deleteIter(ItemType);
bool deleteRecur(ItemType);
Node *link;
Node *delRec(Node *, ItemType); // helper function

The deletion functions will return a bool representing whether the element was deleted (i.e., was present in the list).

3. Iterative implementation:

Let's look at how to delete iteratively (i.e., with a loop) via a member function:

bool deleteIter(ItemType);

We'll need to be able to change the pointer to the list, link, in the case that the first node in the list is removed. Since deleteIter() is a member function of List, and link is one of the data member of that class, we can access it in deleteIter() when necessary.

We will iterate (i.e., loop) through the list to find the item to delete. It's easy to see how to start a pointer at the beginning and move it from one node to the next:

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------

In other words:

* Start at the beginning:

currP = link

* Advance to the next node when necessary:

currP = currP->getNext()

by calling the member function to get the next node.

* Stop when there are no more nodes, i.e., make sure that:

currP != NULL

It's easy to combine these into a for loop:

for (currP = link; currP != NULL; currP = currP->getNext()) {

However, when we find the one to remove, we'll also need a pointer to the previous node:

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^ ^
| |
prevP currP

Thus, we also need to maintain a previous pointer at each step of the loop:

for (currP = link;
currP != NULL;
prevP = currP, currP = currP->getNext()) {

(Notice that there are 2 increments separated by a comma.)

To complete the function, we'll have to:

* Have some way to indicate when there is no previous node.
Remember that removing the first node is a special case because...
o It requires no relinking of nodes.
o We have to update the pointer to the beginning of the list.
* Relink the list (i.e., skip over the node to be deleted).
* Remove the node.

One implementation of this function might be:

bool List::deleteIter(ItemType item)
Node *currP, *prevP;

/* For 1st node, indicate there is no previous. */
prevP = NULL;

* Visit each node, maintaining a pointer to
* the previous node we just visited.
for (currP = link;
currP != NULL;
prevP = currP, currP = currP->getNext()) {

if (currP->getData() == item) { /* Found it. */
if (prevP == NULL) {
/* Fix beginning pointer. */
link = currP->getNext();
} else {
* Fix previous node's next to
* skip over the removed node.

delete currP; /* Deallocate the node. */
return true; /* Done searching. */

return false; /* Not in the list. */

We do handle the situation of removing the first node, i.e., when there is no previous. See how we use a previous pointer value of NULL to indicate this and do the right thing.

Does this function handle deletions from empty lists correctly?

Finally, this function can be called as:


4. Recursive implementation:

Another way to implement a deletion function is with recursion (since recursion gives us backtracking).

The recursive version will need to receive a pointer to the node it is examining, and thus, the recursion will have to be done by a helper function:

Node *delRec(Node *, ItemType);

which will be private in class List. Users of the list will call the public member function deleteRecur(), which will mainly just call delRec() to do all the work and will return a true/false value depending on whether the item we want to delete is in the list or not.

bool deleteRecur(ItemType);

Remember that for a recursive function (i.e., a function that calls itself), we need to decide:

* Base case(s): when to stop recursing.
* Recursive part(s): how function will call itself to solve the problem.

The base cases are fairly easy, since we need to stop recursing when:

* We find the matching value.
* We search the whole list and don't find a matching value.

To determine what should be the recursive part of the function, we should consider 2 tasks required of this function:

1. Finding the one to remove.

How will we search the list for the element? Well, the list will be passed as a pointer to a node (recall the prototype--here we've named the pointer currP):

Node *delRec(Node *currP, ItemType item);

So, if we decide that the node pointed to by currP is not the right node, then we just recurse to examine the next node:

delRec(currP->getNext(), item);

and so on, until all nodes have been examined.

2. Relink the list.

When we remove a node, the previous node has to skip over the removed node. A way to achieve this with recursion is through pointer reassignment.

Consider the following, where the current call to delRec() gets a pointer to the node with x:

1. delRec(pointer-to-node-with-x, 'y');

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
currP (in 1.)

Since x is NOT the thing to be removed, another recursive call is made. This next recursive call is going to access the node with y:

1. delRec(pointer-to-node-with-x, 'y');
2. delRec(pointer-to-node-with-y, 'y');

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
^ ^
| |
currP (in 1.) currP (in 2.)

Remember that when the recursive call on the node with y (call 2) is done, we'll be back to the call on the node with x (i.e., we backtrack to call 1):

1. delRec(pointer-to-node-with-x, 'y');
2. call 2 done, back to 1.

--------- --------- ---------
| x | --+---> | y | --+---> | z | --+--->
--------- --------- ---------
currP (in 1.)

Now since y is the thing to be removed...

If there was some way that this function call on the node with y (call 2) could tell us the node with y was removed and that it pointed to the node with z, then we could fix the node with x, having it skip over y (to z).

Well, it can!! Remember that the recursive function returns a pointer to a node!

Thus, what we'll do is have each recursive call return a pointer to the node to which the previous node should point. Thus, when the node is left alone, we just return a pointer to itself (i.e., "Don't skip over me!"). When the node is removed, we return a pointer to the next node (i.e., "Skip over me!").

So, we use the return value of the recursive call as:

currP->setNext(delRec(currP->getNext(), item));

When the next node is not removed, this pointer reassignment is equivalent to if we had done:

currP->next = currP->next;

i.e., no change. However, if the next node is the one to be removed, it would be like we had done:

currP->next = currP->next->next;

and thus, skips over the removed node.

Note: This also works for removing the first node (i.e., it fixes the pointer to the beginning of the list), if the initial call to the function is:

link = delRec(link, item);

Again, to complete the function, all the things we'll have to do are:

* Determine when we hit the end of the list.
* Determine when we've found the right node.
* Deallocate the correct node.
* Return what the link in the previous call should point to.

Finally, our functions look like:

bool List::deleteRecur(ItemType item)
if (!search(item))
return false;
link = delRec(link, item);
return true;

Node *List::delRec(Node *currP, ItemType item)
/* See if we are at end of list. */
if (currP == NULL)
return NULL;

* Check to see if current node is one
* to be deleted.
if (currP->getData() == item) {
Node *tempNextP;

/* Save the next pointer in the node. */
tempNextP = currP->getNext();

/* Deallocate the node. */
delete currP;

* Return the NEW pointer to where we
* were called from. I.e., the pointer
* the previous call will use to "skip
* over" the removed node.
return tempNextP;

* Check the rest of the list, fixing the next
* pointer in case the next node is the one
* removed.
currP->setNext(delRec(currP->getNext(), item));

* Return the pointer to where we were called
* from. Since we did not remove this node it
* will be the same.
return currP;

This recursive implementation even works when we try to delete from an empty list (i.e., there are no nodes).

A call to delete recursively will thus look like:


BU CAS CS - Deleting from a Linked List
This page created by Jisook Youn .
Material adapted for C++ from Deleting from a Linked List (C version by Robert I. Pitts ).

Wednesday, December 21, 2005


Independent Identically distrbuted random variables

Monday, December 19, 2005

Sunday, December 18, 2005

Select Clause

The select distinct clause on a grouped query is redundant since the groups are already distinct.


Wednesday, December 14, 2005

Clinton HandShake

So to my technical blog, I thought of adding something which is very non technical but significant enough to get an entry to my blog.

I happen to get up in the morning, pretty late than my usual time at 11 a.m and saw the mail that has become a part of ablution these days :). And see this mail from my dept head that X president Bill Clinton is visiting our college, I have read about him and I always found him to be an interesting personality and to add to that I shook hands with them.

Monday, December 12, 2005

Newton Method for Square Root

int main()
const double SMALLFRACTION = 1.0E-8;
double x;
double r;
cout << "Enter number : ";
cin >> x;
r = x / 2.0;
while(fabs(x - r*r) > SMALLFRACTION*x)
cout << r << endl;
r = 0.5 *(x / r + r);
cout << "Number was : " << x << ", root is "
<< r << endl;

Guess Quotient Average
1 2/1 (2+1)/2 = 1.5
1.5 2/1.5 =1.33 (1.33+1.5)/2 = 1.4167
1.4167 2/1/4167= 1.4118 (1.4167+1.41118)/2 = 1.4142

Normalization:Could not have been better

Stolen from Mike Hillyer

When users ask for advice about their database applications, one of the first things I try to help them with is the normalization of their table structure. Normalization is the process of removing redundant data from your tables in order to improve storage efficiency, data integrity and scalability. This improvement is balanced against an increase in complexity and potential performance losses from the joining of the normalized tables at query-time.

Mike's Bookshop

Let's say you were looking to start an online bookstore. You would need to track certain information about the books available to your site viewers, such as:

  • Title
  • Author
  • Author Biography
  • ISBN
  • Price
  • Subject
  • Number of Pages
  • Publisher
  • Description
  • Review
  • Reviewer Name

Let's start by adding a couple of books written by Luke Welling and Laura Thomson. Because this book has two authors, we are going to need to accommodate both in our table. Lets take a look at a typical approach I encounter:

Title Author1 Author2 ISBN Subject Pages Publisher
PHP and MySQL Web Development Luke Welling Laura Thomson 0672317842 PHP, MySQL 867 Sams
MySQL Tutorial Luke Welling Laura Thomson 0672325845 MySQL 300 Sams

Let's take a look at some issues involved in this table design:

First, this table is not very efficient with storage. Lets imagine for a second that Luke and Laura were extremely busy writers and managed to produce 500 books for our database. The combination of their two names is 25 characters long, and since we will repeat their two names in 500 rows we are wasting 25 × 500 = 12,500 bytes of storage space unnecessarily.

Second, this design does not protect data integrity. Lets once again imagine that Luke and Laura have written 500 books. Someone has had to type their names into the database 500 times, and it is very likely that one of their names will be misspelled at least once (i.e.. Thompson instead of Thomson). Our data is now corrupt, and anyone searching for book by author name will find some of the results missing. The same thing could happen with publisher name. Sams publishes hundreds of titles and if the publisher's name were misspelled even once the list of books by publisher would be missing titles.

Third, this table does not scale well. First of all, we have limited ourselves to only two authors, yet some books are written by over a dozen people. This kind of limitation is often exhibited in personal info tables where the typical design includes Phone1, Phone2, and Phone3 columns. In both cases we are limiting future growth of our data. Another limitation to scalability is that fact that with all our data in one table our one table file will grow faster and we will have more trouble with file size limitations of our underlying operating system.

First Normal Form

The normalization process involves getting our data to conform to three progressive normal forms, and a higher level of normalization cannot be achieved until the previous levels have been achieved (there are actually five normal forms, but the last two are mainly academic and will not be discussed). The First Normal Form (or 1NF) involves removal of redundant data from horizontal rows. We want to ensure that there is no duplication of data in a given row, and that every column stores the least amount of information possible (making the field atomic).

In our table above we have two violations of First Normal Form:. First, we have more than one author field, and our subject field contains more than one piece of information. With more than one value in a single field, it would be very difficult to search for all books on a given subject. In addition, with two author fields we have two fields to search in order to look for a book by a specific author

We could get around these problems by modifying our table to have only one author field, with two entries for a book with two authors, as in the following example:

Title Author ISBN Subject Pages Publisher
PHP and MySQL Web Development Luke Welling 0672317842 MySQL 867 Sams
PHP and MySQL Web Development Laura Thomson 0672317842 PHP 867 Sams
MySQL Tutorial Laura Thomson 0672325845 MySQL 300 Sams
MySQL Tutorial Luke Welling 0672325845 MySQL 300 Sams

While this approach has no redundant columns and the subject column has only one piece of information, we do have a problem that we now have two rows for a single book. Also, to ensure that we can do a search of author and subject (i.e. books on PHP by Luke Welling), we would need four rows to ensure that we had each combination of author and subject. Additionally, we would be violating the Second Normal Form, which will be described below.

A better solution to our problem would be to separate the redundant data into separate tables, with the tables related to each other. In this case we will create an Author table and a Subject table to store our information, removing that information from the Book table:

Author Table:

Author_ID Last Name First Name Bio
1 Welling Luke Author of books on PHP and MySQL
2 Thomson Laura Another author of books on PHP and MySQL

Subject Table:

Subject_ID Subject

Book Table:

ISBN Title Pages Publisher
0672317842 PHP and MySQL Web Development 867 Sams
0672325845 MySQL Tutorial 300 Sams

While creating the Author table, I also split author name down into separate first name and last name columns, in order to follow the requirement for storing as little information as possible in a given column. Each table has a primary key value which can be used for joining tables together when querying the data. A primary key value must be unique with in the table (no two books can have the same ISBN number), and a primary key is also an INDEX, which speeds up data retrieval based on the primary key.

Defining Relationships

As you can see, while our data is now split up, relationships between the table have not been defined. There are various types of relationships that can exist between two tables:

  • One to (Zero or) One
  • One to (Zero or) Many
  • Many to Many

The relationship between the Book table and the Author table is a many-to-many relationship: A book can have more than one author, and an author can write more than one book. To represent a many-to-many relationship in a relational database we need a third table to serve as a link between the two:

Book_Author Table:

ISBN Author_ID
0672317842 1
0672317842 2
0672325845 1
0672325845 2

Similarly, the Subject table also has a many-to-many relationship with the Book table, as a book can cover multiple subjects, and a subject can be explained by multiple books:

Book_Subject Table:

ISBN Subject_ID
0672317842 1
0672317842 2
0672325845 2

The one-to-many relationship will be covered in the next section. As you can see, we now have established the relationships between the Book, Author, and Subject tables. A book can have an unlimited number of authors, and can refer to an unlimited number of subjects. We can also easily search for books by a given author or referring to a given subject.

In the linking tables above the values stored refer to primary key values from the Book, Author, and Subject tables. Columns in a table that refer to primary keys from another table are known as foreign keys, and serve the purpose of defining data relationships. In database systems which support referential integrity, such as the InnoDB storage engine for MySQL, defining a column as a foreign key will also allow to database software to enforce the relationships you define. For example, with foreign keys defined, the InnoDB storage engine will not allow you to insert a row into the Book_Subject table unless the book and subject in question already exist in the book and subject tables. Such systems will also prevent the deletion of books from the book table that have 'child' entries in the book_subject or book_author tables.

Second Normal Form

Where the First Normal Form deals with redundancy of data across a horizontal row, Second Normal Form (or 2NF) deals with redundancy of data in vertical columns. As stated earlier, the normal forms are progressive, so to achieve Second Normal Form, your tables must already be in First Normal Form. Lets take another look at our new Book table:

Book Table:

ISBN Title Pages Publisher
0672317842 PHP and MySQL Web Development 867 Sams
0672325845 MySQL Tutorial 300 Sams

This table is in First Normal Form; we do not repeat the same data in any two columns, and each column holds only the minimum amount of data. However, in the Publisher column we have the same publisher repeating in each row. This is a violation of Second Normal Form. As I stated above, this leaves the chance for spelling errors to occur. The user needs to type in the publisher name each time and such an approach is also inefficient in regards to storage usage, and the more data a table has the more IO requests are needed to scan the table, resulting in slower queries.

To normalize this table to Second Normal Form, we will once again break data off to a separate table. In this case we will move publisher information to a separate table as follows:

Publisher Table:

Publisher_ID Publisher Name
1 Sams

In this case we have a one-to-many relationship between the book table and the publisher. A given book has only one publisher (for our purposes), and a publisher will publish many books. When we have a one-to-many relationship, we place a foreign key in the many, pointing to the primary key of the one. Here is the new Book table:

Book Table:

ISBN Title Pages Publisher_ID
0672317842 PHP and MySQL Web Development 867 1
0672325845 MySQL Tutorial 300 1

Since the Book table is the 'many' portion of our one-to-many relationship, we have placed the primary key value of the 'one' portion in the Publisher_ID column as a foreign key.

The other requirement for Second Normal Form is that you cannot have any data in a table with a composite key that does not relate to all portions of the composite key. A composite key is a primary key that incorporates more than one column, as with our Book_Author table:

Let's say we wanted to add in tracking of reviews and started with the following table:

Review Table:

ISBN Reviewer_ID Review_Text Reviewer_Name
0672325845 1 What a great book! I learned a lot! Mike Hillyer

In this table the combination of Reviewer_ID and ISBN form the primary key (a composite key), ensuring that no reviewer writes more than one review for the same book. In this case the reviewer name is only related to the Reviewer_ID, and not the ISBN, and is therefore in violation of Second Normal Form.

In this case we solve the problem by having a separate table for the data that relates to only one part of the key. If the portion of the primary key that the field relates to is a foreign key to another table, move the data there (as in this situation where the Reviewer_ID will be a separate table):

Reviewer Table:

Reviewer_ID Reviewer_Name Reviewer_House Reviewer_Street Reviewer_City Reviewer_Province Reviewer_PostalCode
1 Mike Hillyer 123 Main Street Calgary Alberta T1S-2N2

Review Table:

ISBN Reviewer_ID Review_Text
0672325845 1 What a great book! I learned a lot!

Third Normal Form

I have a confession to make; I do not often use Third Normal Form. In Third Normal Form we are looking for data in our tables that is not fully dependant on the primary key, but dependant on another value in the table. In the reviewer table above the Reviewer_Street and Reviewer_City fields are really dependant on the Reviewer_PostalCode and not the Reviewer_ID. To bring this table into compliance with Third Normal Form, we would need a table based on Postal Code:

PostalCode Table:

Postal_Code Street City
T1S-2N2 Main Street Calgary

Now of course this new table violates Second Normal Form as the Street and City will be vertically redundant, and must be broken off to separate Street and City tables, and Province will need to be in it's own table which the City table will refer to as a foreign key.

The point of the last example is that Normalization is a tradeoff. In fact, there are two additional normal forms that are generally recognized, but are mainly academic. Additional normalization results in more complex JOIN queries, and this can cause a decrease in performance. In some cases performance can even be increased by de-normalizing table data, but de-normalization is beyond the scope of this article.


It is generally a good idea to make sure that your data at least conforms to Second Normal Form. Our goal is to avoid data redundancy to prevent corruption and make the best possible use of storage. Make sure that the same value is not stored in more than one place. With data in multiple locations we have to perform multiple updates when data needs to be changed, and corruption can begin to creep into the database.

Third Normal Form removed even more data redundancy, but at the cost of simplicity and performance. In our example above, do we really expect the City and Street names to change very regularly? In this situation Third Normal Form still prevents misspelling of City and Street names, but it is up to you to find a balance between Normalization and Speed/Simplicity. In an upcoming article we will look at how to form queries that join our normalized data together.

Sunday, December 04, 2005

Difference between char* and char[]

From K& R

char amessage[] = "Hello Word"; // This is an array
char*pmessage = "Hello World"; // This is a pointer

This is immediately visible in strtok function in C, which breaks the sentence into words

if I take something in this direction
char*origstring = "abc,def,ghi,jkl";
char* sep = ",";

char*delimstring = strtok(origstring,sep);
This would result in an error

Could be overcomed by the following solution
char origstring[] = "abc,def,ghi,jkl";
char* sep = ",";

char*delimstring = strtok(origstring,sep);


Thursday, December 01, 2005

What is continous and discrete data?

So i had this basic doubt today of what is continous and discrete data :) , funny but I googled it to find

Stolen from

Discrete Data :
Discrete data is information that can be categorized into a classification. Discrete data is based on counts. Only a finite number of values is possible, and the values cannot be subdivided meaningfully. For example, the number of parts damaged in shipment.

Attribute data (aka Discrete data) is data that can't be broken down into a smaller unit and add additional meaning. It is typically things counted in whole numbers. There is no such thing as 'half a defect.' Population data is attribute because you are generally counting people and putting them into various catagories (i.e. you are counting their 'attributes'). I know, you were about to ask about the '2.4 kids' statistic when they talk about average house holds. But that actually illustrates my point. Who ever heard of .4 of a person. It doesn't really add addition 'meaning' to the description.

Continous Data:
ontinuous data is information that can be measured on a continuum or scale. Continuous data can have almost any numeric value and can be meaningfully subdivided into finer and finer increments, depending upon the precision of the measurement system.

As opposed to discrete data like good or bad, off or on, etc., continuous data can be recorded at many different points (length, size, width, time, temperature, cost, etc.).

Continuous data is data that can be measured and broken down into smaller parts and still have meaning. Money, temperature and time are continous.Volume (like volume of water or air) and size are continuous data.

Let's say you are measuring the size of a marble. To be within specification, the marble must be at least 25mm but no bigger than 27mm. If you measure and simply count the number of marbles that are out of spec (good vs bad) you are collecting attribute data. However, if you are actually measuring each marble and recording the size (i.e 25.2mm, 26.1mm, 27.5mm, etc) that's continuous data, and you actually get more information about what you're measuring from continuous data than from attribute data.

Data can be continuous in the geometry or continuous in the range of values. The range of values for a particular data item has a minimum and a maximum value. Continuous data can be any value in between.

It is the data that can be measured on a scale.


Unary Operators in C

Unary operators in C associate from right to left

Monday, November 28, 2005

Unions in C

Stolen from GOTW

Unions allow more than one object, of either class or builtin type, to occupy the same space in memory. For example:

// Example 1
union U
int i;
float f;

U u;

u.i = 42; // ok, now i is active
std::cout << u.i << std::endl;

u.f = 3.14f; // ok, now f is active
std::cout <<>

But only one of the types can be "active" at a time -- after all, the storage can after all only hold one value at a time. Also, unions only support some kinds of types, which leads us into the next question:

Friday, November 25, 2005

Reviewing a Paper

Original Text by Bill Griswold

The questions you want to have answered by reading a paper are the following:
  • What are motivations for this work? For a research paper, there is an expectation that a problem has been solved that no one else has published in the literature. This problem intrinsically has two parts. The first is often unstated. Think of this as the people problem. The people problem is the benefits that are desired in the world at large; for example some issue of quality of life, such as saved time or increased safety. The second part is the technical problem, which is why the people problem does not have a trivial solution; that is, why a new technological or engineering solution may be required. Implicitly there is implication that previous solutions to the problem are inadequate. Occasionally an author will fail to state either point, making your job much more difficult.
  • What is the proposed solution? This is also called the hypothesis or idea. There should also be an argument about why the solution solves the problem better than previous solutions. There should also be a discussion about how the solution is achieved (designed and implemented) or is at least achievable.
  • What is the evaluation of the proposed solution? An idea alone is usually not adequate for publication of a research paper. What argument and/or experiment is made to make a case for the value of the ideas? What benefits or problems are identified? Are they convincing?
  • What are the contributions? The contributions in a paper may be many and varied. Ideas, software, experimental techniques, and area survey are a few key possibilities.
  • What are future directions for this research? Not only what future directions do the authors identify, but what ideas did you come up with while reading the paper?
More to Add
Copied from Harvard Site

Many conferences ask you to judge the paper (usually on a scale of 1 to 5) in a number of categories. For

  • Relevance: How relevant is the paper to the conference?
  • Presentation: How well-written is the paper? Is it totally incomprehensible or lucid and eloquent?
  • Originality: How novel is the paper? Are the technical ideas presented new?
  • Correctness: Is the paper technically correct? Are the experiments performed or the analysis presented valid?
  • Confidence: How well-versed are you, the reviewer, in this area? Are you an expert in the field and confident your feedback is correct or are you unfamiliar with the field and unsure of your feedback?
  • Overall: What is your overall rating for this paper? Do you enthusiastically support acceptance of this paper into the conference, or would you be embarrassed to be on a Program Committee thataccepted a paper like this?

Sunday, November 20, 2005

Java Collections

A set is a collection that cannot contain duplicate elements.
A list is an ordered collection, it may contain duplicate elements.

How is shuffle implemented in java?

Java Serialization

Look at the sun documentation
Serialization API

We all know the Java platform allows us to create reusable objects in memory. However, all of those objects exist only as long as the Java virtual machine1 remains running. It would be nice if the objects we create could exist beyond the lifetime of the virtual machine, wouldn't it? Well, with object serialization, you can flatten your objects and reuse them in powerful ways.

Object serialization is the process of saving an object's state to a sequence of bytes, as well as the process of rebuilding those bytes into a live object at some future time. The Java Serialization API provides a standard mechanism for developers to handle object serialization. The API is small and easy to use, provided the classes and methods are understood.

Throughout this article, we'll examine how to persist your Java objects, starting with the basics and proceeding to the more advanced concepts. We'll learn three different ways to perform serialization -- using the default protocol, customizing the default protocol, and creating our own protocol -- and we'll investigate concerns that arise with any persistence scheme such as object caching, version control, and performance issues.

By the conclusion of this article, you should have a solid comprehension of that powerful yet sometimes poorly understood Java API.

First Things First: The Default Mechanism

Let's start with the basics. To persist an object in Java, we must have a persistent object. An object is marked serializable by implementing the interface, which signifies to the underlying API that the object can be flattened into bytes and subsequently inflated in the future.

Let's look at a persistent class we'll use to demonstrate the serialization mechanism:

10 import;
20 import java.util.Date;
30 import java.util.Calendar;
40 public class PersistentTime implements Serializable
50 {
60 private Date time;
80 public PersistentTime()
90 {
100 time = Calendar.getInstance().getTime();
110 }
130 public Date getTime()
140 {
150 return time;
160 }
170 }

As you can see, the only thing we had to do differently from creating a normal class is implement the interface on line 40. The completely empty Serializable is only a marker interface -- it simply allows the serialization mechanism to verify that the class is able to be persisted. Thus, we turn to the first rule of serialization:

Rule #1: The object to be persisted must implement the Serializable interface or inherit that implementation from its object hierarchy.

The next step is to actually persist the object. That is done with the class. That class is a filter stream--it is wrapped around a lower-level byte stream (called a node stream) to handle the serialization protocol for us. Node streams can be used to write to file systems or even across sockets. That means we could easily transfer a flattened object across a network wire and have it be rebuilt on the other side!

Take a look at the code used to save the PersistentTime object:

10 import;
20 import;
30 import;
40 public class FlattenTime
50 {
60 public static void main(String [] args)
70 {
80 String filename = "time.ser";
90 if(args.length > 0)
100 {
110 filename = args[0];
120 }
130 PersistentTime time = new PersistentTime();
140 FileOutputStream fos = null;
150 ObjectOutputStream out = null;
160 try
170 {
180 fos = new FileOutputStream(filename);
190 out = new ObjectOutputStream(fos);
200 out.writeObject(time);
210 out.close();
220 }
230 catch(IOException ex)
240 {
250 ex.printStackTrace();
260 }
270 }
280 }

The real work happens on line 200 when we call the ObjectOutputStream.writeObject() method, which kicks off the serialization mechanism and the object is flattened (in that case to a file).

To restore the file, we can employ the following code:

10 import;
20 import;
30 import;
40 import java.util.Calendar;
50 public class InflateTime
60 {
70 public static void main(String [] args)
80 {
90 String filename = "time.ser";
100 if(args.length > 0)
110 {
120 filename = args[0];
130 }
140 PersistentTime time = null;
150 FileInputStream fis = null;
160 ObjectInputStream in = null;
170 try
180 {
190 fis = new FileInputStream(filename);
200 in = new ObjectInputStream(fis);
210 time = (PersistentTime)in.readObject();
220 in.close();
230 }
240 catch(IOException ex)
250 {
260 ex.printStackTrace();
270 }
280 catch(ClassNotFoundException ex)
290 {
300 ex.printStackTrace();
310 }
320 // print out restored time
330 System.out.println("Flattened time: " + time.getTime());
340 System.out.println();
350 // print out the current time
360 System.out.println("Current time: " + Calendar.getInstance().getTime());
370 }

In the code above, the object's restoration occurs on line 210 with the ObjectInputStream.readObject() method call. The method call reads in the raw bytes that we previously persisted and creates a live object that is an exact replica of the original. Because readObject() can read any serializable object, a cast to the correct type is required. With that in mind, the class file must be accessible from the system in which the restoration occurs. In other words, the object's class file and methods are not saved; only the object's state is saved.

Later, on line 360, we simply call the getTime() method to retrieve the time that the original object flattened. The flatten time is compared to the current time to demonstrate that the mechanism indeed worked as expected.

Nonserializable Objects

The basic mechanism of Java serialization is simple to use, but there are some more things to know. As mentioned before, only objects marked Serializable can be persisted. The java.lang.Object class does not implement that interface. Therefore, not all the objects in Java can be persisted automatically. The good news is that most of them -- like AWT and Swing GUI components, strings, and arrays -- are serializable.

On the other hand, certain system-level classes such as Thread, OutputStream and its subclasses, and Socket are not serializable. Indeed, it would not make any sense if they were. For example, thread running in my JVM would be using my system's memory. Persisting it and trying to run it in your JVM would make no sense at all. Another important point about java.lang.Object not implementing the Serializable interface is that any class you create that extends only Object (and no other serializable classes) is not serializable unless you implement the interface yourself (as done with the previous example).

That situation presents a problem: what if we have a class that contains an instance of Thread? In that case, can we ever persist objects of that type? The answer is yes, as long as we tell the serialization mechanism our intentions by marking our class's Thread object as transient.

Let's assume we want to create a class that performs an animation. I will not actually provide the animation code here, but here is the class we'll use:

10 import;
20 public class PersistentAnimation implements Serializable, Runnable
30 {
40 transient private Thread animator;
50 private int animationSpeed;
60 public PersistentAnimation(int animationSpeed)
70 {
80 this.animationSpeed = animationSpeed;
90 animator = new Thread(this);
100 animator.start();
110 }
120 public void run()
130 {
140 while(true)
150 {
160 // do animation here
170 }
180 }
190 }

When we create an instance of the PersistentAnimation class, the thread animator will be created and started as we expect. We've marked the thread on line 40 transient to tell the serialization mechanism that the field should not be saved along with the rest of that object's state (in that case, the field speed). The bottom line: you must mark transient any field that either cannot be serialized or any field you do not want serialized. Serialization does not care about access modifiers such as private -- all nontransient fields are considered part of an object's persistent state and are eligible for persistence.

Therefore, we have another rule to add. Here are both rules concerning persistent objects:

  • Rule #1: The object to be persisted must implement the Serializable interface or inherit that implementation from its object hierarchy
  • Rule #2: The object to be persisted must mark all nonserializable fields transient

Customize the Default Protocol

Let's move on to the second way to perform serialization: customize the default protocol. Though the animation code above demonstrates how a thread could be included as part of an object while still making that object be serializable, there is a major problem with it if we recall how Java creates objects. To wit, when we create an object with the new keyword, the object's constructor is called only when a new instance of a class is created. Keeping that basic fact in mind, let's revisit our animation code. First, we instantiate an object of type PersistentAnimation, which begins the animation thread sequence. Next, we serialize the object with that code:

PersistentAnimation animation = new PersistentAnimation(10);
FileOutputStream fos = ...
ObjectOutputStream out = new ObjectOutputStream(fos);

All seems fine until we read the object back in with a call to the readObject() method. Remember, a constructor is called only when a new instance is created. We are not creating a new instance here, we are restoring a persisted object. The end result is the animation object will work only once, when it is first instantiated. Kind of makes it useless to persist it, huh?

Well, there is good news. We can make our object work the way we want it to; we can make the animation restart upon restoration of the object. To accomplish that, we could, for example, create a startAnimation() helper method that does what the constructor currently does. We could then call that method from the constructor, after which we read the object back in. Not bad, but it introduces more complexity. Now, anyone who wants to use that animation object will have to know that method has to be called following the normal deserialization process. That does not make for a seamless mechanism, something the Java Serialization API promises developers.

There is, however, a strange yet crafty solution. By using a built-in feature of the serialization mechanism, developers can enhance the normal process by providing two methods inside their class files. Those methods are:

  • private void writeObject(ObjectOutputStream out) throws IOException;
  • private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException;

Notice that both methods are (and must be) declared private, proving that neither method is inherited and overridden or overloaded. The trick here is that the virtual machine will automatically check to see if either method is declared during the corresponding method call. The virtual machine can call private methods of your class whenever it wants but no other objects can. Thus, the integrity of the class is maintained and the serialization protocol can continue to work as normal. The serialization protocol is always used the same way, by calling either ObjectOutputStream.writeObject() or ObjectInputStream.readObject(). So, even though those specialized private methods are provided, the object serialization works the same way as far as any calling object is concerned.

Considering all that, let's look at a revised version of PersistentAnimation that includes those private methods to allow us to have control over the deserialization process, giving us a pseudo-constructor:

10 import;
20 public class PersistentAnimation implements Serializable, Runnable
30 {
40 transient private Thread animator;
50 private int animationSpeed;
60 public PersistentAnimation(int animationSpeed)
70 {
80 this.animationSpeed = animationSpeed;
90 startAnimation();
100 }
110 public void run()
120 {
130 while(true)
140 {
150 // do animation here
160 }
170 }
180 private void writeObject(ObjectOutputStream out) throws IOException
190 {
200 out.defaultWriteObject();
220 }
230 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException
240 {
250 // our "pseudo-constructor"
260 in.defaultReadObject();
270 // now we are a "live" object again, so let's run rebuild and start
280 startAnimation();
300 }
310 private void startAnimation()
320 {
330 animator = new Thread(this);
340 animator.start();
350 }
360 }

Notice the first line of each of the new private methods. Those calls do what they sound like -- they perform the default writing and reading of the flattened object, which is important because we are not replacing the normal process, we are only adding to it. Those methods work because the call to ObjectOutputStream.writeObject() kicks off the serialization protocol. First, the object is checked to ensure it implements Serializable and then it is checked to see whether either of those private methods are provided. If they are provided, the stream class is passed as the parameter, giving the code control over its usage.

Those private methods can be used for any customization you need to make to the serialization process. Encryption could be added to the output and decryption to the input (note that the bytes are written and read in cleartext with no obfuscation at all). They could be used to add extra data to the stream, perhaps a company versioning code. The possibilities are truly limitless.

Stop That Serialization!

OK, we have seen quite a bit about the serialization process, now let's see some more. What if you create a class whose superclass is serializable but you do not want that new class to be serializable? You cannot unimplement an interface, so if your superclass does implement Serializable, your new class implements it, too (assuming both rules listed above are met). To stop the automatic serialization, you can once again use the private methods to just throw the NotSerializableException. Here is how that would be done:

10 private void writeObject(ObjectOutputStream out) throws IOException
20 {
30 throw new NotSerializableException("Not today!");
40 }
50 private void readObject(ObjectInputStream in) throws IOException
60 {
70 throw new NotSerializableException("Not today!");
80 }

Any attempt to write or read that object will now always result in the exception being thrown. Remember, since those methods are declared private, nobody could modify your code without the source code available to them -- no overriding of those methods would be allowed by Java.

Create Your Own Protocol: the Externalizable Interface

Our discussion would be incomplete not to mention the third option for serialization: create your own protocol with the Externalizable interface. Instead of implementing the Serializable interface, you can implement Externalizable, which contains two methods:

  • public void writeExternal(ObjectOutput out) throws IOException;
  • public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException;

Just override those methods to provide your own protocol. Unlike the previous two serialization variations, nothing is provided for free here, though. That is, the protocol is entirely in your hands. Although it's the more difficult scenario, it's also the most controllable. An example situation for that alternate type of serialization: read and write PDF files with a Java application. If you know how to write and read PDF (the sequence of bytes required), you could provide the PDF-specific protocol in the writeExternal and readExternal methods.

Just as before, though, there is no difference in how a class that implements Externalizable is used. Just call writeObject() or readObject and, voila, those externalizable methods will be called automatically.


There are a few things about the serialization protocol that can seem very strange to developers who are not aware. Of course, that is the purpose of the article -- to get you aware! So let's discuss a few of those gotchas and see if we can understand why they exist and how to handle them.

Caching Objects in the Stream

First, consider the situation in which an object is written to a stream and then written again later. By default, an ObjectOutputStream will maintain a reference to an object written to it. That means that if the state of the written object is written and then written again, the new state will not be saved! Here is a code snippet that shows that problem in action:

10 ObjectOutputStream out = new ObjectOutputStream(...);
20 MyObject obj = new MyObject(); // must be Serializable
30 obj.setState(100);
40 out.writeObject(obj); // saves object with state = 100
50 obj.setState(200);
60 out.writeObject(obj); // does not save new object state

There are two ways to control that situation. First, you could make sure to always close the stream after a write call, ensuring the new object is written out each time. Second, you could call the ObjectOutputStream.reset() method, which would tell the stream to release the cache of references it is holding so all new write calls will actually be written. Be careful, though -- the reset flushes the entire object cache, so all objects that have been written could be rewritten.

Version Control

With our second gotcha, imagine you create a class, instantiate it, and write it out to an object stream. That flattened object sits in the file system for some time. Meanwhile, you update the class file, perhaps adding a new field. What happens when you try to read in the flattened object?

Well, the bad news is that an exception will be thrown -- specifically, the -- because all persistent-capable classes are automatically given a unique identifier. If the identifier of the class does not equal the identifier of the flattened object, the exception will be thrown. However, if you really think about it, why should it be thrown just because I added a field? Couldn't the field just be set to its default value and then written out next time?

Yes, but it takes a little code manipulation. The identifier that is part of all classes is maintained in a field called serialVersionUID. If you wish to control versioning, you simply have to provide the serialVersionUID field manually and ensure it is always the same, no matter what changes you make to the classfile. You can use a utility that comes with the JDK distribution called serialver to see what that code would be by default (it is just the hash code of the object by default).

Here is an example of using serialver with a class called Baz:

> serialver Baz
> Baz: static final long serialVersionUID = 10275539472837495L;

Simply copy the returned line with the version ID and paste it into your code. (On a Windows box, you can run that utility with the - show option to simplify the copy and paste procedure.) Now, if you make any changes to the Baz class file, just ensure that same version ID is specified and all will be well.

The version control works great as long as the changes are compatible. Compatible changes include adding or removing a method or a field. Incompatible changes include changing an object's hierarchy or removing the implementation of the Serializable interface. A complete list of compatible and incompatible changes is given in the Java Serialization Specification.

Performance Considerations

Our third gotcha: the default mechanism, although simple to use, is not the best performer. I wrote out a Date object to a file 1,000 times, repeating that procedure 100 times. The average time to write out the Date object was 115 milliseconds. I then manually wrote out the Date object, using standard I/O the same number of iterations; the average time was 52 milliseconds. Almost half the time! There is often a trade-off between convenience and performance, and serialization proves no different. If speed is the primary consideration for your application, you may want to consider building a custom protocol.

Another consideration concerns the aforementioned fact that object references are cached in the output stream. Due to that, the system may not garbage collect the objects written to a stream if the stream is not closed. The best move, as always with I/O, is to close the streams as soon as possible, following the write operations.


Serialization in Java is simple to instigate and almost as simple to implement. Understanding the three different ways of implementing serialization should aid in bending the API to your will. We have seen a lot of the serialization mechanism in that article, and I hope it made things clearer and not worse. The bottom line, as with all coding, is to maintain common sense within the bounds of API familiarity. That article has laid out a strong basis of understanding the Java Serialization API, but I recommend perusing the specification to discover more fine-grained details.

Coffecup Logo

Reprinted with permission from the June 2000 edition of JavaWorld magazine. Copyright, Inc., an IDG Communications company. Register for editorial e-mailalerts

About the Author

Todd Greanier,director of technology for ComTech Training, has been teaching and developing Java since it was introduced publicly. An expert in distributed Java technologies, he teaches classes in a wide range of topics, including JDBC, RMI, CORBA, UML, Swing, servlets/JSP, security, JavaBeans, Enterprise Java Beans, and multithreading. He also creates custom seminars for corporations, slanted to their specific needs. Todd lives in upstate New York with his wife, Stacey, and his cat, Bean.

1 As used on this web site, the terms "Java virtual machine" or "JVM" mean a virtual machine for the Java platform.

Friday, November 18, 2005


Everything picked up from

What is MultiThreading and synchronization? ( From the NET)
"With respect to multithreading, synchronization is the capability to control the access of multiple threads to shared resources. Without synchonization, it is possible for one thread to modify a shared variable while another thread is in the process of using or updating same shared variable. This usually leads to significant errors."

Different ways of using thread?
The thread could be implemented by using runnable interface or by inheriting from the Thread class. The former is more advantageous, 'cause when you are going for multiple inheritance..the only interface can help."

Difference Between array and vectorlist?
Vector is synchronized whereas arraylist is not.

Final in java?
  • A final class can't be extended ie., final class may not be subclassed.
  • A final method can't be overridden when its class is inherited.
  • You can't change value of a final variable (is a constant).
What is serialization?
  • Serialization is a mechanism by which you can save the state of an object by converting it to a byte stream.
  • Whenever an object is to be sent over the network, objects need to be serialized. Moreover if the state of an object is to be saved, objects need to be serilazed
  • One should make sure that all the included objects are also serializable. If any of the objects is not serializable then it throws a NotSerializableException.
This keyword indicates that the value of this member variable does not have to be serialized with the object. When the class will be de-serialized, this variable will be initialized with a default value of its data type (i.e. zero for integers).

Different ways to handle exceptions?
There are two ways to handle exceptions,
1. By wrapping the desired code in a try block followed by a catch block to catch the exceptions. and
2. List the desired exceptions in the throws clause of the method and let the caller of the method handle those exceptions.

In the first approach as a programmer of the method, you urself are dealing with the exception. This is fine if you are in a best position to decide should be done in case of an exception. Whereas if it is not the responsibility of the method to deal with it's own exceptions, then do not use this approach. In this case use the second approach. In the second approach we are forcing the caller of the method to catch the exceptions, that the method is likely to throw. This is often the approach library creators use. They list the exception in the throws clause and we must catch them. You will find the same approach throughout the java libraries we use.

Some Gotchas..
If I write return at the end of the try block, will the finally block still execute
Yes even if you write return as the last statement in the try block and no exception occurs, the finally block will execute. The finally block will execute and then the control return.

If I write System.exit (0); at the end of the try block, will the finally block still execute?
No in this case the finally block will not execute because when you say System.exit (0); the control immediately goes out of the program, and thus finally never executes.

Sunday, November 13, 2005

SQL Creation of a New table

select * into newtable from existing table

Saturday, November 05, 2005

Const in C++

from C++ FAQ

You have to read pointer declarations right-to-left.

  • const Fred* p means "p points to a Fred that is const" — that is, the Fred object can't be changed via p.
  • Fred* const p means "p is a const pointer to a Fred" — that is, you can change the Fred object via p, but you can't change the pointer p itself.
  • const Fred* const p means "p is a const pointer to a const Fred" — that is, you can't change the pointer p itself, nor can you change the Fred object via p.

OO Concepts

What is Encapsulation?
Encapsulation is the ability to hide the internal workings of an object's behavior and its data. For instance, let's say you have a object named Car and this object has a method (another word for behavior) named start(). When you create an instance of a car object and call its start() method you are not worried about what happens to accomplish this, you just want to make sure the state of the car is changed to 'running' afterwards. This kind of behavior hiding is encapsulation and it makes programming much easier. When you want your car object to be in a 'running' state, instead of calling: fuel.on(), starter.on(), etc., you just call start(). This not only makes it easier to work with, but if the internal workings of this start() method have to change, the results will be the same.

What is Polymorphism?
Polymorphism as the name suggests is the ability of the object to be treated differently based on the data types and classes

It is provided in two fashion of Static and Dynamic Binding

Definition (Static Binding)
If the type T of a variable is explicitly associated with its name N by declaration, we say, that N is statically bound to T. The association process is called static binding.

Definition (Dynamic Binding) If the type T of a variable with name N is implicitly associated by its content, we say, that N is dynamically bound to T. The association process is called dynamic binding.

OCI Example

Wednesday, November 02, 2005

ADO.NET architecture

Reading Data from SQL Server 2005 from MSDN

public void ReadMyData(string myConnString) {
string mySelectQuery = "SELECT OrderID, CustomerID FROM Orders";
SqlConnection myConnection = new SqlConnection(myConnString);
SqlCommand myCommand = new SqlCommand(mySelectQuery,myConnection);
SqlDataReader myReader;
myReader = myCommand.ExecuteReader();
// Always call Read before accessing data.
while (myReader.Read()) {
Console.WriteLine(myReader.GetInt32(0) + ", " + myReader.GetString(1));
// always call Close when done reading.
// Close the connection when done with it.