We continue our discussion of statistical analysis in Natural Language Processing.
Word2vec uses the continuous bag-of-words (CBOW) model, which predicts a word from its surrounding words, and the skip-gram model, which predicts the surrounding words from the current word. Skip-gram models the conditional probability of a context c given a word w, with parameters theta, using the softmax function:
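Written out, the skip-gram softmax takes the form

p(c | w; θ) = exp(v'_c · v_w) / Σ_{c' ∈ C} exp(v'_{c'} · v_w)

where v_w is the input vector of the word w, v'_c is the output vector of the context c, and C is the set of all available contexts (the whole vocabulary in the basic model).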
The denominator in the softmax is the sum of such exponentiated dot products taken over the entire vocabulary, where v_w and v'_w are the input and output vector representations of word w in the vocabulary W. This expression is heavily optimized in practice, but the idea is that each word tries to predict every word around it. Later work by Levy and Goldberg predicts the word not just from the surrounding words but from its context, where the context is not limited to words and can be anything that contributes to it.
The denominator is also rather large: a summation over hundreds of thousands of contexts, using the high-dimensional input and output vectors for every word in the vocabulary W, is too expensive to compute directly. Word2Vec mitigates this with hierarchical softmax, and the stated assumption is that choosing a context for a word so that it scores better than, say, the expected value of all the others results in good word embeddings. This follows from the belief that the improvement comes because similar words have similar vectors. Intuitively, this gives us the notion of a thesaurus: people use different words to mean the same thing, and those shared meanings show up as similar degrees of closeness between a word and its context within the sampling window.
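As a rough illustration of why that denominator is costly, here is a minimal C# sketch (with toy, made-up vectors and a tiny vocabulary; nothing here comes from the word2vec code itself) that computes the skip-gram softmax for one word-context pair by summing over every context vector:

using System;
using System.Linq;

static class SkipGramSketch
{
    // Dot product of two dense vectors of equal length.
    static double Dot(double[] a, double[] b) =>
        a.Zip(b, (x, y) => x * y).Sum();

    // p(c | w) = exp(v'_c . v_w) / sum over c' of exp(v'_{c'} . v_w)
    // inputVec   : input vector v_w of the word
    // outputVecs : output vectors v'_c for every context in the vocabulary
    static double SoftmaxProbability(double[] inputVec, double[][] outputVecs, int contextIndex)
    {
        // The denominator walks the entire vocabulary -- this is the expensive part.
        double denominator = outputVecs.Sum(v => Math.Exp(Dot(v, inputVec)));
        return Math.Exp(Dot(outputVecs[contextIndex], inputVec)) / denominator;
    }

    static void Main()
    {
        // Toy 3-dimensional vectors for a 4-word vocabulary (purely illustrative).
        var wordVec = new[] { 0.1, 0.3, -0.2 };
        var contextVecs = new[]
        {
            new[] { 0.2, 0.1, 0.0 },
            new[] { -0.1, 0.4, 0.3 },
            new[] { 0.0, -0.2, 0.1 },
            new[] { 0.3, 0.2, -0.1 }
        };
        Console.WriteLine(SoftmaxProbability(wordVec, contextVecs, 1));
    }
}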
#codingquestion
Given a stream of digits, enumerate the possible decodings, assuming leading, trailing and consecutive zeros are excluded.
For example, 121 => ABA, AU, LA
where A = 1, B = 2, ..., Z = 26
// Enumerates decodings of digits[0..n-1], working from the end of the list toward
// the front; letters are therefore appended in reverse order, and the accumulated
// string is reversed before being added to results.
void decode(List<int> digits, int n, StringBuilder s, List<string> results)
{
    if (n == 0)
    {
        var chars = s.ToString().ToCharArray();
        Array.Reverse(chars);
        results.Add(new string(chars));
        return;
    }
    // Single-digit decode of digits[n-1]; a zero cannot stand alone.
    if (digits[n - 1] > 0)
    {
        s.Append(ToLetter(digits, n - 1, 1));
        decode(digits, n - 1, s, results);
        s.Length--; // backtrack
    }
    // Two-digit decode of digits[n-2..n-1]; valid only for values 10 to 26.
    if (n > 1 && (digits[n - 2] == 1 || (digits[n - 2] == 2 && digits[n - 1] < 7)))
    {
        s.Append(ToLetter(digits, n - 2, 2));
        decode(digits, n - 2, s, results);
        s.Length--; // backtrack
    }
}
// Reads digits from a stream and enumerates the decodings of the prefix seen so far
// each time a new digit arrives.
void decodeStream(Stream numbers, List<string> results)
{
    var digits = new List<int>();
    int current = numbers.ReadByte();
    while (current != -1) // ReadByte returns -1 at end of stream
    {
        digits.Add(current - '0'); // ASCII digit character to its integer value
        var s = new StringBuilder();
        decode(digits, digits.Count, s, results);
        current = numbers.ReadByte();
    }
}
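A short usage sketch follows. The ToLetter helper is referenced but not defined above, so the definition here is a plausible assumption that maps one or two digits to the corresponding letter with A = 1 through Z = 26:

// Hypothetical helper assumed by decode: maps the len digits starting at index i
// (len is 1 or 2) to the corresponding letter, with A = 1 ... Z = 26.
char ToLetter(List<int> digits, int i, int len)
{
    int value = len == 1 ? digits[i] : digits[i] * 10 + digits[i + 1];
    return (char)('A' + value - 1);
}

var results = new List<string>();
var s = new StringBuilder();
decode(new List<int> { 1, 2, 1 }, 3, s, results);
// results now contains "ABA", "LA" and "AU"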
Convert a BST into an inorder singly linked list in place, reusing the right pointers as the next links.
// Flattens a BST into its inorder sequence in place, reusing the right pointers
// as next links, and returns the head of the resulting list.
Node toList(Node root)
{
    if (root == null) return root;
    Node head = root;
    if (root.left != null)
    {
        // Flatten the left subtree, then splice its tail onto root.
        head = toList(root.left);
        var tail = head;
        for (; tail.right != null; tail = tail.right);
        tail.right = root;
    }
    if (root.right != null)
    {
        // Flatten the right subtree; its head follows root directly.
        root.right = toList(root.right);
    }
    return head;
}
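A minimal sketch of the assumed Node type and a usage example; the field names are assumptions chosen to match how the function above uses them:

class Node
{
    public int value;
    public Node left, right;
    public Node(int v) { value = v; }
}

// Build a small BST: 2 with left child 1 and right child 3.
var root = new Node(2) { left = new Node(1), right = new Node(3) };
var head = toList(root);
for (var n = head; n != null; n = n.right)
    Console.Write(n.value + " "); // prints 1 2 3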