Smart string matching algorithm - string

I'm looking for an algorithm that can be used to compare two sentences and provide a matching score. For example,
INPUT
Sentence Input 1: "A quick brown fox jumps over the lazy dog"
Sentence Input 2: "Alpha bravo dog quick beta gamma fox over the lazy dug"
OUTPUT
Output: "A quick brown fox jumps over the lazy dog"
Score: 5 out of 9 (words found in correct order)
Can someone please point me in the right direction?

Can you try this c++ based algorithm to solve the above problem?
// Returns how many words of s1 can be found in s2 in the same relative order,
// i.e. the length of the longest common subsequence (LCS) of the two word
// sequences. Words are compared exactly (case-sensitive).
//
// s1, s2: sentences whose words are separated by whitespace.
// Returns 0 when either sentence contains no words.
int MatchingScore(std::string s1, std::string s2) {
    // Tokenise with operator>> so that runs of whitespace (or leading/trailing
    // whitespace) never produce empty "words".
    std::vector<std::string> tokens1;
    std::vector<std::string> tokens2;
    std::string word;
    std::stringstream ss1(s1);
    while (ss1 >> word) {
        tokens1.push_back(word);
    }
    std::stringstream ss2(s2);
    while (ss2 >> word) {
        tokens2.push_back(word);
    }

    // Classic LCS dynamic programme, keeping only two rows of the table:
    // prev[j] = LCS of tokens1[0..i-1) and tokens2[0..j),
    // curr[j] = LCS of tokens1[0..i)   and tokens2[0..j).
    std::vector<int> prev(tokens2.size() + 1, 0);
    std::vector<int> curr(tokens2.size() + 1, 0);
    for (std::size_t i = 1; i <= tokens1.size(); ++i) {
        for (std::size_t j = 1; j <= tokens2.size(); ++j) {
            if (tokens1[i - 1] == tokens2[j - 1]) {
                curr[j] = prev[j - 1] + 1;
            } else {
                curr[j] = (prev[j] > curr[j - 1]) ? prev[j] : curr[j - 1];
            }
        }
        // Column 0 is 0 in both rows, so the swapped-in row needs no reset.
        prev.swap(curr);
    }
    return prev[tokens2.size()];
}

You can try this java code:
// Scores how many words of `sentence` occur in `searchPhrase` as whole words
// and in the same order (case-insensitive). For the sample data this prints
// "Score: 5 out of 9".
String sentence = "A quick brown fox jumps over the lazy dog";
String searchPhrase = "Alpha bravo dog quick beta gamma fox over the lazy dug";
String[] sentenceWords = sentence.trim().toLowerCase().split("\\s+");
String[] searchwords = searchPhrase.trim().toLowerCase().split("\\s+");
int score = 0;
// Index of the first phrase word that may still be matched; it only moves
// forward, which is what enforces "in correct order".
int cursor = 0;
for (String wanted : sentenceWords) {
    for (int i = cursor; i < searchwords.length; i++) {
        // equals() compares whole words, so "a" no longer matches inside "lazy".
        if (searchwords[i].equals(wanted)) {
            score += 1;
            cursor = i + 1;
            break;
        }
    }
}
System.out.println("Score: " + score + " out of " + sentenceWords.length);

A working solution, tested with variations. It still needs to be optimized (with regex, etc.).
Output:
Score: 5 out of 9
A *quick* brown *fox* jumps *over* *the* *lazy* dog
Java code is written in Android Studio junit
// Marks every word of `sentence` that can be found in `searchPhrase` as a
// whole word, in order (case-insensitive), and counts those words.
String sentence = "A quick brown fox jumps over the lazy dog";
String searchPhrase = "Alpha bravo dog quick beta gamma fox over the lazy dug";
String[] searchWords = sentence.trim().split("\\s+");
// Lower-case the phrase once so matching and position tracking agree on case.
String[] phraseWords = searchPhrase.trim().toLowerCase().split("\\s+");
// Copy of the sentence words; matched entries are replaced by "*word*".
String[] marked = searchWords.clone();
int score = 0;
// First phrase index still available for matching; advancing it past each
// match consumes the whole matched word and everything before it.
int cursor = 0;
for (int i = 0; i < searchWords.length; i++) {
    String wanted = searchWords[i].toLowerCase();
    for (int j = cursor; j < phraseWords.length; j++) {
        if (phraseWords[j].equals(wanted)) {
            score += 1;
            // Mark by position, so a repeated word is highlighted at the
            // occurrence that matched and no regex escaping is needed.
            marked[i] = "*" + searchWords[i] + "*";
            cursor = j + 1;
            break;
        }
    }
}
String outputString = String.join(" ", marked);
System.out.println("Score: " + score + " out of " + searchWords.length);
System.out.println(outputString.trim());
assertEquals("A *quick* brown *fox* jumps *over* *the* *lazy* dog", outputString.trim());

Related

Autohotkey Align Text(Words) columns

How can I align a text on words in AutoHotkey?
In the sample, the text before the word quick and the word fox should be padded with spaces, so that all the words are vertically aligned.
oldtext =
(
a slow dog
the quick red fox
my quick brown and friendly fox
a quick yellow fox
a slow cat
the fox
)
newtext = TextAlign(oldtext, "quick")
newtext = TextAlign(newtext, "fox")
(
a slow dog
the quick red fox
my quick brown and friendly fox
a quick yellow fox
a slow cat
the fox
)
; Sample input: the continuation section assigns the six lines below to oldtext.
oldtext =
(
a slow dog
the quick red fox
my quick brown and friendly fox
a quick yellow fox
a slow cat
the fox
)
; Align on "quick" first, then on "fox"; the second call works on the result of the first.
newtext := TextAlign(oldtext, "quick")
newtext := TextAlign(newtext, "fox")
; Put the aligned text on the clipboard.
clipboard:=newtext
; Pads each line of `text` with spaces in front of the first occurrence of `x`
; so that `x` starts in the same column on every line that contains it.
;   text - lines separated by `n (a trailing `r on a line is stripped)
;   x    - the word to align on; matched literally and case-insensitively
; Returns the re-assembled text; every line, including the last, ends with `n.
; Lines that do not contain `x` are passed through unchanged.
TextAlign(text, x) {
    lines := StrSplit(text, "`n", "`r")
    ; Pass 1: find the right-most starting column of x over all lines.
    maxPos := 0
    for index, line in lines {
        pos := InStr(line, x)
        if (pos > maxPos)
            maxPos := pos
    }
    ; Pass 2: insert the missing spaces directly in front of x. Using the same
    ; InStr position for locating and for cutting keeps the text after x intact,
    ; even if x occurs more than once or differs in case.
    newText := ""
    for index, line in lines {
        pos := InStr(line, x)
        if (pos && pos < maxPos)
            line := SubStr(line, 1, pos - 1) . padSpaces(maxPos - pos) . SubStr(line, pos)
        newText .= line "`n"
    }
    return newText
}
; Returns a string made of x space characters.
padSpaces(x) {
    Loop, %x%
        blanks .= " "
    return blanks
}
results:
a slow dog
the quick red fox
my quick brown and friendly fox
a quick yellow fox
a slow cat
the fox
thanks, my version is a little bit longer but it works too.
; Pads `str` with `padchar` until it is `padlen` characters long.
;   str     - the string to pad
;   padchar - the padding character (only its first character is used)
;   padlen  - the desired total length
;   left    - true (default): pad on the left; false: pad on the right
; Returns `str` unchanged when it is already padlen characters or longer.
StrPad(str, padchar, padlen, left=1)
{
    ; Build the padding by concatenation rather than by byte-filling a buffer,
    ; so the result does not depend on the size of a character in this build.
    fill := SubStr(padchar, 1, 1)
    missing := padlen - StrLen(str)
    w := ""
    if (missing > 0)
    {
        Loop, %missing%
            w .= fill
    }
    return left ? w str : str w
}
; Aligns the first occurrence of xFind in every line of data to one column.
;   data  - text whose lines are separated by `n (`r is ignored)
;   xFind - the word to align on (located with a case-insensitive InStr)
; Returns the processed text; every line is followed by `n.
TextAlign(data, xFind)
{
    ; Pass 1: right-most starting column of xFind over all lines.
    widest := 0
    Loop, Parse, data, `n, `r
    {
        col := InStr(A_LoopField, xFind, false)
        if (col > widest)
            widest := col
    }
    ; Pass 2: lines where xFind starts further left get the missing spaces
    ; inserted in front of it; all other lines are copied as they are.
    result := ""
    Loop, Parse, data, `n, `r
    {
        row := A_LoopField
        col := InStr(row, xFind, false)
        if ( (col > 0) AND (col < widest) )
        {
            padded := StrPad(""," ",widest-col) xFind
            StringReplace, row, row, %xFind%, %padded%
        }
        result .= row "`n"
    }
    return result
}

How to count letters, digits, spaces in a string

I'm working on a code to count the amount of spaces/digits/letters inside a given input using a loop. I am trying to use the .isdigit() and .isalpha() method to see if each letter in the string is a letter or number, and if true, adding it to the count. My code looks perfect to me, but when I run it, I only get the count for length and spaces (Which is not using the .isspace() method)
Perhaps I am messing up when updating the count within my loop but again.. it all looks good to me, could anyone help steer me in the right direction?
def main():
    """Prompt for a sentence on stdin and print its statistics."""
    printStats(input('Enter a sentence: '))
def printStats(input):
    """Print the character, letter, digit and space counts of ``input``.

    Each figure is computed by the matching ``*Count`` helper and printed on
    its own line, after a heading line.
    """
    print('Statistics on your sentence: ')
    # (label, counting function) pairs, in the order they are reported.
    report = (
        (' Characters:', charCount),
        (' Letters:', letterCount),
        (' Digits:', digitCount),
        (' Spaces:', spaceCount),
    )
    for label, counter in report:
        print(label, counter(input))
def charCount(input):
    """Return the total number of characters in ``input``.

    Returns 0 for an empty string.
    """
    # len() already counts every character; no loop is needed.
    return len(input)
#Section below is where I need help
def letterCount(input):
    """Return how many characters of ``input`` are letters.

    Each character is tested on its own with ``str.isalpha``; testing the
    whole string would only be true when *every* character is a letter.
    """
    return sum(1 for letter in input if letter.isalpha())
def digitCount(input):
    """Return how many characters of ``input`` are digits.

    Each character is tested on its own with ``str.isdigit``; testing the
    whole string would only be true when *every* character is a digit.
    """
    return sum(1 for digit in input if digit.isdigit())
#Section above is where I need help
def spaceCount(input):
    """Return the number of space characters (``" "``) in ``input``.

    Returns 0 for an empty string.
    """
    # str.count scans the whole string itself; no loop is needed.
    return input.count(" ")
# Run the prompt only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Thanks for your time
package com.drools;
/**
 * Counts the letters, digits and spaces of a piece of text and prints the
 * totals on a single line.
 */
public class TEST {
    /**
     * Analyses the command-line arguments joined by single spaces, or the
     * sample text "NEW YORK 1" when no arguments are given.
     *
     * @param args optional words to analyse instead of the built-in sample
     */
    public static void main(String[] args) {
        String word = args.length > 0 ? String.join(" ", args) : "NEW YORK 1";
        int charCount = 0;
        int digitCount = 0;
        int k = 0; // number of space characters
        // Single pass: every character is classified exactly once, so the
        // totals are correct for any number of digits (including none).
        for (int i1 = 0; i1 < word.length(); i1++) {
            char temp = word.charAt(i1);
            if (Character.isLetter(temp)) {
                charCount++;
            } else if (Character.isDigit(temp)) {
                digitCount++;
            } else if (temp == ' ') {
                k++;
            }
        }
        // Report once, after the whole text has been scanned.
        System.out.println("total count "+ word.length() + "||number of spaces in the entire word "+ k + " ||characters " + charCount+ " || digits" + digitCount);
    }
}
**Output:**
total count 10||number of spaces in the entire word 2 ||characters 7 || digits1
You need to do letter.isalpha() and digit.isdigit() instead of calling them on the entire input.
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads one line from standard input and reports how many ASCII letters,
 * digits and whitespace characters it contains.
 */
public class LettersDigitsSpace {
    /** Returns the number of matches of {@code pattern} found in {@code text}. */
    private static int occurrences(Pattern pattern, String text) {
        int total = 0;
        for (Matcher m = pattern.matcher(text); m.find(); ) {
            total++;
        }
        return total;
    }

    /**
     * Prints a banner and a prompt, reads a single line and prints the counts.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        final String rule = "-------------------------------------------------";
        System.out.println();
        System.out.println(rule);
        System.out.println("--- Letters, Digits, and White Spaces counter ---");
        System.out.println(rule);
        System.out.println();
        System.out.println("Enter String: ");
        String val = input.nextLine();
        int letters = occurrences(Pattern.compile("[a-zA-Z]"), val);
        int digits = occurrences(Pattern.compile("\\d"), val);
        int blanks = occurrences(Pattern.compile("\\s"), val);
        System.out.println("\nLetter count: "+letters+"\nDigit count: " + digits + "\nWhite Space count: " + blanks);
    }
}

Why am I getting incorrect values for string length?

My professor is teaching us Scala using Horstmann's book "Scala for the Impatient", and one of our homework exercises is straight from the book: Chapter 4, exercise 2.
We are expected to read in the eBook in text format; the professor has specified that the input file should be "Moby Dick", available for free from Project Gutenberg here: http://www.gutenberg.org/ebooks/2701.txt.utf-8
My code works, as far as counting instances of words. However, he has added the requirement that we must format the output in two columns, with words left justified, and counts right justified. To do so, I am determining the longest word in the book so I can figure the width of the "word" column. However, the values I am getting for the length of the strings are just wrong. In fact, it tells me that all the strings are the same length. "a" is being reported as length 26, just as is "Whale", "Ishmael", etc...
Here's the code:
/** Counts the words of "moby.txt" and reports the longest word found.
  *
  * Each whitespace-separated token is lower-cased and stripped of every
  * non-letter character before it is counted.
  */
object Chapter4Exercise2 extends App {
  //for sorting
  import util.Sorting._
  //grab the file
  val inputFile = new java.util.Scanner(new java.io.File("moby.txt"))
  //create a mutable map where key/values == word/count
  val wordMap = collection.mutable.Map[String, Int]() withDefault (_ => 0)
  //for formatting output (later), the longest word length is relevant
  var longestWord = 0
  var theWord: String = ""
  try {
    //start reading each word in the input file
    while (inputFile.hasNext) {
      //grab the next word for processing, convert it to lower case, trim spaces and punctuation
      val nextWord = inputFile.next().toLowerCase().trim().filter(Character.isLetter(_))
      //if it's the longest word, update both theWord and longestWord;
      //the braces make all three statements conditional, not just the first
      if (nextWord.size > longestWord) {
        longestWord = nextWord.size
        theWord = nextWord
        println(theWord + " " + longestWord)
      }
      //update the map value for the key with same value as nextWord
      wordMap(nextWord) += 1
    }
  } finally {
    //release the file handle even if reading fails part-way
    inputFile.close()
  }
  println("Longest word is " + theWord + " at " + longestWord + " Characters")
}
The output of these lines:
if (nextWord.size > longestWord) longestWord = nextWord.size; theWord = nextWord; println(theWord + " " + longestWord)
and
println("Longest word is " + theWord + " at " + longestWord + " Characters")
is way off. It's telling me that EVERY word in the input file is 26 characters long!
Here's a small sample of what's being output:
husks 26
on 26
a 26
surfbeaten 26
beach 26
and 26
then 26
diving 26
down 26
into 26
What am I missing/doing wrong?
if (nextWord.size > longestWord) longestWord = nextWord.size; theWord = nextWord; println(theWord + " " + longestWord)
You shouldn't write multiple statements on a single line like that. Let's write this out in multiple lines and properly indent it:
if (nextWord.size > longestWord)
longestWord = nextWord.size // only this statement belongs to the `if`
theWord = nextWord // not part of the `if`: runs for every word
println(theWord + " " + longestWord) // not part of the `if`: runs for every word
Do you see the problem now?
Try putting { and } around your if statement alternatives.
You can avoid this kind of pitfall by formatting your code in a structured manner - always using braces around code blocks.
// Intended behaviour: the braces make all three statements conditional.
if (nextWord.size > longestWord)
{
longestWord = nextWord.size;
theWord = nextWord;
println(theWord + " " + longestWord);
}
Your current code is equivalent to
// How the one-line version is actually parsed: only the first assignment is conditional.
if (nextWord.size > longestWord)
{
longestWord = nextWord.size;
}
// These two run for every word, whether or not it is the longest so far.
theWord = nextWord;
println(theWord + " " + longestWord);

How do I insert a character into a string at any given point? (Processing)

I am trying to insert a character into a string using Processing. After some reading around I tried out the following (I think Java) code:
1: String st = new String("abcde");
2: st = StringBuffer(st).insert(2, "C");
and got the following response:
the function StringBuffer(String) does not exist
Is there a different/simpler way of doing this? Do I need to use StringBuffer? I'm a fairly novice programmer so any help greatly appreciated.
Ok, so I've been looking at the processing 'Extended Language API' and there doesn't seem to be some function like that out of the box.
If you look at the String class's substring() function, you'll see an example where there is a String that is cut into two pieces at position 2. And then printed out with other characters between them. Will that help you any further?
String str1 = "CCCP";
String str2 = "Rabbit";
String ss1 = str1.substring(2); // Returns "CP"
String ss2 = str2.substring(3); // Returns "bit"
String ss3 = str1.substring(0, 2); // Returns "CC"
println(ss1 + ":" + ss2 + ":" + ss3); // Prints 'CP:bit:CC'
If we take your example, this would insert the 'C' at the right position:
String st = new String("abcde");
String p1 = st.substring(0,2); // "ab"
String p2 = st.substring(2); // "cde"
st = p1 + "C" + p2; // which will result in "abCcde"
Or create a function for it. Mind you, not super-robust (no checks for empty strings, overflow etc), but does the job:
String insert(original, toInsert, position){
String p1 = original.substring(0,position);
String p2 = original.substring(position);
return p1 + toInsert + p2;
}
...
String st = new String("abcde");
st = insert(st, "C", 2); // "abCcde"
st = insert(st, "D", 4); // "abCcDde"
tested at http://sketch.processing.org
You can insert multiple items with this function by calling it once for each insert. To make it easier, insert them from right to left, as otherwise the positions to insert at will change as you do inserts to the left.
Example:
String original="The quick fox jumped over lazy dog";
// let's insert "brown " at position 10, "the " at 26, and "'s back" at 34 to make
// "The quick brown fox jumped over the lazy dog's back"
println( original );
// insert from right to left, so the positions further left are still valid
original= insertInString( original, "'s back", 34 );
original= insertInString( original, "the ", 26 );
original= insertInString( original, "brown ", 10 );
println( original );
This generated the following output:
The quick fox jumped over lazy dog
The quick brown fox jumped over the lazy dog's back
If you insert from left to right, the positions have to be adjusted after each insert.

Split a string in actionscript?

How do I accomplish this in actionscript (example in c#):
// Count how many entries of arr contain the letter "d".
string[] arr = { "1.a", "2.b", "3.d", "4.d", "5.d" };
int countD = 0;
foreach (string entry in arr)
{
    if (entry.Contains("d"))
    {
        countD++;
    }
}
I need to count a character in an array of strings
Try this:
// Count the array entries that contain "d".
// ActionScript is case-sensitive: the Array property is `length`, not `Length`.
for(var i:int = 0; i < arr.length; i++)
{
    // indexOf returns -1 when "d" does not occur in the entry
    if(arr[i].indexOf("d") != -1)
        countD++;
}
Use indexOf rather than contains. It will return -1 if the character is not in the string, otherwise the string contains at least one instance.
Here are four ways to do it... (well, 3.something)
// Four ways to count the occurrences of "d" (case-insensitive) in a string.
var myString:String = "The quick brown fox jumped over the lazy "
    + "dog. The quick brown fox jumped over the lazy dog.";
var numOfD:int = 0;
// 1# with an array.filter
// split("") yields one array element per character
numOfD = myString.split("").filter(
    function(s:String, i:int, a:Array):Boolean {
        return s.toLowerCase() == "d"
    }
).length;
trace("1# counts ", numOfD); // output 1# counts 4
// 2# with regex match
numOfD = myString.match(/d/gmi).length;
trace("2# counts ", numOfD); // output 2# counts 4
// 3# with for loop
numOfD = 0;
// Visit every index from 0 to length - 1; incrementing after the test (not
// inside charAt) makes sure the first character is checked as well.
for (var i:int = 0; i < myString.length; i++)
    if (myString.charAt(i).toLocaleLowerCase() == "d")
        numOfD++;
trace("3# counts ", numOfD); // output 3# counts 4
// 4# with a new prototype function (and regex)
// NOTE(review): char is used as a regular-expression source, so characters
// with a special meaning in a regex are not matched literally.
String['prototype'].countOf =
    function(char:String):int {
        return this.match(new RegExp(char, "gmi")).length;
    };
// -- compiler 'strict mode' = true
numOfD = myString['countOf']("d");
trace("4# counts ", numOfD); // output 4# counts 4
// -- compiler 'strict mode' = false
numOfD = myString.countOf("d");
trace("4# counts ", numOfD); // output 4# counts 4
Use the match function on a javascript string.
http://www.cev.washington.edu/lc/CLWEBCLB/jst/js_string.html
Sorry, works the same.
Found it:
var searchString:String = "Lorem ipsum dolor sit amet.";
var index:Number;
index = searchString.indexOf("L");
trace(index); // output: 0
index = searchString.indexOf("l");
trace(index); // output: 14
index = searchString.indexOf("i");
trace(index); // output: 6
index = searchString.indexOf("ipsum");
trace(index); // output: 6
index = searchString.indexOf("i", 7);
trace(index); // output: 19
index = searchString.indexOf("z");
trace(index); // output: -1

Resources