NodeJS is faster than D when computing prime numbers. How?

NodeJS is faster than D when computing prime numbers. How? - node.js

I wrote a simple function for computing prime numbers in D. I thought it was pretty quick, calculating prime numbers up to 100,000. But then I wanted to compare it to NodeJS. When I ran the NodeJS script for the first time, I was astounded at the difference and double checked I wasn't skipping some sort of calculation some how. But the two are pretty identical functionally.
D:
import std.stdio;
import std.math;
import std.datetime;
import std.file;
import std.array;
enum size_t ITERATIONS = 100_000;
bool divisible(real n) {
real d;
for(d = 3; d < floor(n / 2); d += 2) {
if(n % d == 0) {
return true;
}
}
return false;
}
void main() {
StopWatch sw;
size_t T = ITERATIONS;
size_t C = 0;
real n = 2;
real r[ITERATIONS];
r[C] = n;
sw.start();
C++;
for(n = 3; n < T; n += 2) {
if(!divisible(n)) {
r[C] = n;
C++;
}
}
sw.stop();
double seconds = cast(double)sw.peek().usecs / 1_000_000;
writeln("\n\n", C, " prime numbers calculated in ", seconds, " seconds.");
File file = File("primes.txt", "w");
file.writeln("\n", C, " prime numbers calculated ", seconds, " seconds.");
foreach(number; r[0..C]) {
file.writeln(number);
}
file.writeln("\n", "end");
file.close();
}
NodeJS:
var fs = require('fs');
var ITERATIONS = 100000;
function divisible(n) {
var d;
for(d = 3; d < Math.floor(n / 2); d += 2) {
if(n % d == 0) {
return true;
}
}
return false;
}
(function() {
var buffer = [ ],
now = Date.now(),
C = 0
n = 2
;
buffer.push(n);
C++;
for(n = 3; n < ITERATIONS; n += 2) {
if(!divisible(n)) {
buffer.push(n);
C++;
}
}
var time = Date.now() - now,
seconds = time / 1000
;
console.log("\n\n", C, " prime numbers calculated. Process took ", seconds, " seconds.");
buffer.push("\n" + C + " prime numbers calculated. Process took " + seconds + " seconds.");
fs.writeFile("node_primes.txt", buffer.join("\n"), function(err) {
if(err) throw err;
console.log("Primes have been written to file.");
});
})();
Results:
Calculating 100,000 primes:
D: 3.49126 seconds
NodeJS: 0.652 seconds
Can anybody explain why this is happening?
Thanks in advance.

By unnecessarily declaring variables as real, you are forcing floating point arithmetic where integer arithmetic could be used. Replace all instances of real with int, get rid of that floor() and your D program will run as fast as the Node.JS version:
import std.stdio;
import std.math;
import std.datetime;
import std.file;
import std.array;
enum size_t ITERATIONS = 100_000;
bool divisible(int n) {
int d;
for(d = 3; d < n / 2; d += 2) {
if(n % d == 0) {
return true;
}
}
return false;
}
void main() {
StopWatch sw;
size_t T = ITERATIONS;
size_t C = 0;
int n = 2;
int r[ITERATIONS];
r[C] = n;
sw.start();
C++;
for(n = 3; n < T; n += 2) {
if(!divisible(n)) {
r[C] = n;
C++;
}
}
sw.stop();
double seconds = cast(double)sw.peek().usecs / 1_000_000;
writeln("\n\n", C, " prime numbers calculated in ", seconds, " seconds.");
File file = File("primes.txt", "w");
file.writeln("\n", C, " prime numbers calculated ", seconds, " seconds.");
foreach(number; r[0..C]) {
file.writeln(number);
}
file.writeln("\n", "end");
file.close();
}

Related

Google Code Jam: My solution to Train Timetable problem is failing

I'm trying to solve this problem from Google's Code Jam 2008:
The problem is called Train Timetable and you can find the full explanation here:
Code Jam - Train Timetable
Note: I've decided to solve the problem with Node.js.
My code is the next:
function timeToMinutes(time) {
const timeArray = time.split(":");
const hours = parseInt(timeArray[0]);
const minutes = parseInt(timeArray[1]);
const hoursInMinutes = hours * 60;
const total = hoursInMinutes + minutes;
return total;
}
function timetableFiller(NAB, NBA, array) {
let timetable = {
departuresFromA: [],
arrivalsToB: [],
departuresFromB: [],
arrivalsToA: [],
};
for (let i = 0; i < NAB + NBA; i++) {
let tempArr = [];
tempArr = array[i].split(" ");
if (i < NAB) {
timetable.departuresFromA.push(tempArr[0]);
timetable.arrivalsToB.push(tempArr[1]);
} else {
timetable.departuresFromB.push(tempArr[0]);
timetable.arrivalsToA.push(tempArr[1]);
}
}
return timetable;
}
function timetableToMinutes(timetable) {
let timetableMinutes = {
departuresFromA: [],
arrivalsToB: [],
departuresFromB: [],
arrivalsToA: [],
};
for (const property in timetable) {
timetable[property].map((element) =>
timetableMinutes[property].push(timeToMinutes(element))
);
}
return timetableMinutes;
}
function trainsNeededCounter(arrivalsFromDestiny, departuresFromOrigin, tat) {
let trainsNeeded = departuresFromOrigin.length;
for (let i = 0; i < arrivalsFromDestiny.length; i++) {
for (let j = 0; j < departuresFromOrigin.length; j++) {
if (arrivalsFromDestiny[i] + tat <= departuresFromOrigin[j]) {
trainsNeeded = trainsNeeded - 1;
departuresFromOrigin.splice(j, 1);
}
}
}
return trainsNeeded;
}
function responseGenerator(inputA, inputB, caseNumber) {
return `Case #${caseNumber}: ${inputA} ${inputB}`;
}
function problemSolution(input) {
const numberOfCases = parseInt(input[0]);
input.shift();
let response = [];
let caseNumber = 0;
let NAB;
let NBA;
for (let i = 0; i < input.length; i = i + NAB + NBA + 2) {
caseNumber = caseNumber + 1;
const tat = parseInt(input[i]);
const arrayNTrips = input[i + 1].split(" ");
NAB = parseInt(arrayNTrips[0]);
NBA = parseInt(arrayNTrips[1]);
const arraySchedule = input.slice(i + 2, i + 2 + NAB + NBA);
const timetable = timetableFiller(NAB, NBA, arraySchedule);
const timetableMinutes = timetableToMinutes(timetable);
const trainsNeededAB = trainsNeededCounter(
timetableMinutes.arrivalsToA,
timetableMinutes.departuresFromA,
tat
);
const trainsNeededBA = trainsNeededCounter(
timetableMinutes.arrivalsToB,
timetableMinutes.departuresFromB,
tat
);
response.push(
responseGenerator(trainsNeededAB, trainsNeededBA, caseNumber)
);
}
return response;
}
function readInput() {
const readline = require("readline");
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
terminal: false,
});
let problem = [];
rl.on("line", (line) => {
problem.push(line);
}).on("close", () => {
const solution = problemSolution(problem);
solution.map((response) => console.log(response));
});
}
readInput();
How to replicate the issue
You should login into Code Jam with your Google account.
Paste into the code area on the right side and activate the Test run mode.
As input you can copy paste the sample input provided in the problem and you can see that the output is exactly as the sample output.
I've tried with my own variations of the input and the responses seems correct but when I run the real attempt the platform says WA or Wrong Answer.
Thank you so much for your help!

I made a video about this recently. You should check it out.
I think you can understand the logic flow from it. We are both doing the same thing basically.
https://youtu.be/_Cp51vMDZAs
-check this out
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
void solve(int t)
{
int NA, NB;
float T;
cin >> T >> NA >> NB;
cin.ignore();
vector<string> ASchedule, BSchedule;
if (NA > 0)
for (int i = 0; i < NA; i++)
{
string s;
getline(cin, s);
ASchedule.push_back(s);
}
if (NB > 0)
for (int i = 0; i < NB; i++)
{
string s;
getline(cin, s);
BSchedule.push_back(s);
}
int alength, blength;
alength = (int)ASchedule.size();
blength = (int)BSchedule.size();
if (alength == 0 || blength == 0)
{
cout << "Case #" << t << ": " << alength << " " << blength << endl;
return;
}
float TT = T / 10;
string val, value;
int d;
float ADH, ADM, AAH, AAM, BDH, BDM, BAH, BAM;
vector<float> AD, AA, BD, BA;
for (int i = 0; i < alength; i++)
{
val = ASchedule[i];
ADH = stof(val.substr(0, 2));
AAH = stof(val.substr(6, 2));
ADM = stof(val.substr(3, 2));
AAM = stof(val.substr(9, 2));
if (val.at(9) == '0')
{
AAM /= 10;
AAM += TT;
AAM *= 10;
}
else
AAM += T;
if (AAM > 59)
{
d = -1;
while (AAM != 59)
{
AAM -= 1;
d++;
}
AAH++;
AAM = 0;
AAM += d;
}
// if (ADH > 23)
// ADH = 0;
// if (AAH > 23)
// AAH = 0;
ADM /= 100;
ADH += ADM;
AAM /= 100;
AAH += AAM;
AD.push_back(ADH);
AA.push_back(AAH);
}
for (int j = 0; j < blength; j++)
{
value = BSchedule[j];
BDH = stof(value.substr(0, 2));
BDM = stof(value.substr(3, 2));
BAH = stof(value.substr(6, 2));
BAM = stof(value.substr(9, 2));
if (value.at(9) == '0')
{
BAM /= 10;
BAM += TT;
BAM *= 10;
}
else
BAM += T;
if (BAM > 59)
{
d = -1;
while (BAM != 59)
{
BAM -= 1;
d++;
}
BAH++;
BAM = 0;
BAM += d;
}
// if (BDH > 23)
// BDH = 0;
// if (BAH > 23)
// BAH = 0;
BDM /= 100;
BDH += BDM;
BAM /= 100;
BAH += BAM;
BA.push_back(BAH);
BD.push_back(BDH);
}
int no1 = alength, no2 = blength;
sort(BD.begin(), BD.end());
sort(BA.begin(), BA.end());
sort(AA.begin(), AA.end());
sort(AD.begin(), AD.end());
for (int i = 0; i < alength; i++)
for (int j = 0; j < blength; j++)
if (AD[i] >= BA[j])
{
no1--;
BA[j] = 50;
break;
}
for (int i = 0; i < blength; i++)
for (int j = 0; j < alength; j++)
if (AA[j] <= BD[i])
{
no2--;
AA[j] = 50;
break;
}
cout << "Case #" << t << ": " << no1 << " " << no2 << endl;
}
int main()
{
int N;
cin >> N;
cin.ignore();
for (int t = 1; t <= N; t++)
solve(t);
}

In place string manipulation for character count

Given a string, do in place replacement of every character with it's immediate count.
eg: "aaabbbcc" - "a3b3c2"
"abab" - "a1b1a1b1"

You can do something like below
function getResult(input) {
if(!input) {
return ''; // if string is empty, return ''
}
if (input.length === 1) { // if string length is 1, return input + '1'
return input + '1'
}
var inputLength = input.length; // total length of the input string
var preChar = input[0]; // track previous character
var count = 1; // count of the previous character
for (var i = 1; i < inputLength; i++) {
if (preChar === input[i]) { // previous char and current char match
preChar = input[i]; // update prevChar with current char
count++; // count of prevChar is incremented
if (i + 1 < inputLength) { // not the end of the string
input = input.substr(0, i) + '#' + input.substr(i + 1); // update with dummy char '#' to maintain the length of the string, later will be removed
} else { // if it is the end of the string, update with count
input = input.substr(0, i) + count + input.substr(i + 1);
}
continue;
}
// if current char is not matched with prevChar
preChar = input[i]; // update the preChar with current char
// update the input with prevChar count and current char
input = input.substr(0, i) + count + input[i] + input.substr(i + 1);
inputLength++; // new count is added in the string, so total length of the string is increased by one
i++; // increment the i also because total length is increased
count = 1; // update the count with 1
if (i + 1 === inputLength) { // if it is last element
input = input.substr(0, i+1) + count + input.substr(i + 2);
}
}
input = input.replace(/\#/g, '') // remove dummy char '#' with ''
return input; // return the input
}
console.log("result for aaabbbcc is ", getResult('aaabbbcc'))
console.log("result for abab is ", getResult('abab'))

This answer assumes the OP uses ASCII
Analysis of the requirement of the the OP
Analysis of the bytes required:
When there is 1 repeat only, 2 bytes are needed.
When there are 2 repeats, 2 bytes are needed.
When there are 3, 2 bytes are needed.
When there are 9, 2 bytes.
When there are 99, 3 bytes.
When there are 999, 4 bytes.
When there are (2^64 - 1) repeats, only 20 bytes are needed.
As you can see, the bytes required to express the integer are log10(N)(at least 1) where N is the number of repeats.
Repeat-once
So, when there is only 1 repeat and there is no spare space, it has to keep iterating until
sequence that have more bytes than needed(have at least 3 repeats) to give it space
to reallocate when the end is reached
(In a string where most repeat at least twice or thrice and those who only repeat once are not gathered in one place, this is seldom required. This algorithm makes this assumption).
Then, the string will be modified backward to prevent any overwritten of data. This work because for each repeat-once sequence, it will take up double bytes to store them, so when the sequence at i will be written at 2i.
C++14 Code
#include <algorithm>
#include <cassert>
#include <utility>
#include <type_traits>
// Definition of helper functions for converting ints to string(raw and no need for format string).
template <class size_t, class UnsignedInt,
class = std::enable_if_t<std::is_unsigned<UnsignedInt>::value>>
size_t to_str_len(UnsignedInt i) noexcept
{
size_t cnt = 0;
do {
++cnt;
i /= 10;
} while (i > 0);
return cnt;
}
/*
* out should either points to the beginning of array or the last.
* step should be either 1 or -1.
*/
template <class RandomAccessIt, class UnsignedInt,
class = std::enable_if_t<std::is_unsigned<UnsignedInt>::value>>
auto uitos_impl(RandomAccessIt out, int step, UnsignedInt i) noexcept
{
do {
*out = static_cast<char>(i % 10) + 48;
out += step;
i /= 10;
} while (i > 0);
return out;
}
template <class BidirectionalIt>
void reverse_str(BidirectionalIt beg, BidirectionalIt end) noexcept
{
do {
std::iter_swap(beg++, --end);
} while (beg < end);
}
template <class RandomAccessIt, class UnsignedInt>
auto uitos(RandomAccessIt beg, UnsignedInt i) noexcept
{
auto ret = uitos_impl(beg, 1, i);
// Reverse the string to get the right order
reverse_str(beg, ret);
return ret;
}
template <class RanIt, class UnsignedInt>
auto r_uitos(RanIt last, UnsignedInt i) noexcept
{
return uitos_impl(last, -1, i);
}
template <class size_t, class RandomAccessIt>
size_t count_repeat(RandomAccessIt beg, RandomAccessIt end) noexcept
{
auto first = beg;
auto &val = *beg;
do {
++beg;
} while (beg != end && *beg == val);
return beg - first;
}
template <class size_t, class RandomAccessIt>
size_t r_count_repeat(RandomAccessIt last) noexcept
{
auto it = last;
auto &val = *it;
do {
--it;
} while (*it == val);
return last - it;
}
template <class string,
class size_type = typename string::size_type>
struct character_count
{
static_assert(std::is_unsigned<size_type>::value,
"size_type should be unsigned");
static_assert(!std::is_empty<size_type>::value,
"size_type should not be empty");
private:
using str_size_t = typename string::size_type;
using str_it = typename string::iterator;
static_assert(std::is_same<typename string::value_type, char>::value,
"Only support ASCII");
static_assert(sizeof(size_type) <= sizeof(str_size_t),
"size_type should not be bigger than typename string::size_type");
string str;
str_it in;
str_it out;
str_it end;
size_type len;
void cnt_repeat() noexcept
{
assert(in != end);
len = count_repeat<size_type>(in, str.end());
}
void advance_in() noexcept
{
assert(in != end);
in += len;
assert(in <= end);
}
void process_impl()
{
assert(in != end);
assert(out <= in);
// The main loop
do {
cnt_repeat();
if (len > 1 || out < in) {
*out = *in;
out = uitos(out + 1, len);
advance_in();
assert(out <= in);
} else {
auto first = find_enough_space();
auto deduced_first = write_backward();
assert(first == deduced_first);
}
} while (in != end);
}
auto find_enough_space()
{
assert(out == in);
auto first = in;
size_type bytes_required = 0;
do {
cnt_repeat();
advance_in();
bytes_required += to_str_len<size_type>(len) + 1;
if (size_type(in - first) >= bytes_required) {
out = first + bytes_required;
return first;
}
} while (in != str.end());
auto first_offset = first - str.begin();
// Hopefully this path won't be executed.
auto new_size = first_offset + bytes_required;
auto original_size = str.size();
assert(new_size > original_size);
str.resize(new_size);
first = str.begin() + first_offset;
out = str.end();
in = str.begin() + original_size;
end = str.begin() + original_size;
return first;
}
auto write_backward() noexcept
{
auto original_out = out--;
auto original_in = in--;
do {
len = r_count_repeat<size_type>(in);
out = r_uitos(out, len);
*out-- = *((in -= len) + 1);
} while (out != in);
auto ret = out + 1;
out = original_out;
in = original_in;
return ret;
}
public:
character_count(string &&arg):
str(std::move(arg)), in(str.begin()), out(str.begin()), end(str.end())
{}
character_count(const string &arg):
str(arg), in(str.begin()), out(str.begin()), end(str.end())
{}
/*
* ```str``` should not be empty and should not have non-visible character
*/
auto& process()
{
assert(!str.empty());
process_impl();
str.erase(out, str.end());
return *this;
}
auto& get_result() & noexcept
{
return str;
}
auto&& get_result() && noexcept
{
return std::move(str);
}
auto& get_result() const& noexcept
{
return str;
}
/*
* ```str``` should not be empty and should not have non-visible character
*/
auto& set_string(const string &arg) noexcept
{
str = arg;
in = str.begin();
out = str.begin();
end = str.end();
return *this;
}
/*
* ```str``` should not be empty and should not have non-visible character
*/
auto& set_string(string &&arg) noexcept
{
str = std::move(arg);
in = str.begin();
out = str.begin();
end = str.end();
return *this;
}
};

Assuming you're using Javascript, this is how I would do it. Just match all the groups with a regex and loop over them:
function charCount (str) {
const exp = /(\w)\1*/g
let matches = (str).match(exp)
let output = ''
matches.forEach(group => {
output += group[0] + group.length
})
return output
}
const str = 'abab'
const str2 = 'aaabbbcc'
console.log(charCount(str)) // a1b1a1b1
console.log(charCount(str2)) // a3b3c2

Encode string "aaa" to "3[a]"

give a string s, encode it by the format: "aaa" to "3[a]". The length of encoded string should the shortest.
example: "abbabb" to "2[a2[b]]"
update: suppose the string only contains lowercase letters
update: here is my code in c++, but it's slow. I know one of the improvement is using KMP to compute if the current string is combined by a repeat string.
// this function is used to check if a string is combined by repeating a substring.
// Also Here can be replaced by doing KMP algorithm for whole string to improvement
bool checkRepeating(string& s, int l, int r, int start, int end){
if((end-start+1)%(r-l+1) != 0)
return false;
int len = r-l+1;
bool res = true;
for(int i=start; i<=end; i++){
if(s[(i-start)%len+l] != s[i]){
res = false;
break;
}
}
return res;
}
// this function is used to get the length of the current number
int getLength(int l1, int l2){
return (int)(log10(l2/l1+1)+1);
}
string shortestEncodeString(string s){
int len = s.length();
vector< vector<int> > res(len, vector<int>(len, 0));
//Initial the matrix
for(int i=0; i<len; i++){
for(int j=0; j<=i; j++){
res[j][i] = i-j+1;
}
}
unordered_map<string, string> record;
for(int i=0; i<len; i++){
for(int j=i; j>=0; j--){
string temp = s.substr(j, i-j+1);
/* if the current substring has showed before, then no need to compute again
* Here is a example for this part: if the string is "abcabc".
* if we see the second "abc", then no need to compute again, just use the
* result from first "abc".
**/
if(record.find(temp) != record.end()){
res[j][i] = record[temp].size();
continue;
}
string ans = temp;
for(int k=j; k<i; k++){
string str1 = s.substr(j, k-j+1);
string str2 = s.substr(k+1, i-k);
if(res[j][i] > res[j][k] + res[k+1][i]){
res[j][i] = res[j][k]+res[k+1][i];
ans = record[str1] + record[str2];
}
if(checkRepeating(s, j, k, k+1, i) == true && res[j][i] > 2+getLength(k-j+1, i-k)+res[j][k]){
res[j][i] = 2+getLength(k-j+1, i-k)+res[j][k];
ans = to_string((i-j+1)/(k-j+1)) + '[' + record[str1] +']';
}
}
record[temp] = ans;
}
}
return record[s];
}

With very little to start with in terms of a question statement, I took a quick stab at this using JavaScript because it's easy to demonstrate. The comments are in the code, but basically there are alternating stages of joining adjacent elements, run-length checking, joining adjacent elements, and on and on until there is only one element left - the final encoded value.
I hope this helps.
function encode(str) {
var tmp = str.split('');
var arr = [];
do {
if (tmp.length === arr.length) {
// Join adjacent elements
arr.length = 0;
for (var i = 0; i < tmp.length; i += 2) {
if (i < tmp.length - 1) {
arr.push(tmp[i] + tmp[i + 1]);
} else {
arr.push(tmp[i]);
}
}
tmp.length = 0;
} else {
// Swap arrays and clear tmp
arr = tmp.slice();
tmp.length = 0;
}
// Build up the run-length strings
for (var i = 0; i < arr.length;) {
var runlength = runLength(arr, i);
if (runlength > 1) {
tmp.push(runlength + '[' + arr[i] + ']');
} else {
tmp.push(arr[i]);
}
i += runlength;
}
console.log(tmp);
} while (tmp.length > 1);
return tmp.join();
}
// Get the longest run length from a given index
function runLength(arr, ind) {
var count = 1;
for (var i = ind; i < arr.length - 1; i++) {
if (arr[i + 1] === arr[ind]) {
count++;
} else {
break;
}
}
return count;
}
<input id='inp' value='abbabb'>
<button type="submit" onClick='javascript:document.getElementById("result").value=encode(document.getElementById("inp").value)'>Encode</button>
<br>
<input id='result' value='2[a2[b]]'>

If I have given the word ssseeerabbcccdd then output should be like s3e3rab2c3d2 in c#

class Program
{
static void Main(string[] args)
{
Console.Write("Enter Word : ");
string word = Console.ReadLine();
for (var i = 0; i < word.Length; i++)
{
var check = true;
var count = 0;
for (var k = i - 1; k <= 0; k--)
{
if (word[k] == word[i]) check = false;
}
if (check)
{
for (var j = i; j<= word.Length; j++)
{
if (word[i] == word[j]) count++;
}
Console.WriteLine(word[i] + " Occurs at " + count + " places.");
}
}
Console.ReadLine();
}
}
I have tried this one but not working.

The problem is that check is set to false if the string contains two adjacent identical characters. So if (check) {...} won't be executed, if this is the case. Another problem is that you set k to string position -1 in the first iteration of for (var k = i - 1; k <= 0; k--). If i=0, then k will be -1. You also need only one inner loop.
Here is a possible solution, although slightly different than your's.
class Program
{
static void Main(string[] args)
{
Console.Write("Enter Word : ");
string word = Console.ReadLine();
int i = 0;
while (i < word.Length)
{
int count = 1;
for (int k = i+1; k < word.Length; k++)
if (word[i] == word[k])
count++;
if (count>1)
{
Console.WriteLine(word[i] + " Occurs at " + count + " places.");
}
i += count; // continue next char or after after repeated chars
}
Console.ReadLine();
}
}

How to find the longest substring with no repeated characters?

I want an algorithm to find the longest substring of characters in a given string containing no repeating characters. I can think of an O(n*n) algorithm which considers all the substrings of a given string and calculates the number of non-repeating characters. For example, consider the string "AABGAKG" in which the longest substring of unique characters is 5 characters long which corresponds to BGAKG.
Can anyone suggest a better way to do it ?
Thanks
Edit: I think I'm not able to explain my question properly to others. You can have repeating characters in a substring (It's not that we need all distinct characters in a substring which geeksforgeeks solution does). The thing which I have to find is maximum no of non-repeating characters in any substring (it may be a case that some characters are repeated).
for eg, say string is AABGAKGIMN then BGAKGIMN is the solution.

for every start = 0 ... (n-1), try to expend end to the right-most position.
keep a bool array used[26] to remember if any character is already used.
suppose currently we finished (start, end)
for start+1,
first clear by set: used[str[start]] = false;
while ((end+1 < n) && (!used[str[end+1]])) { used[str[end+1]]=true; ++end;}
now we have check new (start, end). Total Complexity is O(N).

Here is the solution in C#. I tested in in Visual studio 2012 and it works
public static int LongestSubstNonrepChar(string str) {
int curSize = 0;
int maxSize = 0;
int end = 0;
bool[] present = new bool[256];
for (int start = 0; start < str.Length; start++) {
end = start;
while (end < str.Length) {
if (!present[str[end]] && end < str.Length)
{
curSize++;
present[str[end]] = true;
end++;
}
else
break;
}
if (curSize > maxSize) {
maxSize = curSize;
}
//reset current size and the set all letter to false
curSize = 0;
for (int i = 0; i < present.Length; i++)
present[i] = false;
}
return maxSize;
}

Pretty tricky question, I give you an O(n) solution based on C#.
public string MaxSubStringKUniqueChars(string source, int k)
{
if (string.IsNullOrEmpty(source) || k > source.Length) return string.Empty;
var start = 0;
var ret = string.Empty;
IDictionary<char, int> dict = new Dictionary<char, int>();
for (var i = 0; i < source.Length; i++)
{
if (dict.ContainsKey(source[i]))
{
dict[source[i]] = 1 + dict[source[i]];
}
else
{
dict[source[i]] = 1;
}
if (dict.Count == k + 1)
{
if (i - start > ret.Length)
{
ret = source.Substring(start, i - start);
}
while (dict.Count > k)
{
int count = dict[source[start]];
if (count == 1)
{
dict.Remove(source[start]);
}
else
{
dict[source[start]] = dict[source[start]] - 1;
}
start++;
}
}
}
//just for edge case like "aabbcceee", should return "cceee"
if (dict.Count == k && source.Length - start > ret.Length)
{
return source.Substring(start, source.Length - start);
}
return ret;
}
`
//This is the test case.
public void TestMethod1()
{
var ret = Item001.MaxSubStringKUniqueChars("aabcd", 2);
Assert.AreEqual("aab", ret);
ret = Item001.MaxSubStringKUniqueChars("aabbccddeee", 2);
Assert.AreEqual("ddeee", ret);
ret = Item001.MaxSubStringKUniqueChars("abccccccccaaddddeeee", 3);
Assert.AreEqual("ccccccccaadddd", ret);
ret = Item001.MaxSubStringKUniqueChars("ababcdcdedddde", 2);
Assert.AreEqual("dedddde", ret);
}

How about this:
public static String getLongestSubstringNoRepeats( String string ){
int iLongestSoFar = 0;
int posLongestSoFar = 0;
char charPrevious = 0;
int xCharacter = 0;
int iCurrentLength = 0;
while( xCharacter < string.length() ){
char charCurrent = string.charAt( xCharacter );
iCurrentLength++;
if( charCurrent == charPrevious ){
if( iCurrentLength > iLongestSoFar ){
iLongestSoFar = iCurrentLength;
posLongestSoFar = xCharacter;
}
iCurrentLength = 1;
}
charPrevious = charCurrent;
xCharacter++;
}
if( iCurrentLength > iLongestSoFar ){
return string.substring( posLongestSoFar );
} else {
return string.substring( posLongestSoFar, posLongestSoFar + iLongestSoFar );
}
}

Let s be the given string, and n its length.
Define f(i) to be the longest [contiguous] substring of s ending at s[i] with distinct letters. That's unique and well-defined.
Compute f(i) for each i. It's easy to deduce from f(i-1) and s[i]:
If the letter s[i] is in f(i-1), let j be the greatest position j < i such that s[j] = s[i]. Then f(i) is s[j+1 .. i] (in Python notation)
Otherwise, f(i) is f(i-1) with s[i] appended.
The solution to your problem is any f(i) of maximal length (not necessarily unique).
You could implement this algorithm to run in O(n * 26) time, where 26 is the number of letters in the alphabet.

public static int longestNonDupSubstring(char[] str) {
int maxCount = 0;
int count = 0;
int maxEnd = 0;
for(int i=1;i < str.length;i++) {
if(str[i] != str[i-1]) {
count++;
}
if (str[i] == str[i-1]) {
if(maxCount<count) {
maxCount = count;
maxEnd = i;
}
count = 0;
}
if ( i!=str.length-1 && str[i] == str[i+1]) {
if(maxCount<count) {
maxCount = count - 1;
maxEnd = i-1;
}
count = 0;
}
}
int startPos = maxEnd - maxCount + 1;
for(int i = 0; i < maxCount; i++) {
System.out.print(str[startPos+i]);
}
return maxCount;
}

//Given a string ,find the longest sub-string with all distinct characters in it.If there are multiple such strings,print them all.
#include<iostream>
#include<cstring>
#include<array>
using namespace std;
//for a string with all small letters
//for capital letters use 65 instead of 97
int main()
{
array<int ,26> count ;
array<string,26>largest;
for(int i = 0 ;i <26;i++)
count[i]=0;
string s = "abcdefghijrrstqrstuvwxyzprr";
string out = "";
int k = 0,max=0;
for(int i = 0 ; i < s.size() ; i++)
{
if(count[s[i] - 97]==1)
{
int loc = out.find(s[i]);
for(int j=0;j<=loc;j++) count[out[j] - 97]=0;
if(out.size() > max)
{
max = out.size();
k=1;
largest[0] = out;
}
else if(out.size()==max) largest[k++]=out;
out.assign(out,loc+1,out.size()-loc-1);
}
out = out + s[i];
count[s[i] - 97]++;
}
for(int i=0;i<k;i++) cout<<largest[i] << endl;
//output will be
// abcdefghijr
// qrstuvwxyzp
}

Let me contribute a little as well. I have this solution with complexity will be O(N). The algorithm’s space complexity will be O(K), where K is the number of distinct characters in the input string.
public static int NoRepeatSubstring(string str)
{
int start = 0;
int maxLen = 0;
Dictionary<char, int> dic = new Dictionary<char, int>();
for (int i = 0; i < str.Length; i++)
{
char rightChar = str[i];
// if the map already contains the 'rightChar', shrink the window from the beginning so that
// we have only one occurrence of 'rightChar'
if (dic.ContainsKey(rightChar))
{
// this is tricky; in the current window, we will not have any 'rightChar' after its previous index
// and if 'start' is already ahead of the last index of 'rightChar', we'll keep 'windowStart'
start = Math.Max(start, dic[rightChar] + 1);
}
if (dic.ContainsKey(str[i]))
dic[str[i]] = i;
else
dic.Add(str[i], i);
maxLen = Math.Max(maxLen, i - start + 1);
}
return maxLen;
}
And here some Unit Tests:
Assert.Equal(3, SlideWindow.NoRepeatSubstring("aabccbb"));
Assert.Equal(2, SlideWindow.NoRepeatSubstring("abbbb"));
Assert.Equal(3, SlideWindow.NoRepeatSubstring("abccde"));

string MaximumSubstringNonRepeating(string text)
{
string max = null;
bool isCapture = false;
foreach (string s in Regex.Split(text, #"(.)\1+"))
{
if (!isCapture && (max == null || s.Length > max.Length))
{
max = s;
}
isCapture = !isCapture;
}
return max;
}
. matches any character. ( ) captures that character. \1 matches the captured character again. + repeats that character. The whole pattern matches two or more repetitions of any one character. "AA" or ",,,,".
Regex.Split() splits the string at every match of the pattern, and returns an array of the pieces that are in between. (One caveat: It also includes the captured substrings. In this case, the one character that are being repeated. The captures will show up in between the pieces. This is way I just added the isCapture flag.)
The function cuts out all the repeated characters, and returns the longest piece that where in between the repeated each set of repeated characters.
>>> MaximumSubstringNonRepeating("AABGAKG") // "AA" is repeated
"BGAKG"
>>> MaximumSubstringNonRepeating("AABGAKGIMNZZZD") // "AA" and "ZZZ" are repeated.
"BGAKGIMN"

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

NodeJS is faster than D when computing prime numbers. How? - node.js

Related

Google Code Jam: My solution to Train Timetable problem is failing

In place string manipulation for character count

Encode string "aaa" to "3[a]"

If I have given the word ssseeerabbcccdd then output should be like s3e3rab2c3d2 in c#

How to find the longest substring with no repeated characters?

Categories

Resources